/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
16 package android.speech.tts;
17 
18 import android.annotation.NonNull;
19 import android.media.AudioFormat;
20 import android.speech.tts.TextToSpeechService.UtteranceProgressDispatcher;
21 import android.util.Log;
22 
23 import java.io.IOException;
24 import java.nio.ByteBuffer;
25 import java.nio.ByteOrder;
26 import java.nio.channels.FileChannel;
27 import java.util.Arrays;
28 
29 /**
30  * Speech synthesis request that writes the audio to a WAV file.
31  */
32 class FileSynthesisCallback extends AbstractSynthesisCallback {
33 
34     private static final String TAG = "FileSynthesisRequest";
35     private static final boolean DBG = false;
36 
37     private static final int MAX_AUDIO_BUFFER_SIZE = 8192;
38 
39     private static final int WAV_HEADER_LENGTH = 44;
40     private static final short WAV_FORMAT_PCM = 0x0001;
41 
42     private final Object mStateLock = new Object();
43 
44     private int mSampleRateInHz;
45     private int mAudioFormat;
46     private int mChannelCount;
47 
48     private FileChannel mFileChannel;
49 
50     private final UtteranceProgressDispatcher mDispatcher;
51 
52     private boolean mStarted = false;
53     private boolean mDone = false;
54 
55     /** Status code of synthesis */
56     protected int mStatusCode;
57 
FileSynthesisCallback(@onNull FileChannel fileChannel, @NonNull UtteranceProgressDispatcher dispatcher, boolean clientIsUsingV2)58     FileSynthesisCallback(@NonNull FileChannel fileChannel,
59             @NonNull UtteranceProgressDispatcher dispatcher, boolean clientIsUsingV2) {
60         super(clientIsUsingV2);
61         mFileChannel = fileChannel;
62         mDispatcher = dispatcher;
63         mStatusCode = TextToSpeech.SUCCESS;
64     }
65 
66     @Override
stop()67     void stop() {
68         synchronized (mStateLock) {
69             if (mDone) {
70                 return;
71             }
72             if (mStatusCode == TextToSpeech.STOPPED) {
73                 return;
74             }
75 
76             mStatusCode = TextToSpeech.STOPPED;
77             cleanUp();
78             mDispatcher.dispatchOnStop();
79         }
80     }
81 
82     /**
83      * Must be called while holding the monitor on {@link #mStateLock}.
84      */
cleanUp()85     private void cleanUp() {
86         closeFile();
87     }
88 
89     /**
90      * Must be called while holding the monitor on {@link #mStateLock}.
91      */
closeFile()92     private void closeFile() {
93         // File will be closed by the SpeechItem in the speech service.
94         mFileChannel = null;
95     }
96 
97     @Override
getMaxBufferSize()98     public int getMaxBufferSize() {
99         return MAX_AUDIO_BUFFER_SIZE;
100     }
101 
102     @Override
start(int sampleRateInHz, int audioFormat, int channelCount)103     public int start(int sampleRateInHz, int audioFormat, int channelCount) {
104         if (DBG) {
105             Log.d(TAG, "FileSynthesisRequest.start(" + sampleRateInHz + "," + audioFormat
106                     + "," + channelCount + ")");
107         }
108         if (audioFormat != AudioFormat.ENCODING_PCM_8BIT &&
109             audioFormat != AudioFormat.ENCODING_PCM_16BIT &&
110             audioFormat != AudioFormat.ENCODING_PCM_FLOAT) {
111             Log.e(TAG, "Audio format encoding " + audioFormat + " not supported. Please use one " +
112                        "of AudioFormat.ENCODING_PCM_8BIT, AudioFormat.ENCODING_PCM_16BIT or " +
113                        "AudioFormat.ENCODING_PCM_FLOAT");
114         }
115         mDispatcher.dispatchOnBeginSynthesis(sampleRateInHz, audioFormat, channelCount);
116 
117         FileChannel fileChannel = null;
118         synchronized (mStateLock) {
119             if (mStatusCode == TextToSpeech.STOPPED) {
120                 if (DBG) Log.d(TAG, "Request has been aborted.");
121                 return errorCodeOnStop();
122             }
123             if (mStatusCode != TextToSpeech.SUCCESS) {
124                 if (DBG) Log.d(TAG, "Error was raised");
125                 return TextToSpeech.ERROR;
126             }
127             if (mStarted) {
128                 Log.e(TAG, "Start called twice");
129                 return TextToSpeech.ERROR;
130             }
131             mStarted = true;
132             mSampleRateInHz = sampleRateInHz;
133             mAudioFormat = audioFormat;
134             mChannelCount = channelCount;
135 
136             mDispatcher.dispatchOnStart();
137             fileChannel = mFileChannel;
138         }
139 
140         try {
141             fileChannel.write(ByteBuffer.allocate(WAV_HEADER_LENGTH));
142                 return TextToSpeech.SUCCESS;
143         } catch (IOException ex) {
144             Log.e(TAG, "Failed to write wav header to output file descriptor", ex);
145             synchronized (mStateLock) {
146                 cleanUp();
147                 mStatusCode = TextToSpeech.ERROR_OUTPUT;
148             }
149             return TextToSpeech.ERROR;
150         }
151     }
152 
153     @Override
audioAvailable(byte[] buffer, int offset, int length)154     public int audioAvailable(byte[] buffer, int offset, int length) {
155         if (DBG) {
156             Log.d(TAG, "FileSynthesisRequest.audioAvailable(" + Arrays.toString(buffer)
157                     + "," + offset + "," + length + ")");
158         }
159         FileChannel fileChannel = null;
160         synchronized (mStateLock) {
161             if (mStatusCode == TextToSpeech.STOPPED) {
162                 if (DBG) Log.d(TAG, "Request has been aborted.");
163                 return errorCodeOnStop();
164             }
165             if (mStatusCode != TextToSpeech.SUCCESS) {
166                 if (DBG) Log.d(TAG, "Error was raised");
167                 return TextToSpeech.ERROR;
168             }
169             if (mFileChannel == null) {
170                 Log.e(TAG, "File not open");
171                 mStatusCode = TextToSpeech.ERROR_OUTPUT;
172                 return TextToSpeech.ERROR;
173             }
174             if (!mStarted) {
175                 Log.e(TAG, "Start method was not called");
176                 return TextToSpeech.ERROR;
177             }
178             fileChannel = mFileChannel;
179         }
180 
181         final byte[] bufferCopy = new byte[length];
182         System.arraycopy(buffer, offset, bufferCopy, 0, length);
183         mDispatcher.dispatchOnAudioAvailable(bufferCopy);
184 
185         try {
186             fileChannel.write(ByteBuffer.wrap(buffer,  offset,  length));
187             return TextToSpeech.SUCCESS;
188         } catch (IOException ex) {
189             Log.e(TAG, "Failed to write to output file descriptor", ex);
190             synchronized (mStateLock) {
191                 cleanUp();
192                 mStatusCode = TextToSpeech.ERROR_OUTPUT;
193             }
194             return TextToSpeech.ERROR;
195         }
196     }
197 
198     @Override
done()199     public int done() {
200         if (DBG) Log.d(TAG, "FileSynthesisRequest.done()");
201         FileChannel fileChannel = null;
202 
203         int sampleRateInHz = 0;
204         int audioFormat = 0;
205         int channelCount = 0;
206 
207         synchronized (mStateLock) {
208             if (mDone) {
209                 Log.w(TAG, "Duplicate call to done()");
210                 // This is not an error that would prevent synthesis. Hence no
211                 // setStatusCode is set.
212                 return TextToSpeech.ERROR;
213             }
214             if (mStatusCode == TextToSpeech.STOPPED) {
215                 if (DBG) Log.d(TAG, "Request has been aborted.");
216                 return errorCodeOnStop();
217             }
218             if (mStatusCode != TextToSpeech.SUCCESS && mStatusCode != TextToSpeech.STOPPED) {
219                 mDispatcher.dispatchOnError(mStatusCode);
220                 return TextToSpeech.ERROR;
221             }
222             if (mFileChannel == null) {
223                 Log.e(TAG, "File not open");
224                 return TextToSpeech.ERROR;
225             }
226             mDone = true;
227             fileChannel = mFileChannel;
228             sampleRateInHz = mSampleRateInHz;
229             audioFormat = mAudioFormat;
230             channelCount = mChannelCount;
231         }
232 
233         try {
234             // Write WAV header at start of file
235             fileChannel.position(0);
236             int dataLength = (int) (fileChannel.size() - WAV_HEADER_LENGTH);
237             fileChannel.write(
238                     makeWavHeader(sampleRateInHz, audioFormat, channelCount, dataLength));
239 
240             synchronized (mStateLock) {
241                 closeFile();
242                 mDispatcher.dispatchOnSuccess();
243                 return TextToSpeech.SUCCESS;
244             }
245         } catch (IOException ex) {
246             Log.e(TAG, "Failed to write to output file descriptor", ex);
247             synchronized (mStateLock) {
248                 cleanUp();
249             }
250             return TextToSpeech.ERROR;
251         }
252     }
253 
254     @Override
error()255     public void error() {
256         error(TextToSpeech.ERROR_SYNTHESIS);
257     }
258 
259     @Override
error(int errorCode)260     public void error(int errorCode) {
261         if (DBG) Log.d(TAG, "FileSynthesisRequest.error()");
262         synchronized (mStateLock) {
263             if (mDone) {
264                 return;
265             }
266             cleanUp();
267             mStatusCode = errorCode;
268         }
269     }
270 
271     @Override
hasStarted()272     public boolean hasStarted() {
273         synchronized (mStateLock) {
274             return mStarted;
275         }
276     }
277 
278     @Override
hasFinished()279     public boolean hasFinished() {
280         synchronized (mStateLock) {
281             return mDone;
282         }
283     }
284 
makeWavHeader(int sampleRateInHz, int audioFormat, int channelCount, int dataLength)285     private ByteBuffer makeWavHeader(int sampleRateInHz, int audioFormat, int channelCount,
286             int dataLength) {
287         int sampleSizeInBytes = AudioFormat.getBytesPerSample(audioFormat);
288         int byteRate = sampleRateInHz * sampleSizeInBytes * channelCount;
289         short blockAlign = (short) (sampleSizeInBytes * channelCount);
290         short bitsPerSample = (short) (sampleSizeInBytes * 8);
291 
292         byte[] headerBuf = new byte[WAV_HEADER_LENGTH];
293         ByteBuffer header = ByteBuffer.wrap(headerBuf);
294         header.order(ByteOrder.LITTLE_ENDIAN);
295 
296         header.put(new byte[]{ 'R', 'I', 'F', 'F' });
297         header.putInt(dataLength + WAV_HEADER_LENGTH - 8);  // RIFF chunk size
298         header.put(new byte[]{ 'W', 'A', 'V', 'E' });
299         header.put(new byte[]{ 'f', 'm', 't', ' ' });
300         header.putInt(16);  // size of fmt chunk
301         header.putShort(WAV_FORMAT_PCM);
302         header.putShort((short) channelCount);
303         header.putInt(sampleRateInHz);
304         header.putInt(byteRate);
305         header.putShort(blockAlign);
306         header.putShort(bitsPerSample);
307         header.put(new byte[]{ 'd', 'a', 't', 'a' });
308         header.putInt(dataLength);
309         header.flip();
310 
311         return header;
312     }
313 
314     @Override
rangeStart(int markerInFrames, int start, int end)315     public void rangeStart(int markerInFrames, int start, int end) {
316         mDispatcher.dispatchOnRangeStart(markerInFrames, start, end);
317     }
318 }
319