/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package android.speech.tts;

import android.annotation.NonNull;
import android.media.AudioFormat;
import android.speech.tts.TextToSpeechService.UtteranceProgressDispatcher;
import android.util.Log;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import java.util.Arrays;

/**
 * Speech synthesis request that writes the audio to a WAV file.
 */
class FileSynthesisCallback extends AbstractSynthesisCallback {

    private static final String TAG = "FileSynthesisRequest";
    private static final boolean DBG = false;

    private static final int MAX_AUDIO_BUFFER_SIZE = 8192;

    private static final int WAV_HEADER_LENGTH = 44;
    private static final short WAV_FORMAT_PCM = 0x0001;

    private final Object mStateLock = new Object();

    private int mSampleRateInHz;
    private int mAudioFormat;
    private int mChannelCount;

    private FileChannel mFileChannel;

    private final UtteranceProgressDispatcher mDispatcher;

    private boolean mStarted = false;
    private boolean mDone = false;

    /** Status code of synthesis */
    protected int mStatusCode;

    FileSynthesisCallback(@NonNull FileChannel fileChannel,
            @NonNull UtteranceProgressDispatcher dispatcher, boolean clientIsUsingV2) {
        super(clientIsUsingV2);
        mFileChannel = fileChannel;
        mDispatcher = dispatcher;
        mStatusCode = TextToSpeech.SUCCESS;
    }

    @Override
    void stop() {
        synchronized (mStateLock) {
            if (mDone) {
                return;
            }
            if (mStatusCode == TextToSpeech.STOPPED) {
                return;
            }

            mStatusCode = TextToSpeech.STOPPED;
            cleanUp();
            mDispatcher.dispatchOnStop();
        }
    }

    /**
     * Must be called while holding the monitor on {@link #mStateLock}.
     */
    private void cleanUp() {
        closeFile();
    }

    /**
     * Must be called while holding the monitor on {@link #mStateLock}.
     */
    private void closeFile() {
        // File will be closed by the SpeechItem in the speech service.
        mFileChannel = null;
    }

    @Override
    public int getMaxBufferSize() {
        return MAX_AUDIO_BUFFER_SIZE;
    }

    @Override
    public int start(int sampleRateInHz, int audioFormat, int channelCount) {
        if (DBG) {
            Log.d(TAG, "FileSynthesisRequest.start(" + sampleRateInHz + "," + audioFormat
                    + "," + channelCount + ")");
        }
        if (audioFormat != AudioFormat.ENCODING_PCM_8BIT &&
            audioFormat != AudioFormat.ENCODING_PCM_16BIT &&
            audioFormat != AudioFormat.ENCODING_PCM_FLOAT) {
            Log.e(TAG, "Audio format encoding " + audioFormat + " not supported. Please use one " +
                    "of AudioFormat.ENCODING_PCM_8BIT, AudioFormat.ENCODING_PCM_16BIT or " +
                    "AudioFormat.ENCODING_PCM_FLOAT");
        }
        mDispatcher.dispatchOnBeginSynthesis(sampleRateInHz, audioFormat, channelCount);

        FileChannel fileChannel = null;
        synchronized (mStateLock) {
            if (mStatusCode == TextToSpeech.STOPPED) {
                if (DBG) Log.d(TAG, "Request has been aborted.");
                return errorCodeOnStop();
            }
            if (mStatusCode != TextToSpeech.SUCCESS) {
                if (DBG) Log.d(TAG, "Error was raised");
                return TextToSpeech.ERROR;
            }
            if (mStarted) {
                Log.e(TAG, "Start called twice");
                return TextToSpeech.ERROR;
            }
            mStarted = true;
            mSampleRateInHz = sampleRateInHz;
            mAudioFormat = audioFormat;
            mChannelCount = channelCount;

            mDispatcher.dispatchOnStart();
            fileChannel = mFileChannel;
        }

        try {
            fileChannel.write(ByteBuffer.allocate(WAV_HEADER_LENGTH));
            return TextToSpeech.SUCCESS;
        } catch (IOException ex) {
            Log.e(TAG, "Failed to write wav header to output file descriptor", ex);
            synchronized (mStateLock) {
                cleanUp();
                mStatusCode = TextToSpeech.ERROR_OUTPUT;
            }
            return TextToSpeech.ERROR;
        }
    }

    @Override
    public int audioAvailable(byte[] buffer, int offset, int length) {
        if (DBG) {
            Log.d(TAG, "FileSynthesisRequest.audioAvailable(" + Arrays.toString(buffer)
                    + "," + offset + "," + length + ")");
        }
        FileChannel fileChannel = null;
        synchronized (mStateLock) {
            if (mStatusCode == TextToSpeech.STOPPED) {
                if (DBG) Log.d(TAG, "Request has been aborted.");
                return errorCodeOnStop();
            }
            if (mStatusCode != TextToSpeech.SUCCESS) {
                if (DBG) Log.d(TAG, "Error was raised");
                return TextToSpeech.ERROR;
            }
            if (mFileChannel == null) {
                Log.e(TAG, "File not open");
                mStatusCode = TextToSpeech.ERROR_OUTPUT;
                return TextToSpeech.ERROR;
            }
            if (!mStarted) {
                Log.e(TAG, "Start method was not called");
                return TextToSpeech.ERROR;
            }
            fileChannel = mFileChannel;
        }

        final byte[] bufferCopy = new byte[length];
        System.arraycopy(buffer, offset, bufferCopy, 0, length);
        mDispatcher.dispatchOnAudioAvailable(bufferCopy);

        try {
            fileChannel.write(ByteBuffer.wrap(buffer, offset, length));
            return TextToSpeech.SUCCESS;
        } catch (IOException ex) {
            Log.e(TAG, "Failed to write to output file descriptor", ex);
            synchronized (mStateLock) {
                cleanUp();
                mStatusCode = TextToSpeech.ERROR_OUTPUT;
            }
            return TextToSpeech.ERROR;
        }
    }

    @Override
    public int done() {
        if (DBG) Log.d(TAG, "FileSynthesisRequest.done()");
        FileChannel fileChannel = null;

        int sampleRateInHz = 0;
        int audioFormat = 0;
        int channelCount = 0;

        synchronized (mStateLock) {
            if (mDone) {
                Log.w(TAG, "Duplicate call to done()");
                // This is not an error that would prevent synthesis. Hence no
                // setStatusCode is set.
                return TextToSpeech.ERROR;
            }
            if (mStatusCode == TextToSpeech.STOPPED) {
                if (DBG) Log.d(TAG, "Request has been aborted.");
                return errorCodeOnStop();
            }
            if (mStatusCode != TextToSpeech.SUCCESS && mStatusCode != TextToSpeech.STOPPED) {
                mDispatcher.dispatchOnError(mStatusCode);
                return TextToSpeech.ERROR;
            }
            if (mFileChannel == null) {
                Log.e(TAG, "File not open");
                return TextToSpeech.ERROR;
            }
            mDone = true;
            fileChannel = mFileChannel;
            sampleRateInHz = mSampleRateInHz;
            audioFormat = mAudioFormat;
            channelCount = mChannelCount;
        }

        try {
            // Write WAV header at start of file
            fileChannel.position(0);
            int dataLength = (int) (fileChannel.size() - WAV_HEADER_LENGTH);
            fileChannel.write(
                    makeWavHeader(sampleRateInHz, audioFormat, channelCount, dataLength));

            synchronized (mStateLock) {
                closeFile();
                mDispatcher.dispatchOnSuccess();
                return TextToSpeech.SUCCESS;
            }
        } catch (IOException ex) {
            Log.e(TAG, "Failed to write to output file descriptor", ex);
            synchronized (mStateLock) {
                cleanUp();
            }
            return TextToSpeech.ERROR;
        }
    }

    @Override
    public void error() {
        error(TextToSpeech.ERROR_SYNTHESIS);
    }

    @Override
    public void error(int errorCode) {
        if (DBG) Log.d(TAG, "FileSynthesisRequest.error()");
        synchronized (mStateLock) {
            if (mDone) {
                return;
            }
            cleanUp();
            mStatusCode = errorCode;
        }
    }

    @Override
    public boolean hasStarted() {
        synchronized (mStateLock) {
            return mStarted;
        }
    }

    @Override
    public boolean hasFinished() {
        synchronized (mStateLock) {
            return mDone;
        }
    }

    private ByteBuffer makeWavHeader(int sampleRateInHz, int audioFormat, int channelCount,
            int dataLength) {
        int sampleSizeInBytes = AudioFormat.getBytesPerSample(audioFormat);
        int byteRate = sampleRateInHz * sampleSizeInBytes * channelCount;
        short blockAlign = (short) (sampleSizeInBytes * channelCount);
        short bitsPerSample = (short) (sampleSizeInBytes * 8);

        byte[] headerBuf = new byte[WAV_HEADER_LENGTH];
        ByteBuffer header = ByteBuffer.wrap(headerBuf);
        header.order(ByteOrder.LITTLE_ENDIAN);

        header.put(new byte[]{ 'R', 'I', 'F', 'F' });
        header.putInt(dataLength + WAV_HEADER_LENGTH - 8); // RIFF chunk size
        header.put(new byte[]{ 'W', 'A', 'V', 'E' });
        header.put(new byte[]{ 'f', 'm', 't', ' ' });
        header.putInt(16); // size of fmt chunk
        header.putShort(WAV_FORMAT_PCM);
        header.putShort((short) channelCount);
        header.putInt(sampleRateInHz);
        header.putInt(byteRate);
        header.putShort(blockAlign);
        header.putShort(bitsPerSample);
        header.put(new byte[]{ 'd', 'a', 't', 'a' });
        header.putInt(dataLength);
        header.flip();

        return header;
    }

    @Override
    public void rangeStart(int markerInFrames, int start, int end) {
        mDispatcher.dispatchOnRangeStart(markerInFrames, start, end);
    }
}