/**
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.media.soundtrigger;
import static android.hardware.soundtrigger.SoundTrigger.STATUS_OK;

import android.annotation.IntDef;
import android.annotation.NonNull;
import android.annotation.Nullable;
import android.annotation.RequiresPermission;
import android.annotation.SystemApi;
import android.compat.annotation.UnsupportedAppUsage;
import android.hardware.soundtrigger.IRecognitionStatusCallback;
import android.hardware.soundtrigger.SoundTrigger;
import android.hardware.soundtrigger.SoundTrigger.ModuleProperties;
import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig;
import android.media.AudioFormat;
import android.os.Build;
import android.os.Handler;
import android.os.Looper;
import android.os.Message;
import android.os.ParcelUuid;
import android.os.RemoteException;
import android.util.Slog;

import com.android.internal.app.ISoundTriggerSession;

import java.io.PrintWriter;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.util.UUID;

/**
 * A class that allows interaction with the actual sound trigger detection on the system.
 * Sound trigger detection refers to detectors that match generic sound patterns that are
 * not voice-based. The voice-based recognition models should utilize the {@link
 * VoiceInteractionService} instead. Access to this class is protected by a permission
 * granted only to system or privileged apps.
 *
 * @hide
 */
@SystemApi
public final class SoundTriggerDetector {
    private static final boolean DBG = false;
    private static final String TAG = "SoundTriggerDetector";

    // Message codes posted to mHandler. Only the detection, error, pause and resume codes are
    // dispatched by MyHandler#handleMessage; MSG_AVAILABILITY_CHANGED is declared but nothing
    // in this class sends or handles it.
    private static final int MSG_AVAILABILITY_CHANGED = 1;
    private static final int MSG_SOUND_TRIGGER_DETECTED = 2;
    private static final int MSG_DETECTION_ERROR = 3;
    private static final int MSG_DETECTION_PAUSE = 4;
    private static final int MSG_DETECTION_RESUME = 5;

    private final Object mLock = new Object();

    private final ISoundTriggerSession mSoundTriggerSession;
    private final UUID mSoundModelId;
    private final Callback mCallback;
    private final Handler mHandler;
    private final RecognitionCallback mRecognitionCallback;

    /** @hide */
    @Retention(RetentionPolicy.SOURCE)
    @IntDef(flag = true,
            value = {
                RECOGNITION_FLAG_NONE,
                RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO,
                RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS,
                RECOGNITION_FLAG_ENABLE_AUDIO_ECHO_CANCELLATION,
                RECOGNITION_FLAG_ENABLE_AUDIO_NOISE_SUPPRESSION,
                RECOGNITION_FLAG_RUN_IN_BATTERY_SAVER,
            })
    public @interface RecognitionFlags {}

    /**
     * Empty flag for {@link #startRecognition(int)}.
     *
     *  @hide
     */
    public static final int RECOGNITION_FLAG_NONE = 0;

    /**
     * Recognition flag for {@link #startRecognition(int)} that indicates
     * whether the trigger audio for hotword needs to be captured.
     */
    public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1;

    /**
     * Recognition flag for {@link #startRecognition(int)} that indicates
     * whether the recognition should keep going on even after the
     * model triggers.
     * If this flag is specified, it's possible to get multiple
     * triggers after a call to {@link #startRecognition(int)}, if the model
     * triggers multiple times.
     * When this isn't specified, the default behavior is to stop recognition once the
     * trigger happens, till the caller starts recognition again.
     */
    public static final int RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS = 0x2;

    /**
     * Audio capabilities flag for {@link #startRecognition(int)} that indicates
     * if the underlying recognition should use AEC.
     * This capability may or may not be supported by the system, and support can be queried
     * by calling {@link SoundTriggerManager#getModuleProperties()} and checking
     * {@link ModuleProperties#audioCapabilities}. The corresponding capabilities field for
     * this flag is {@link SoundTrigger.ModuleProperties#AUDIO_CAPABILITY_ECHO_CANCELLATION}.
     * If this flag is passed without the audio capability supported, there will be no audio effect
     * applied.
     */
    public static final int RECOGNITION_FLAG_ENABLE_AUDIO_ECHO_CANCELLATION = 0x4;

    /**
     * Audio capabilities flag for {@link #startRecognition(int)} that indicates
     * if the underlying recognition should use noise suppression.
     * This capability may or may not be supported by the system, and support can be queried
     * by calling {@link SoundTriggerManager#getModuleProperties()} and checking
     * {@link ModuleProperties#audioCapabilities}. The corresponding capabilities field for
     * this flag is {@link SoundTrigger.ModuleProperties#AUDIO_CAPABILITY_NOISE_SUPPRESSION}.
     * If this flag is passed without the audio capability supported, there will be no audio effect
     * applied.
     */
    public static final int RECOGNITION_FLAG_ENABLE_AUDIO_NOISE_SUPPRESSION = 0x8;

    /**
     * Recognition flag for {@link #startRecognition(int)} that indicates whether the recognition
     * should continue after battery saver mode is enabled.
     * When this flag is specified, the caller will be checked for
     * {@link android.Manifest.permission#SOUND_TRIGGER_RUN_IN_BATTERY_SAVER} permission granted.
     */
    public static final int RECOGNITION_FLAG_RUN_IN_BATTERY_SAVER = 0x10;

    /**
     * Additional payload for {@link Callback#onDetected}.
     */
    public static class EventPayload {
        // Selects which accessor exposes mData: getTriggerAudio() when true, getData() when
        // false.
        private final boolean mTriggerAvailable;

        // Indicates if {@code mCaptureSession} can be used to continue capturing more audio
        // from the DSP hardware.
        private final boolean mCaptureAvailable;
        // The session to use when attempting to capture more audio from the DSP hardware.
        private final int mCaptureSession;
        private final AudioFormat mAudioFormat;
        // Raw data associated with the event.
        // This is the audio that triggered the detector if {@code mTriggerAvailable} is true.
        private final byte[] mData;

        private EventPayload(boolean triggerAvailable, boolean captureAvailable,
                AudioFormat audioFormat, int captureSession, byte[] data) {
            mTriggerAvailable = triggerAvailable;
            mCaptureAvailable = captureAvailable;
            mCaptureSession = captureSession;
            mAudioFormat = audioFormat;
            mData = data;
        }

        /**
         * Gets the format of the audio obtained using {@link #getTriggerAudio()}.
         * May be null if there's no audio present.
         */
        @Nullable
        public AudioFormat getCaptureAudioFormat() {
            return mAudioFormat;
        }

        /**
         * Gets the raw audio that triggered the detector.
         * This may be null if the trigger audio isn't available.
         * If non-null, the format of the audio can be obtained by calling
         * {@link #getCaptureAudioFormat()}.
         *
         * @see AlwaysOnHotwordDetector#RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO
         */
        @Nullable
        public byte[] getTriggerAudio() {
            if (mTriggerAvailable) {
                return mData;
            } else {
                return null;
            }
        }

        /**
         * Gets the opaque data passed from the detection engine for the event.
         * This may be null if it was not populated by the engine, or if the data is known to
         * contain the trigger audio.
         *
         * @see #getTriggerAudio
         *
         * @hide
         */
        @Nullable
        @UnsupportedAppUsage(maxTargetSdk = Build.VERSION_CODES.R, trackingBug = 170729553)
        public byte[] getData() {
            if (!mTriggerAvailable) {
                return mData;
            } else {
                return null;
            }
        }

        /**
         * Gets the session ID to start a capture from the DSP.
         * This may be null if streaming capture isn't possible.
         * If non-null, the format of the audio that can be captured can be
         * obtained using {@link #getCaptureAudioFormat()}.
         *
         * TODO: Candidate for Public API when the API to start capture with a session ID
         * is made public.
         *
         * TODO: Add this to {@link #getCaptureAudioFormat()}:
         * "Gets the format of the audio obtained using {@link #getTriggerAudio()}
         * or {@link #getCaptureSession()}. May be null if no audio can be obtained
         * for either the trigger or a streaming session."
         *
         * TODO: Should this return a known invalid value instead?
         *
         * @hide
         */
        @Nullable
        @UnsupportedAppUsage(maxTargetSdk = Build.VERSION_CODES.R, trackingBug = 170729553)
        public Integer getCaptureSession() {
            if (mCaptureAvailable) {
                return mCaptureSession;
            } else {
                return null;
            }
        }
    }

    public static abstract class Callback {
        /**
         * Called when the availability of the sound model changes.
         */
        public abstract void onAvailabilityChanged(int status);

        /**
         * Called when the sound model has triggered (such as when it matched a
         * given sound pattern).
         */
        public abstract void onDetected(@NonNull EventPayload eventPayload);

        /**
         * Called when the detection fails due to an error.
         */
        public abstract void onError();

        /**
         * Called when the recognition is paused temporarily for some reason.
         * This is an informational callback, and the clients shouldn't be doing anything here
         * except showing an indication on their UI if they have to.
         */
        public abstract void onRecognitionPaused();

        /**
         * Called when the recognition is resumed after it was temporarily paused.
         * This is an informational callback, and the clients shouldn't be doing anything here
         * except showing an indication on their UI if they have to.
         */
        public abstract void onRecognitionResumed();
    }

    /**
     * This class should be constructed by the {@link SoundTriggerManager}.
     *
     * @param soundTriggerSession session used to start and stop recognition.
     * @param soundModelId identifier of the sound model this detector operates on.
     * @param callback receiver of detection, error, pause and resume events.
     * @param handler if non-null, callbacks are dispatched on this handler's looper; otherwise
     *         a handler created with the no-argument {@link Handler} constructor is used.
     * @hide
     */
    SoundTriggerDetector(ISoundTriggerSession soundTriggerSession, UUID soundModelId,
            @NonNull Callback callback, @Nullable Handler handler) {
        mSoundTriggerSession = soundTriggerSession;
        mSoundModelId = soundModelId;
        mCallback = callback;
        if (handler == null) {
            mHandler = new MyHandler();
        } else {
            mHandler = new MyHandler(handler.getLooper());
        }
        mRecognitionCallback = new RecognitionCallback();
    }

    /**
     * Starts recognition on the associated sound model. Result is indicated via the
     * {@link Callback}.
     *
     * @param recognitionFlags bitwise OR of the {@code RECOGNITION_FLAG_*} constants.
     * @return Indicates whether the call succeeded or not. {@code false} is returned both when
     *         the session reports a status other than {@code STATUS_OK} and when the remote
     *         call throws.
     */
    @RequiresPermission(android.Manifest.permission.MANAGE_SOUND_TRIGGER)
    public boolean startRecognition(@RecognitionFlags int recognitionFlags) {
        if (DBG) {
            Slog.d(TAG, "startRecognition()");
        }
        boolean captureTriggerAudio =
                (recognitionFlags & RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0;

        boolean allowMultipleTriggers =
                (recognitionFlags & RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS) != 0;

        boolean runInBatterySaver = (recognitionFlags & RECOGNITION_FLAG_RUN_IN_BATTERY_SAVER) != 0;

        // Translate the public recognition flags into the module-level audio capability bits.
        int audioCapabilities = 0;
        if ((recognitionFlags & RECOGNITION_FLAG_ENABLE_AUDIO_ECHO_CANCELLATION) != 0) {
            audioCapabilities |= SoundTrigger.ModuleProperties.AUDIO_CAPABILITY_ECHO_CANCELLATION;
        }
        if ((recognitionFlags & RECOGNITION_FLAG_ENABLE_AUDIO_NOISE_SUPPRESSION) != 0) {
            audioCapabilities |= SoundTrigger.ModuleProperties.AUDIO_CAPABILITY_NOISE_SUPPRESSION;
        }

        int status;
        try {
            status = mSoundTriggerSession.startRecognition(new ParcelUuid(mSoundModelId),
                    mRecognitionCallback, new RecognitionConfig(captureTriggerAudio,
                            allowMultipleTriggers, null, null, audioCapabilities),
                    runInBatterySaver);
        } catch (RemoteException e) {
            // Keep the boolean failure contract, but leave a trace instead of failing silently.
            Slog.w(TAG, "RemoteException in startRecognition()", e);
            return false;
        }
        return status == STATUS_OK;
    }

    /**
     * Stops recognition for the associated model.
     *
     * @return {@code true} if the session reported {@code STATUS_OK}; {@code false} if it
     *         reported any other status or the remote call threw.
     */
    @RequiresPermission(android.Manifest.permission.MANAGE_SOUND_TRIGGER)
    public boolean stopRecognition() {
        int status;
        try {
            status = mSoundTriggerSession.stopRecognition(new ParcelUuid(mSoundModelId),
                    mRecognitionCallback);
        } catch (RemoteException e) {
            // Keep the boolean failure contract, but leave a trace instead of failing silently.
            Slog.w(TAG, "RemoteException in stopRecognition()", e);
            return false;
        }
        return status == STATUS_OK;
    }

    /**
     * Currently writes nothing; it only acquires {@code mLock}.
     *
     * @hide
     */
    public void dump(String prefix, PrintWriter pw) {
        synchronized (mLock) {
            // TODO: Dump useful debug information.
        }
    }

    /**
     * Callback that handles events from the lower sound trigger layer.
     *
     * Note that these callbacks will be called synchronously from the SoundTriggerService
     * layer and thus should do minimal work (such as sending a message on a handler to do
     * the real work).
     * @hide
     */
    private class RecognitionCallback extends IRecognitionStatusCallback.Stub {

        /**
         * Wraps the event in an {@link EventPayload} and posts it to the handler.
         *
         * @hide
         */
        @Override
        public void onGenericSoundTriggerDetected(SoundTrigger.GenericRecognitionEvent event) {
            Slog.d(TAG, "onGenericSoundTriggerDetected()" + event);
            Message.obtain(mHandler,
                    MSG_SOUND_TRIGGER_DETECTED,
                    new EventPayload(event.triggerInData, event.captureAvailable,
                            event.captureFormat, event.captureSession, event.data))
                    .sendToTarget();
        }

        /** Keyphrase events are logged and otherwise ignored by this detector. */
        @Override
        public void onKeyphraseDetected(SoundTrigger.KeyphraseRecognitionEvent event) {
            Slog.e(TAG, "Ignoring onKeyphraseDetected() called for " + event);
        }

        /**
         * Posts an error message to the handler; the status code is logged but not forwarded.
         *
         * @hide
         */
        @Override
        public void onError(int status) {
            Slog.d(TAG, "onError()" + status);
            mHandler.sendEmptyMessage(MSG_DETECTION_ERROR);
        }

        /**
         * @hide
         */
        @Override
        public void onRecognitionPaused() {
            Slog.d(TAG, "onRecognitionPaused()");
            mHandler.sendEmptyMessage(MSG_DETECTION_PAUSE);
        }

        /**
         * @hide
         */
        @Override
        public void onRecognitionResumed() {
            Slog.d(TAG, "onRecognitionResumed()");
            mHandler.sendEmptyMessage(MSG_DETECTION_RESUME);
        }
    }

    /** Handler that forwards posted recognition messages to {@code mCallback}. */
    private class MyHandler extends Handler {

        MyHandler() {
            super();
        }

        MyHandler(Looper looper) {
            super(looper);
        }

        @Override
        public void handleMessage(Message msg) {
            if (mCallback == null) {
                Slog.w(TAG, "Received message: " + msg.what + " for NULL callback.");
                return;
            }
            // MSG_AVAILABILITY_CHANGED has no case here, so it would fall through to default.
            switch (msg.what) {
                case MSG_SOUND_TRIGGER_DETECTED:
                    mCallback.onDetected((EventPayload) msg.obj);
                    break;
                case MSG_DETECTION_ERROR:
                    mCallback.onError();
                    break;
                case MSG_DETECTION_PAUSE:
                    mCallback.onRecognitionPaused();
                    break;
                case MSG_DETECTION_RESUME:
                    mCallback.onRecognitionResumed();
                    break;
                default:
                    super.handleMessage(msg);
                    break;
            }
        }
    }
}