1 /**
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.media.soundtrigger;
18 import static android.hardware.soundtrigger.SoundTrigger.STATUS_OK;
19 
20 import android.annotation.IntDef;
21 import android.annotation.NonNull;
22 import android.annotation.Nullable;
23 import android.annotation.RequiresPermission;
24 import android.annotation.SystemApi;
25 import android.compat.annotation.UnsupportedAppUsage;
26 import android.hardware.soundtrigger.IRecognitionStatusCallback;
27 import android.hardware.soundtrigger.SoundTrigger;
28 import android.hardware.soundtrigger.SoundTrigger.ModuleProperties;
29 import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig;
30 import android.media.AudioFormat;
31 import android.os.Build;
32 import android.os.Handler;
33 import android.os.Looper;
34 import android.os.Message;
35 import android.os.ParcelUuid;
36 import android.os.RemoteException;
37 import android.util.Slog;
38 
39 import com.android.internal.app.ISoundTriggerSession;
40 
41 import java.io.PrintWriter;
42 import java.lang.annotation.Retention;
43 import java.lang.annotation.RetentionPolicy;
44 import java.util.UUID;
45 
46 /**
47  * A class that allows interaction with the actual sound trigger detection on the system.
 * Sound trigger detection refers to detectors that match generic sound patterns that are
 * not voice-based. Voice-based recognition models should utilize the {@link
 * android.service.voice.VoiceInteractionService} instead. Access to this class is protected
 * by a permission granted only to system or privileged apps.
52  *
53  * @hide
54  */
55 @SystemApi
56 public final class SoundTriggerDetector {
    // Gates the verbose log statement at the top of startRecognition(); off by default.
    private static final boolean DBG = false;
    // Log tag passed to every Slog call in this class.
    private static final String TAG = "SoundTriggerDetector";

    // Message codes exchanged between RecognitionCallback (sender) and MyHandler (receiver).
    // NOTE(review): nothing in this file sends or handles MSG_AVAILABILITY_CHANGED.
    private static final int MSG_AVAILABILITY_CHANGED = 1;
    private static final int MSG_SOUND_TRIGGER_DETECTED = 2;
    private static final int MSG_DETECTION_ERROR = 3;
    private static final int MSG_DETECTION_PAUSE = 4;
    private static final int MSG_DETECTION_RESUME = 5;

    // Monitor taken by dump(); no other member in this file synchronizes on it.
    private final Object mLock = new Object();

    // Session that start/stop recognition requests are forwarded to.
    private final ISoundTriggerSession mSoundTriggerSession;
    // Identifier of the sound model this detector starts and stops recognition for.
    private final UUID mSoundModelId;
    // Client callback invoked from MyHandler.handleMessage().
    private final Callback mCallback;
    // Handler that receives the messages posted by mRecognitionCallback.
    private final Handler mHandler;
    // Callback object handed to the session in startRecognition()/stopRecognition().
    private final RecognitionCallback mRecognitionCallback;
73 
    /**
     * Bit flags accepted by {@link #startRecognition(int)}.
     *
     * @hide
     */
    @Retention(RetentionPolicy.SOURCE)
    @IntDef(flag = true,
            value = {
                RECOGNITION_FLAG_NONE,
                RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO,
                RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS,
                RECOGNITION_FLAG_ENABLE_AUDIO_ECHO_CANCELLATION,
                RECOGNITION_FLAG_ENABLE_AUDIO_NOISE_SUPPRESSION,
                RECOGNITION_FLAG_RUN_IN_BATTERY_SAVER,
            })
    public @interface RecognitionFlags {}

    /**
     * Empty flag for {@link #startRecognition(int)}.
     *
     * @hide
     */
    public static final int RECOGNITION_FLAG_NONE = 0;

    /**
     * Recognition flag for {@link #startRecognition(int)} that indicates
     * whether the audio that caused the trigger needs to be captured.
     */
    public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1;

    /**
     * Recognition flag for {@link #startRecognition(int)} that indicates
     * whether the recognition should keep going on even after the
     * model triggers.
     * If this flag is specified, it's possible to get multiple
     * triggers after a call to {@link #startRecognition(int)}, if the model
     * triggers multiple times.
     * When this isn't specified, the default behavior is to stop recognition once the
     * trigger happens, until the caller starts recognition again.
     */
    public static final int RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS = 0x2;

    /**
     * Audio capabilities flag for {@link #startRecognition(int)} that indicates
     * if the underlying recognition should use AEC (acoustic echo cancellation).
     * This capability may or may not be supported by the system, and support can be queried
     * by calling {@link SoundTriggerManager#getModuleProperties()} and checking
     * {@link ModuleProperties#audioCapabilities}. The corresponding capabilities field for
     * this flag is {@link SoundTrigger.ModuleProperties#AUDIO_CAPABILITY_ECHO_CANCELLATION}.
     * If this flag is passed without the audio capability supported, there will be no audio effect
     * applied.
     */
    public static final int RECOGNITION_FLAG_ENABLE_AUDIO_ECHO_CANCELLATION = 0x4;

    /**
     * Audio capabilities flag for {@link #startRecognition(int)} that indicates
     * if the underlying recognition should use noise suppression.
     * This capability may or may not be supported by the system, and support can be queried
     * by calling {@link SoundTriggerManager#getModuleProperties()} and checking
     * {@link ModuleProperties#audioCapabilities}. The corresponding capabilities field for
     * this flag is {@link SoundTrigger.ModuleProperties#AUDIO_CAPABILITY_NOISE_SUPPRESSION}.
     * If this flag is passed without the audio capability supported, there will be no audio effect
     * applied.
     */
    public static final int RECOGNITION_FLAG_ENABLE_AUDIO_NOISE_SUPPRESSION = 0x8;

    /**
     * Recognition flag for {@link #startRecognition(int)} that indicates whether the recognition
     * should continue after battery saver mode is enabled.
     * When this flag is specified, the caller will be checked for
     * {@link android.Manifest.permission#SOUND_TRIGGER_RUN_IN_BATTERY_SAVER} permission granted.
     */
    public static final int RECOGNITION_FLAG_RUN_IN_BATTERY_SAVER = 0x10;
143 
144     /**
145      * Additional payload for {@link Callback#onDetected}.
146      */
147     public static class EventPayload {
148         private final boolean mTriggerAvailable;
149 
150         // Indicates if {@code captureSession} can be used to continue capturing more audio
151         // from the DSP hardware.
152         private final boolean mCaptureAvailable;
153         // The session to use when attempting to capture more audio from the DSP hardware.
154         private final int mCaptureSession;
155         private final AudioFormat mAudioFormat;
156         // Raw data associated with the event.
157         // This is the audio that triggered the keyphrase if {@code isTriggerAudio} is true.
158         private final byte[] mData;
159 
EventPayload(boolean triggerAvailable, boolean captureAvailable, AudioFormat audioFormat, int captureSession, byte[] data)160         private EventPayload(boolean triggerAvailable, boolean captureAvailable,
161                 AudioFormat audioFormat, int captureSession, byte[] data) {
162             mTriggerAvailable = triggerAvailable;
163             mCaptureAvailable = captureAvailable;
164             mCaptureSession = captureSession;
165             mAudioFormat = audioFormat;
166             mData = data;
167         }
168 
169         /**
170          * Gets the format of the audio obtained using {@link #getTriggerAudio()}.
171          * May be null if there's no audio present.
172          */
173         @Nullable
getCaptureAudioFormat()174         public AudioFormat getCaptureAudioFormat() {
175             return mAudioFormat;
176         }
177 
178         /**
179          * Gets the raw audio that triggered the detector.
180          * This may be null if the trigger audio isn't available.
181          * If non-null, the format of the audio can be obtained by calling
182          * {@link #getCaptureAudioFormat()}.
183          *
184          * @see AlwaysOnHotwordDetector#RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO
185          */
186         @Nullable
getTriggerAudio()187         public byte[] getTriggerAudio() {
188             if (mTriggerAvailable) {
189                 return mData;
190             } else {
191                 return null;
192             }
193         }
194 
195         /**
196          * Gets the opaque data passed from the detection engine for the event.
197          * This may be null if it was not populated by the engine, or if the data is known to
198          * contain the trigger audio.
199          *
200          * @see #getTriggerAudio
201          *
202          * @hide
203          */
204         @Nullable
205         @UnsupportedAppUsage(maxTargetSdk = Build.VERSION_CODES.R, trackingBug = 170729553)
getData()206         public byte[] getData() {
207             if (!mTriggerAvailable) {
208                 return mData;
209             } else {
210                 return null;
211             }
212         }
213 
214         /**
215          * Gets the session ID to start a capture from the DSP.
216          * This may be null if streaming capture isn't possible.
217          * If non-null, the format of the audio that can be captured can be
218          * obtained using {@link #getCaptureAudioFormat()}.
219          *
220          * TODO: Candidate for Public API when the API to start capture with a session ID
221          * is made public.
222          *
223          * TODO: Add this to {@link #getCaptureAudioFormat()}:
224          * "Gets the format of the audio obtained using {@link #getTriggerAudio()}
225          * or {@link #getCaptureSession()}. May be null if no audio can be obtained
226          * for either the trigger or a streaming session."
227          *
228          * TODO: Should this return a known invalid value instead?
229          *
230          * @hide
231          */
232         @Nullable
233         @UnsupportedAppUsage(maxTargetSdk = Build.VERSION_CODES.R, trackingBug = 170729553)
getCaptureSession()234         public Integer getCaptureSession() {
235             if (mCaptureAvailable) {
236                 return mCaptureSession;
237             } else {
238                 return null;
239             }
240         }
241     }
242 
243     public static abstract class Callback {
244         /**
245          * Called when the availability of the sound model changes.
246          */
onAvailabilityChanged(int status)247         public abstract void onAvailabilityChanged(int status);
248 
249         /**
250          * Called when the sound model has triggered (such as when it matched a
251          * given sound pattern).
252          */
onDetected(@onNull EventPayload eventPayload)253         public abstract void onDetected(@NonNull EventPayload eventPayload);
254 
255         /**
256          *  Called when the detection fails due to an error.
257          */
onError()258         public abstract void onError();
259 
260         /**
261          * Called when the recognition is paused temporarily for some reason.
262          * This is an informational callback, and the clients shouldn't be doing anything here
263          * except showing an indication on their UI if they have to.
264          */
onRecognitionPaused()265         public abstract void onRecognitionPaused();
266 
267         /**
268          * Called when the recognition is resumed after it was temporarily paused.
269          * This is an informational callback, and the clients shouldn't be doing anything here
270          * except showing an indication on their UI if they have to.
271          */
onRecognitionResumed()272         public abstract void onRecognitionResumed();
273     }
274 
275     /**
276      * This class should be constructed by the {@link SoundTriggerManager}.
277      * @hide
278      */
SoundTriggerDetector(ISoundTriggerSession soundTriggerSession, UUID soundModelId, @NonNull Callback callback, @Nullable Handler handler)279     SoundTriggerDetector(ISoundTriggerSession soundTriggerSession, UUID soundModelId,
280             @NonNull Callback callback, @Nullable Handler handler) {
281         mSoundTriggerSession = soundTriggerSession;
282         mSoundModelId = soundModelId;
283         mCallback = callback;
284         if (handler == null) {
285             mHandler = new MyHandler();
286         } else {
287             mHandler = new MyHandler(handler.getLooper());
288         }
289         mRecognitionCallback = new RecognitionCallback();
290     }
291 
292     /**
293      * Starts recognition on the associated sound model. Result is indicated via the
294      * {@link Callback}.
295      * @return Indicates whether the call succeeded or not.
296      */
297     @RequiresPermission(android.Manifest.permission.MANAGE_SOUND_TRIGGER)
startRecognition(@ecognitionFlags int recognitionFlags)298     public boolean startRecognition(@RecognitionFlags int recognitionFlags) {
299         if (DBG) {
300             Slog.d(TAG, "startRecognition()");
301         }
302         boolean captureTriggerAudio =
303                 (recognitionFlags & RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0;
304 
305         boolean allowMultipleTriggers =
306                 (recognitionFlags & RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS) != 0;
307 
308         boolean runInBatterySaver = (recognitionFlags & RECOGNITION_FLAG_RUN_IN_BATTERY_SAVER) != 0;
309 
310         int audioCapabilities = 0;
311         if ((recognitionFlags & RECOGNITION_FLAG_ENABLE_AUDIO_ECHO_CANCELLATION) != 0) {
312             audioCapabilities |= SoundTrigger.ModuleProperties.AUDIO_CAPABILITY_ECHO_CANCELLATION;
313         }
314         if ((recognitionFlags & RECOGNITION_FLAG_ENABLE_AUDIO_NOISE_SUPPRESSION) != 0) {
315             audioCapabilities |= SoundTrigger.ModuleProperties.AUDIO_CAPABILITY_NOISE_SUPPRESSION;
316         }
317 
318         int status;
319         try {
320             status = mSoundTriggerSession.startRecognition(new ParcelUuid(mSoundModelId),
321                     mRecognitionCallback, new RecognitionConfig(captureTriggerAudio,
322                             allowMultipleTriggers, null, null, audioCapabilities),
323                     runInBatterySaver);
324         } catch (RemoteException e) {
325             return false;
326         }
327         return status == STATUS_OK;
328     }
329 
330     /**
331      * Stops recognition for the associated model.
332      */
333     @RequiresPermission(android.Manifest.permission.MANAGE_SOUND_TRIGGER)
stopRecognition()334     public boolean stopRecognition() {
335         int status = STATUS_OK;
336         try {
337             status = mSoundTriggerSession.stopRecognition(new ParcelUuid(mSoundModelId),
338                     mRecognitionCallback);
339         } catch (RemoteException e) {
340             return false;
341         }
342         return status == STATUS_OK;
343     }
344 
345     /**
346      * @hide
347      */
dump(String prefix, PrintWriter pw)348     public void dump(String prefix, PrintWriter pw) {
349         synchronized (mLock) {
350             // TODO: Dump useful debug information.
351         }
352     }
353 
354     /**
355      * Callback that handles events from the lower sound trigger layer.
356      *
357      * Note that these callbacks will be called synchronously from the SoundTriggerService
358      * layer and thus should do minimal work (such as sending a message on a handler to do
359      * the real work).
360      * @hide
361      */
362     private class RecognitionCallback extends IRecognitionStatusCallback.Stub {
363 
364         /**
365          * @hide
366          */
367         @Override
onGenericSoundTriggerDetected(SoundTrigger.GenericRecognitionEvent event)368         public void onGenericSoundTriggerDetected(SoundTrigger.GenericRecognitionEvent event) {
369             Slog.d(TAG, "onGenericSoundTriggerDetected()" + event);
370             Message.obtain(mHandler,
371                     MSG_SOUND_TRIGGER_DETECTED,
372                     new EventPayload(event.triggerInData, event.captureAvailable,
373                             event.captureFormat, event.captureSession, event.data))
374                     .sendToTarget();
375         }
376 
377         @Override
onKeyphraseDetected(SoundTrigger.KeyphraseRecognitionEvent event)378         public void onKeyphraseDetected(SoundTrigger.KeyphraseRecognitionEvent event) {
379             Slog.e(TAG, "Ignoring onKeyphraseDetected() called for " + event);
380         }
381 
382         /**
383          * @hide
384          */
385         @Override
onError(int status)386         public void onError(int status) {
387             Slog.d(TAG, "onError()" + status);
388             mHandler.sendEmptyMessage(MSG_DETECTION_ERROR);
389         }
390 
391         /**
392          * @hide
393          */
394         @Override
onRecognitionPaused()395         public void onRecognitionPaused() {
396             Slog.d(TAG, "onRecognitionPaused()");
397             mHandler.sendEmptyMessage(MSG_DETECTION_PAUSE);
398         }
399 
400         /**
401          * @hide
402          */
403         @Override
onRecognitionResumed()404         public void onRecognitionResumed() {
405             Slog.d(TAG, "onRecognitionResumed()");
406             mHandler.sendEmptyMessage(MSG_DETECTION_RESUME);
407         }
408     }
409 
410     private class MyHandler extends Handler {
411 
MyHandler()412         MyHandler() {
413             super();
414         }
415 
MyHandler(Looper looper)416         MyHandler(Looper looper) {
417             super(looper);
418         }
419 
420         @Override
handleMessage(Message msg)421         public void handleMessage(Message msg) {
422             if (mCallback == null) {
423                   Slog.w(TAG, "Received message: " + msg.what + " for NULL callback.");
424                   return;
425             }
426             switch (msg.what) {
427                 case MSG_SOUND_TRIGGER_DETECTED:
428                     mCallback.onDetected((EventPayload) msg.obj);
429                     break;
430                 case MSG_DETECTION_ERROR:
431                     mCallback.onError();
432                     break;
433                 case MSG_DETECTION_PAUSE:
434                     mCallback.onRecognitionPaused();
435                     break;
436                 case MSG_DETECTION_RESUME:
437                     mCallback.onRecognitionResumed();
438                     break;
439                 default:
440                     super.handleMessage(msg);
441 
442             }
443         }
444     }
445 }
446