1 /**
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.media.soundtrigger;
18 import static android.hardware.soundtrigger.SoundTrigger.STATUS_OK;
19 
20 import android.annotation.IntDef;
21 import android.annotation.NonNull;
22 import android.annotation.Nullable;
23 import android.annotation.RequiresPermission;
24 import android.annotation.SystemApi;
25 import android.compat.annotation.UnsupportedAppUsage;
26 import android.hardware.soundtrigger.IRecognitionStatusCallback;
27 import android.hardware.soundtrigger.SoundTrigger;
28 import android.hardware.soundtrigger.SoundTrigger.GenericSoundModel;
29 import android.hardware.soundtrigger.SoundTrigger.ModuleProperties;
30 import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig;
31 import android.media.AudioFormat;
32 import android.os.Build;
33 import android.os.Handler;
34 import android.os.Looper;
35 import android.os.Message;
36 import android.os.ParcelUuid;
37 import android.os.RemoteException;
38 import android.util.Slog;
39 
40 import com.android.internal.app.ISoundTriggerSession;
41 
42 import java.io.PrintWriter;
43 import java.lang.annotation.Retention;
44 import java.lang.annotation.RetentionPolicy;
45 import java.util.UUID;
46 
47 /**
48  * A class that allows interaction with the actual sound trigger detection on the system.
 * Sound trigger detection refers to detectors that match generic sound patterns that are
50  * not voice-based. The voice-based recognition models should utilize the {@link
51  * VoiceInteractionService} instead. Access to this class is protected by a permission
52  * granted only to system or privileged apps.
53  * @deprecated use {@link SoundTriggerManager} directly
54  *
55  * @hide
56  */
57 @Deprecated
58 @SystemApi
59 public final class SoundTriggerDetector {
    // Compile-time switch for verbose logging; checked in startRecognition().
    private static final boolean DBG = false;
    // Tag attached to every Slog line emitted by this class.
    private static final String TAG = "SoundTriggerDetector";

    // Message codes ("what" values) posted to mHandler and decoded in
    // MyHandler.handleMessage().
    // NOTE(review): nothing in this file sends or handles MSG_AVAILABILITY_CHANGED.
    private static final int MSG_AVAILABILITY_CHANGED = 1;
    private static final int MSG_SOUND_TRIGGER_DETECTED = 2;
    private static final int MSG_DETECTION_ERROR = 3;
    private static final int MSG_DETECTION_PAUSE = 4;
    private static final int MSG_DETECTION_RESUME = 5;

    // Only acquired by dump() in this file.
    private final Object mLock = new Object();

    // Remote session on which recognition is started and stopped.
    private final ISoundTriggerSession mSoundTriggerSession;
    // Model handed to the session on start; its UUID identifies the model on stop.
    private final GenericSoundModel mSoundModel;
    // Client callback, invoked from MyHandler.handleMessage().
    private final Callback mCallback;
    // Handler through which recognition events are relayed to mCallback.
    private final Handler mHandler;
    // Status callback passed to the session on both start and stop.
    private final RecognitionCallback mRecognitionCallback;
76 
    /**
     * Bit-flag annotation for the {@code recognitionFlags} argument of
     * {@link #startRecognition(int)}. The listed values may be OR-ed together.
     *
     * @hide
     */
    @Retention(RetentionPolicy.SOURCE)
    @IntDef(flag = true,
            value = {
                RECOGNITION_FLAG_NONE,
                RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO,
                RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS,
                RECOGNITION_FLAG_ENABLE_AUDIO_ECHO_CANCELLATION,
                RECOGNITION_FLAG_ENABLE_AUDIO_NOISE_SUPPRESSION,
                RECOGNITION_FLAG_RUN_IN_BATTERY_SAVER,
            })
    public @interface RecognitionFlags {}

    /**
     * Empty flag for {@link #startRecognition(int)}: no bit is set, so none of the optional
     * behaviors below is requested.
     *
     * @hide
     */
    public static final int RECOGNITION_FLAG_NONE = 0;

    /**
     * Recognition flag for {@link #startRecognition(int)} that indicates
     * whether the trigger audio for hotword needs to be captured.
     * {@link #startRecognition(int)} forwards this bit as the first argument of the
     * {@link RecognitionConfig} it builds.
     */
    public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1;

    /**
     * Recognition flag for {@link #startRecognition(int)} that indicates
     * whether the recognition should keep going on even after the
     * model triggers.
     * If this flag is specified, it's possible to get multiple
     * triggers after a call to {@link #startRecognition(int)}, if the model
     * triggers multiple times.
     * When this isn't specified, the default behavior is to stop recognition once the
     * trigger happens, till the caller starts recognition again.
     */
    public static final int RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS = 0x2;

    /**
     * Audio capabilities flag for {@link #startRecognition(int)} that indicates
     * if the underlying recognition should use AEC (acoustic echo cancellation).
     * This capability may or may not be supported by the system, and support can be queried
     * by calling {@link SoundTriggerManager#getModuleProperties()} and checking
     * {@link ModuleProperties#audioCapabilities}. The corresponding capabilities field for
     * this flag is {@link SoundTrigger.ModuleProperties#AUDIO_CAPABILITY_ECHO_CANCELLATION}.
     * If this flag is passed without the audio capability supported, there will be no audio effect
     * applied.
     */
    public static final int RECOGNITION_FLAG_ENABLE_AUDIO_ECHO_CANCELLATION = 0x4;

    /**
     * Audio capabilities flag for {@link #startRecognition(int)} that indicates
     * if the underlying recognition should use noise suppression.
     * This capability may or may not be supported by the system, and support can be queried
     * by calling {@link SoundTriggerManager#getModuleProperties()} and checking
     * {@link ModuleProperties#audioCapabilities}. The corresponding capabilities field for
     * this flag is {@link SoundTrigger.ModuleProperties#AUDIO_CAPABILITY_NOISE_SUPPRESSION}.
     * If this flag is passed without the audio capability supported, there will be no audio effect
     * applied.
     */
    public static final int RECOGNITION_FLAG_ENABLE_AUDIO_NOISE_SUPPRESSION = 0x8;

    /**
     * Recognition flag for {@link #startRecognition(int)} that indicates whether the recognition
     * should continue after battery saver mode is enabled.
     * When this flag is specified, the caller will be checked for
     * {@link android.Manifest.permission#SOUND_TRIGGER_RUN_IN_BATTERY_SAVER} permission granted.
     * {@link #startRecognition(int)} passes this bit to the session as a separate boolean
     * argument rather than inside the {@link RecognitionConfig}.
     */
    public static final int RECOGNITION_FLAG_RUN_IN_BATTERY_SAVER = 0x10;
146 
147     /**
148      * Additional payload for {@link Callback#onDetected}.
149      */
150     public static class EventPayload {
151         private final boolean mTriggerAvailable;
152 
153         // Indicates if {@code captureSession} can be used to continue capturing more audio
154         // from the DSP hardware.
155         private final boolean mCaptureAvailable;
156         // The session to use when attempting to capture more audio from the DSP hardware.
157         private final int mCaptureSession;
158         private final AudioFormat mAudioFormat;
159         // Raw data associated with the event.
160         // This is the audio that triggered the keyphrase if {@code isTriggerAudio} is true.
161         private final byte[] mData;
162 
EventPayload(boolean triggerAvailable, boolean captureAvailable, AudioFormat audioFormat, int captureSession, byte[] data)163         private EventPayload(boolean triggerAvailable, boolean captureAvailable,
164                 AudioFormat audioFormat, int captureSession, byte[] data) {
165             mTriggerAvailable = triggerAvailable;
166             mCaptureAvailable = captureAvailable;
167             mCaptureSession = captureSession;
168             mAudioFormat = audioFormat;
169             mData = data;
170         }
171 
172         /**
173          * Gets the format of the audio obtained using {@link #getTriggerAudio()}.
174          * May be null if there's no audio present.
175          */
176         @Nullable
getCaptureAudioFormat()177         public AudioFormat getCaptureAudioFormat() {
178             return mAudioFormat;
179         }
180 
181         /**
182          * Gets the raw audio that triggered the detector.
183          * This may be null if the trigger audio isn't available.
184          * If non-null, the format of the audio can be obtained by calling
185          * {@link #getCaptureAudioFormat()}.
186          *
187          * @see AlwaysOnHotwordDetector#RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO
188          */
189         @Nullable
getTriggerAudio()190         public byte[] getTriggerAudio() {
191             if (mTriggerAvailable) {
192                 return mData;
193             } else {
194                 return null;
195             }
196         }
197 
198         /**
199          * Gets the opaque data passed from the detection engine for the event.
200          * This may be null if it was not populated by the engine, or if the data is known to
201          * contain the trigger audio.
202          *
203          * @see #getTriggerAudio
204          *
205          * @hide
206          */
207         @Nullable
208         @UnsupportedAppUsage(maxTargetSdk = Build.VERSION_CODES.R, trackingBug = 170729553)
getData()209         public byte[] getData() {
210             if (!mTriggerAvailable) {
211                 return mData;
212             } else {
213                 return null;
214             }
215         }
216 
217         /**
218          * Gets the session ID to start a capture from the DSP.
219          * This may be null if streaming capture isn't possible.
220          * If non-null, the format of the audio that can be captured can be
221          * obtained using {@link #getCaptureAudioFormat()}.
222          *
223          * TODO: Candidate for Public API when the API to start capture with a session ID
224          * is made public.
225          *
226          * TODO: Add this to {@link #getCaptureAudioFormat()}:
227          * "Gets the format of the audio obtained using {@link #getTriggerAudio()}
228          * or {@link #getCaptureSession()}. May be null if no audio can be obtained
229          * for either the trigger or a streaming session."
230          *
231          * TODO: Should this return a known invalid value instead?
232          *
233          * @hide
234          */
235         @Nullable
236         @UnsupportedAppUsage(maxTargetSdk = Build.VERSION_CODES.R, trackingBug = 170729553)
getCaptureSession()237         public Integer getCaptureSession() {
238             if (mCaptureAvailable) {
239                 return mCaptureSession;
240             } else {
241                 return null;
242             }
243         }
244     }
245 
    /**
     * Receives recognition events for the sound model a {@link SoundTriggerDetector} was
     * created with.
     *
     * <p>Within this class, {@link #onDetected}, {@link #onError},
     * {@link #onRecognitionPaused} and {@link #onRecognitionResumed} are invoked from the
     * detector's internal {@link Handler}, i.e. on the looper of the handler supplied at
     * construction when one was given.
     */
    public static abstract class Callback {
        // NOTE(review): no code path in this file invokes onAvailabilityChanged().
        /**
         * Called when the availability of the sound model changes.
         */
        public abstract void onAvailabilityChanged(int status);

        /**
         * Called when the sound model has triggered (such as when it matched a
         * given sound pattern).
         */
        public abstract void onDetected(@NonNull EventPayload eventPayload);

        /**
         *  Called when the detection fails due to an error.
         *  In this class the same callback is used for preemption, module death, and
         *  failed pause or resume attempts; no further detail is passed along.
         */
        public abstract void onError();

        /**
         * Called when the recognition is paused temporarily for some reason.
         * This is an informational callback, and the clients shouldn't be doing anything here
         * except showing an indication on their UI if they have to.
         */
        public abstract void onRecognitionPaused();

        /**
         * Called when the recognition is resumed after it was temporarily paused.
         * This is an informational callback, and the clients shouldn't be doing anything here
         * except showing an indication on their UI if they have to.
         */
        public abstract void onRecognitionResumed();
    }
277 
278     /**
279      * This class should be constructed by the {@link SoundTriggerManager}.
280      * @hide
281      */
SoundTriggerDetector(ISoundTriggerSession soundTriggerSession, @NonNull GenericSoundModel soundModel, @NonNull Callback callback, @Nullable Handler handler)282     SoundTriggerDetector(ISoundTriggerSession soundTriggerSession,
283             @NonNull GenericSoundModel soundModel,
284             @NonNull Callback callback, @Nullable Handler handler) {
285         mSoundTriggerSession = soundTriggerSession;
286         mSoundModel = soundModel;
287         mCallback = callback;
288         if (handler == null) {
289             mHandler = new MyHandler();
290         } else {
291             mHandler = new MyHandler(handler.getLooper());
292         }
293         mRecognitionCallback = new RecognitionCallback();
294     }
295 
296     /**
297      * Starts recognition on the associated sound model. Result is indicated via the
298      * {@link Callback}.
299      * @deprecated use {@link SoundTriggerManager} directly
300      * @return Indicates whether the call succeeded or not.
301      */
302     @Deprecated
303     @RequiresPermission(android.Manifest.permission.MANAGE_SOUND_TRIGGER)
startRecognition(@ecognitionFlags int recognitionFlags)304     public boolean startRecognition(@RecognitionFlags int recognitionFlags) {
305         if (DBG) {
306             Slog.d(TAG, "startRecognition()");
307         }
308         boolean captureTriggerAudio =
309                 (recognitionFlags & RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0;
310 
311         boolean allowMultipleTriggers =
312                 (recognitionFlags & RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS) != 0;
313 
314         boolean runInBatterySaver = (recognitionFlags & RECOGNITION_FLAG_RUN_IN_BATTERY_SAVER) != 0;
315 
316         int audioCapabilities = 0;
317         if ((recognitionFlags & RECOGNITION_FLAG_ENABLE_AUDIO_ECHO_CANCELLATION) != 0) {
318             audioCapabilities |= SoundTrigger.ModuleProperties.AUDIO_CAPABILITY_ECHO_CANCELLATION;
319         }
320         if ((recognitionFlags & RECOGNITION_FLAG_ENABLE_AUDIO_NOISE_SUPPRESSION) != 0) {
321             audioCapabilities |= SoundTrigger.ModuleProperties.AUDIO_CAPABILITY_NOISE_SUPPRESSION;
322         }
323 
324         int status;
325         try {
326             status = mSoundTriggerSession.startRecognition(mSoundModel,
327                     mRecognitionCallback, new RecognitionConfig(captureTriggerAudio,
328                             allowMultipleTriggers, null, null, audioCapabilities),
329                     runInBatterySaver);
330         } catch (RemoteException e) {
331             return false;
332         }
333         return status == STATUS_OK;
334     }
335 
336     /**
337      * Stops recognition for the associated model.
338      * @deprecated use {@link SoundTriggerManager} directly
339      */
340     @Deprecated
341     @RequiresPermission(android.Manifest.permission.MANAGE_SOUND_TRIGGER)
stopRecognition()342     public boolean stopRecognition() {
343         int status = STATUS_OK;
344         try {
345             status = mSoundTriggerSession.stopRecognition(new ParcelUuid(mSoundModel.getUuid()),
346                     mRecognitionCallback);
347         } catch (RemoteException e) {
348             return false;
349         }
350         return status == STATUS_OK;
351     }
352 
    /**
     * Dumps the detector's state for debugging. Currently a placeholder: the body only
     * acquires {@code mLock} and writes nothing to {@code pw}.
     *
     * @param prefix not used by the current implementation
     * @param pw not used by the current implementation
     * @hide
     */
    public void dump(String prefix, PrintWriter pw) {
        synchronized (mLock) {
            // TODO: Dump useful debug information.
        }
    }
361 
362     /**
363      * Callback that handles events from the lower sound trigger layer.
364      *
365      * Note that these callbacks will be called synchronously from the SoundTriggerService
366      * layer and thus should do minimal work (such as sending a message on a handler to do
367      * the real work).
368      * @hide
369      */
370     private class RecognitionCallback extends IRecognitionStatusCallback.Stub {
371 
372         /**
373          * @hide
374          */
375         @Override
onGenericSoundTriggerDetected(SoundTrigger.GenericRecognitionEvent event)376         public void onGenericSoundTriggerDetected(SoundTrigger.GenericRecognitionEvent event) {
377             Slog.d(TAG, "onGenericSoundTriggerDetected()" + event);
378             Message.obtain(mHandler,
379                     MSG_SOUND_TRIGGER_DETECTED,
380                     new EventPayload(event.triggerInData, event.captureAvailable,
381                             event.captureFormat, event.captureSession, event.data))
382                     .sendToTarget();
383         }
384 
385         @Override
onKeyphraseDetected(SoundTrigger.KeyphraseRecognitionEvent event)386         public void onKeyphraseDetected(SoundTrigger.KeyphraseRecognitionEvent event) {
387             Slog.e(TAG, "Ignoring onKeyphraseDetected() called for " + event);
388         }
389 
390         /**
391          * @hide
392          */
393         @Override
onRecognitionPaused()394         public void onRecognitionPaused() {
395             Slog.d(TAG, "onRecognitionPaused()");
396             mHandler.sendEmptyMessage(MSG_DETECTION_PAUSE);
397         }
398 
399         /**
400          * @hide
401          */
402         @Override
onRecognitionResumed()403         public void onRecognitionResumed() {
404             Slog.d(TAG, "onRecognitionResumed()");
405             mHandler.sendEmptyMessage(MSG_DETECTION_RESUME);
406         }
407 
408         /**
409          * @hide
410          */
411         @Override
onPreempted()412         public void onPreempted() {
413             Slog.d(TAG, "onPreempted()");
414             mHandler.sendEmptyMessage(MSG_DETECTION_ERROR);
415         }
416 
417         /**
418          * @hide
419          */
420         @Override
onModuleDied()421         public void onModuleDied() {
422             Slog.d(TAG, "onModuleDied()");
423             mHandler.sendEmptyMessage(MSG_DETECTION_ERROR);
424         }
425 
426         /**
427          * @hide
428          */
429         @Override
onResumeFailed(int status)430         public void onResumeFailed(int status) {
431             Slog.d(TAG, "onResumeFailed()" + status);
432             mHandler.sendEmptyMessage(MSG_DETECTION_ERROR);
433         }
434 
435         /**
436          * @hide
437          */
438         @Override
onPauseFailed(int status)439         public void onPauseFailed(int status) {
440             Slog.d(TAG, "onPauseFailed()" + status);
441             mHandler.sendEmptyMessage(MSG_DETECTION_ERROR);
442         }
443     }
444 
445     private class MyHandler extends Handler {
446 
MyHandler()447         MyHandler() {
448             super();
449         }
450 
MyHandler(Looper looper)451         MyHandler(Looper looper) {
452             super(looper);
453         }
454 
455         @Override
handleMessage(Message msg)456         public void handleMessage(Message msg) {
457             if (mCallback == null) {
458                   Slog.w(TAG, "Received message: " + msg.what + " for NULL callback.");
459                   return;
460             }
461             switch (msg.what) {
462                 case MSG_SOUND_TRIGGER_DETECTED:
463                     mCallback.onDetected((EventPayload) msg.obj);
464                     break;
465                 case MSG_DETECTION_ERROR:
466                     mCallback.onError();
467                     break;
468                 case MSG_DETECTION_PAUSE:
469                     mCallback.onRecognitionPaused();
470                     break;
471                 case MSG_DETECTION_RESUME:
472                     mCallback.onRecognitionResumed();
473                     break;
474                 default:
475                     super.handleMessage(msg);
476 
477             }
478         }
479     }
480 }
481