1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 #include "channels.h"
19 #include <math.h>
20 
21 namespace android::audio_utils::channels {
22 
23 /**
24  * ChannelMix
25  *
26  * Converts audio streams with different positional channel configurations.
27  * Currently only downmix to stereo is supported, so there is no outputChannelMask argument.
28  *
29  * TODO: In the future, consider downmix to 7.1 and 5.1 targets instead of just stereo.
30  */
31 class ChannelMix {
32 public:
33 
34     /**
35      * Creates a ChannelMix object
36      *
37      * Note: If construction is unsuccessful then getInputChannelMask will return
38      * AUDIO_CHANNEL_NONE.
39      *
40      * \param inputChannelMask   channel position mask for input audio data.
41      */
ChannelMix(audio_channel_mask_t inputChannelMask)42     explicit ChannelMix(audio_channel_mask_t inputChannelMask) {
43         setInputChannelMask(inputChannelMask);
44     }
45 
46     ChannelMix() = default;
47 
48     /**
49      * Set the input channel mask.
50      *
51      * \param inputChannelMask channel position mask for input data.
52      *
53      * \return false if the channel mask is not supported.
54      */
setInputChannelMask(audio_channel_mask_t inputChannelMask)55     bool setInputChannelMask(audio_channel_mask_t inputChannelMask) {
56         if (mInputChannelMask != inputChannelMask) {
57             if (inputChannelMask & ~((1 << MAX_INPUT_CHANNELS_SUPPORTED) - 1)) {
58                 return false;  // not channel position mask, or has unknown channels.
59             }
60 
61             // Compute at what index each channel is: samples will be in the following order:
62             //   FL  FR  FC    LFE   BL  BR  BC    SL  SR
63             //
64             // Prior to API 32, use of downmix resulted in channels being scaled in half amplitude.
65             // We now use a compliant downmix matrix for 5.1 with the following standards:
66             // ITU-R 775-2, ATSC A/52, ETSI TS 101 154, IEC 14496-3, which is unity gain for the
67             // front left and front right channel contribution.
68             //
69             // For 7.1 to 5.1 we set equal contributions for the side and back channels
70             // which follow Dolby downmix recommendations.
71             //
72             // We add contributions from the LFE into the L and R channels
73             // at a weight of 0.5 (rather than the power preserving 0.707)
74             // which is to ensure that headphones can still experience LFE
75             // with lesser risk of speaker overload.
76             //
77             // Note: geometrically left and right channels contribute only to the corresponding
78             // left and right outputs respectively.  Geometrically center channels contribute
79             // to both left and right outputs, so they are scaled by 0.707 to preserve power.
80             //
81             //  (transfer matrix)
82             //   FL  FR  FC    LFE  BL  BR     BC  SL    SR
83             //   1.0     0.707 0.5  0.707      0.5 0.707
84             //       1.0 0.707 0.5       0.707 0.5       0.707
85             int index = 0;
86             constexpr float COEF_25 = 0.2508909536f;
87             constexpr float COEF_35 = 0.3543928915f;
88             constexpr float COEF_36 = 0.3552343859f;
89             constexpr float COEF_61 = 0.6057043428f;
90             for (unsigned tmp = inputChannelMask; tmp != 0; ++index) {
91                 const unsigned lowestBit = tmp & -(signed)tmp;
92                 switch (lowestBit) {
93                     case AUDIO_CHANNEL_OUT_FRONT_LEFT:
94                     case AUDIO_CHANNEL_OUT_TOP_FRONT_LEFT:
95                     case AUDIO_CHANNEL_OUT_BOTTOM_FRONT_LEFT:
96                         mMatrix[index][0] = 1.f;
97                         mMatrix[index][1] = 0.f;
98                         break;
99                     case AUDIO_CHANNEL_OUT_SIDE_LEFT:
100                     case AUDIO_CHANNEL_OUT_BACK_LEFT:
101                     case AUDIO_CHANNEL_OUT_TOP_BACK_LEFT:
102                     case AUDIO_CHANNEL_OUT_FRONT_WIDE_LEFT: // FRONT_WIDE closer to SIDE.
103                         mMatrix[index][0] = MINUS_3_DB_IN_FLOAT;
104                         mMatrix[index][1] = 0.f;
105                         break;
106                     case AUDIO_CHANNEL_OUT_FRONT_RIGHT:
107                     case AUDIO_CHANNEL_OUT_TOP_FRONT_RIGHT:
108                     case AUDIO_CHANNEL_OUT_BOTTOM_FRONT_RIGHT:
109                         mMatrix[index][0] = 0.f;
110                         mMatrix[index][1] = 1.f;
111                         break;
112                     case AUDIO_CHANNEL_OUT_SIDE_RIGHT:
113                     case AUDIO_CHANNEL_OUT_BACK_RIGHT:
114                     case AUDIO_CHANNEL_OUT_TOP_BACK_RIGHT:
115                     case AUDIO_CHANNEL_OUT_FRONT_WIDE_RIGHT: // FRONT_WIDE closer to SIDE.
116                         mMatrix[index][0] = 0.f;
117                         mMatrix[index][1] = MINUS_3_DB_IN_FLOAT;
118                         break;
119                     case AUDIO_CHANNEL_OUT_FRONT_CENTER:
120                     case AUDIO_CHANNEL_OUT_TOP_FRONT_CENTER:
121                     case AUDIO_CHANNEL_OUT_BOTTOM_FRONT_CENTER:
122                         mMatrix[index][0] = mMatrix[index][1] = MINUS_3_DB_IN_FLOAT;
123                         break;
124                     case AUDIO_CHANNEL_OUT_TOP_SIDE_LEFT:
125                         mMatrix[index][0] = COEF_61;
126                         mMatrix[index][1] = 0.f;
127                         break;
128                     case AUDIO_CHANNEL_OUT_TOP_SIDE_RIGHT:
129                         mMatrix[index][0] = 0.f;
130                         mMatrix[index][1] = COEF_61;
131                         break;
132                     case AUDIO_CHANNEL_OUT_FRONT_LEFT_OF_CENTER:
133                         mMatrix[index][0] = COEF_61;
134                         mMatrix[index][1] = COEF_25;
135                         break;
136                     case AUDIO_CHANNEL_OUT_FRONT_RIGHT_OF_CENTER:
137                         mMatrix[index][0] = COEF_25;
138                         mMatrix[index][1] = COEF_61;
139                         break;
140                     case AUDIO_CHANNEL_OUT_TOP_CENTER:
141                         mMatrix[index][0] = mMatrix[index][1] = COEF_36;
142                         break;
143                     case AUDIO_CHANNEL_OUT_TOP_BACK_CENTER:
144                         mMatrix[index][0] = mMatrix[index][1] = COEF_35;
145                         break;
146                     case AUDIO_CHANNEL_OUT_LOW_FREQUENCY_2:
147                         mMatrix[index][0] = 0.f;
148                         mMatrix[index][1] = MINUS_3_DB_IN_FLOAT;
149                         break;
150                     case AUDIO_CHANNEL_OUT_LOW_FREQUENCY:
151                         if (inputChannelMask & AUDIO_CHANNEL_OUT_LOW_FREQUENCY_2) {
152                             mMatrix[index][0] = MINUS_3_DB_IN_FLOAT;
153                             mMatrix[index][1] = 0.f;
154                             break;
155                         }
156                         FALLTHROUGH_INTENDED;
157                     case AUDIO_CHANNEL_OUT_BACK_CENTER:
158                         mMatrix[index][0] = mMatrix[index][1] = 0.5f;
159                         break;
160                 }
161                 tmp ^= lowestBit;
162             }
163             mInputChannelMask = inputChannelMask;
164             // Note: mLastValidChannelIndexPlusOne is the same as mInputChannelCount for
165             // this particular matrix, as it has a nonzero column for every channel position.
166             mInputChannelCount = mLastValidChannelIndexPlusOne = index;
167         }
168         return true;
169     }
170 
171     /**
172      * Returns the input channel mask.
173      */
getInputChannelMask()174     audio_channel_mask_t getInputChannelMask() const {
175         return mInputChannelMask;
176     }
177 
178     /**
179      * Downmixes audio data in src to dst.
180      *
181      * \param src          input audio buffer to downmix
182      * \param dst          downmixed stereo audio samples
183      * \param frameCount   number of frames to downmix
184      * \param accumulate   is true if the downmix is added to the destination or
185      *                     false if the downmix replaces the destination.
186      *
187      * \return false if the channel mask set is not supported.
188      */
process(const float * src,float * dst,size_t frameCount,bool accumulate)189     bool process(const float *src, float *dst, size_t frameCount, bool accumulate) const {
190         return accumulate ? processSwitch<true>(src, dst, frameCount)
191                 : processSwitch<false>(src, dst, frameCount);
192     }
193 
194     /**
195      * Downmixes audio data in src to dst.
196      *
197      * \param src          input audio buffer to downmix
198      * \param dst          downmixed stereo audio samples
199      * \param frameCount   number of frames to downmix
200      * \param accumulate   is true if the downmix is added to the destination or
201      *                     false if the downmix replaces the destination.
202      * \param inputChannelMask channel position mask for input data.
203      *
204      * \return false if the channel mask set is not supported.
205      */
process(const float * src,float * dst,size_t frameCount,bool accumulate,audio_channel_mask_t inputChannelMask)206     bool process(const float *src, float *dst, size_t frameCount, bool accumulate,
207             audio_channel_mask_t inputChannelMask) {
208         return setInputChannelMask(inputChannelMask) && process(src, dst, frameCount, accumulate);
209     }
210 
211     // The maximum channels supported (bits in the channel mask).
212     static constexpr size_t MAX_INPUT_CHANNELS_SUPPORTED = FCC_26;
213 
214 private:
215     // These values are modified only when the input channel mask changes.
216     // Keep alignment for matrix for more stable benchmarking.
217     // Currently only stereo output supported.
218     alignas(128) float mMatrix[MAX_INPUT_CHANNELS_SUPPORTED][FCC_2];
219     audio_channel_mask_t mInputChannelMask = AUDIO_CHANNEL_NONE;
220     size_t mLastValidChannelIndexPlusOne = 0;
221     size_t mInputChannelCount = 0;
222 
223     // Static/const parameters.
224     static inline constexpr size_t mOutputChannelCount = FCC_2;    // stereo out only
225     static inline constexpr float MINUS_3_DB_IN_FLOAT = M_SQRT1_2; // -3dB = 0.70710678
226     static inline constexpr float LIMIT_AMPLITUDE = M_SQRT2;       // 3dB = 1.41421356
clamp(float value)227     static inline float clamp(float value) {
228         return fmin(fmax(value, -LIMIT_AMPLITUDE), LIMIT_AMPLITUDE);
229     }
230 
231     /**
232      * Downmixes audio data in src to dst.
233      *
234      * ACCUMULATE is true if the downmix is added to the destination or
235      *               false if the downmix replaces the destination.
236      *
237      * \param src          multichannel audio buffer to downmix
238      * \param dst          downmixed stereo audio samples
239      * \param frameCount   number of multichannel frames to downmix
240      *
241      * \return false if the CHANNEL_COUNT is not supported.
242      */
243     template <bool ACCUMULATE>
processSwitch(const float * src,float * dst,size_t frameCount)244     bool processSwitch(const float *src, float *dst, size_t frameCount) const {
245         constexpr bool ANDROID_SPECIFIC = true;  // change for testing.
246         if constexpr (ANDROID_SPECIFIC) {
247             switch (mInputChannelMask) {
248             case AUDIO_CHANNEL_OUT_QUAD_BACK:
249             case AUDIO_CHANNEL_OUT_QUAD_SIDE:
250                 return specificProcess<4 /* CHANNEL_COUNT */, ACCUMULATE>(src, dst, frameCount);
251             case AUDIO_CHANNEL_OUT_5POINT1_BACK:
252             case AUDIO_CHANNEL_OUT_5POINT1_SIDE:
253                 return specificProcess<6 /* CHANNEL_COUNT */, ACCUMULATE>(src, dst, frameCount);
254             case AUDIO_CHANNEL_OUT_7POINT1:
255                 return specificProcess<8 /* CHANNEL_COUNT */, ACCUMULATE>(src, dst, frameCount);
256             default:
257                 break; // handled below.
258             }
259         }
260         return matrixProcess(src, dst, frameCount, ACCUMULATE);
261     }
262 
263     /**
264      * Converts a source audio stream to destination audio stream with a matrix
265      * channel conversion.
266      *
267      * \param src          multichannel audio buffer to downmix
268      * \param dst          downmixed stereo audio samples
269      * \param frameCount   number of multichannel frames to downmix
270      * \param accumulate   is true if the downmix is added to the destination or
271      *                     false if the downmix replaces the destination.
272      *
273      * \return false if the CHANNEL_COUNT is not supported.
274      */
matrixProcess(const float * src,float * dst,size_t frameCount,bool accumulate)275     bool matrixProcess(const float *src, float *dst, size_t frameCount, bool accumulate) const {
276         // matrix multiply
277         if (mInputChannelMask == AUDIO_CHANNEL_NONE) return false;
278         while (frameCount) {
279             float ch[2]{}; // left, right
280             for (size_t i = 0; i < mLastValidChannelIndexPlusOne; ++i) {
281                 ch[0] += mMatrix[i][0] * src[i];
282                 ch[1] += mMatrix[i][1] * src[i];
283             }
284             if (accumulate) {
285                 ch[0] += dst[0];
286                 ch[1] += dst[1];
287             }
288             dst[0] = clamp(ch[0]);
289             dst[1] = clamp(ch[1]);
290             src += mInputChannelCount;
291             dst += mOutputChannelCount;
292             --frameCount;
293         }
294         return true;
295     }
296 
297     /**
298      * Downmixes to stereo a multichannel signal of specified number of channels
299      *
300      * CHANNEL_COUNT is the number of channels of the src input.
301      * ACCUMULATE is true if the downmix is added to the destination or
302      *               false if the downmix replaces the destination.
303      *
304      * \param src          multichannel audio buffer to downmix
305      * \param dst          downmixed stereo audio samples
306      * \param frameCount   number of multichannel frames to downmix
307      *
308      * \return false if the CHANNEL_COUNT is not supported.
309      */
310     template <int CHANNEL_COUNT, bool ACCUMULATE>
specificProcess(const float * src,float * dst,size_t frameCount)311     static bool specificProcess(const float *src, float *dst, size_t frameCount) {
312         while (frameCount > 0) {
313             float ch[2]; // left, right
314             if constexpr (CHANNEL_COUNT == 4) { // QUAD
315                 // sample at index 0 is FL
316                 // sample at index 1 is FR
317                 // sample at index 2 is RL (or SL)
318                 // sample at index 3 is RR (or SR)
319                 // FL + RL
320                 ch[0] = src[0] + src[2] * MINUS_3_DB_IN_FLOAT;
321                 // FR + RR
322                 ch[1] = src[1] + src[3] * MINUS_3_DB_IN_FLOAT;
323             } else if constexpr (CHANNEL_COUNT == 6) { // 5.1
324                 // sample at index 0 is FL
325                 // sample at index 1 is FR
326                 // sample at index 2 is FC
327                 // sample at index 3 is LFE
328                 // sample at index 4 is RL (or SL)
329                 // sample at index 5 is RR (or SR)
330                 const float centerPlusLfeContrib = src[2] + src[3] * MINUS_3_DB_IN_FLOAT;
331                 // FL + RL + centerPlusLfeContrib
332                 ch[0] = src[0] + (src[4] + centerPlusLfeContrib) * MINUS_3_DB_IN_FLOAT;
333                 // FR + RR + centerPlusLfeContrib
334                 ch[1] = src[1] + (src[5] + centerPlusLfeContrib) * MINUS_3_DB_IN_FLOAT;
335             } else if constexpr (CHANNEL_COUNT == 8) { // 7.1
336                 // sample at index 0 is FL
337                 // sample at index 1 is FR
338                 // sample at index 2 is FC
339                 // sample at index 3 is LFE
340                 // sample at index 4 is RL
341                 // sample at index 5 is RR
342                 // sample at index 6 is SL
343                 // sample at index 7 is SR
344                 const float centerPlusLfeContrib = src[2] + src[3] * MINUS_3_DB_IN_FLOAT;
345                 // FL + RL + SL + centerPlusLfeContrib
346                 ch[0] = src[0] + (src[4] + src[6] + centerPlusLfeContrib) * MINUS_3_DB_IN_FLOAT;
347                 // FR + RR + SR + centerPlusLfeContrib
348                 ch[1] = src[1] + (src[5] + src[7] + centerPlusLfeContrib) * MINUS_3_DB_IN_FLOAT;
349             } else {
350                 return false;
351             }
352             if constexpr (ACCUMULATE) {
353                 dst[0] = clamp(dst[0] + ch[0]);
354                 dst[1] = clamp(dst[1] + ch[1]);
355             } else {
356                 dst[0] = clamp(ch[0]);
357                 dst[1] = clamp(ch[1]);
358             }
359             src += CHANNEL_COUNT;
360             dst += mOutputChannelCount;
361             --frameCount;
362         }
363         return true;
364     }
365 };
366 
367 } // android::audio_utils::channels
368