/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "Operations"

#include <algorithm>
#include <cmath>
#include <vector>

#include "OperationResolver.h"
#include "Tracing.h"

#ifdef NN_INCLUDE_CPU_IMPLEMENTATION
#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
#include <tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h>

#include "CpuOperationUtils.h"
#endif  // NN_INCLUDE_CPU_IMPLEMENTATION

namespace android {
namespace nn {
namespace l2_norm {

constexpr char kOperationName[] = "L2_NORMALIZATION";

constexpr uint32_t kNumInputs = 2;
constexpr uint32_t kInputTensor = 0;
constexpr uint32_t kAxisScalar = 1;

constexpr uint32_t kNumOutputs = 1;
constexpr uint32_t kOutputTensor = 0;

#ifdef NN_INCLUDE_CPU_IMPLEMENTATION
namespace {

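// Generic float32 implementation: normalizes along |axis| by iterating over the
// outer and inner dimensions that surround it.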
inline bool l2normFloat32Impl(const float* inputData, const Shape& inputShape, int32_t axis,
                              float* outputData, const Shape& outputShape) {
    NNTRACE_TRANS("l2normFloat32");
    constexpr float kEpsilon = 1e-6f;
    const uint32_t outerSize = getNumberOfElements(inputShape, 0, axis);
    const uint32_t axisSize = getSizeOfDimension(inputShape, axis);
    const uint32_t innerSize =
            getNumberOfElements(inputShape, axis + 1, getNumberOfDimensions(inputShape));
    for (uint32_t outer = 0; outer < outerSize; ++outer) {
        const float* inputBeg = inputData + outer * axisSize * innerSize;
        const float* inputEnd = inputBeg + axisSize * innerSize;
        float* outputBeg = outputData + outer * axisSize * innerSize;
        for (uint32_t inner = 0; inner < innerSize; ++inner, ++inputBeg, ++inputEnd, ++outputBeg) {
            float sum = 0.0f;
            for (const float* p = inputBeg; p < inputEnd; p += innerSize) {
                float val = *p;
                sum += val * val;
            }
            float l2_norm = std::max(std::sqrt(sum), kEpsilon);
            float* pOut = outputBeg;
            for (const float* p = inputBeg; p < inputEnd; p += innerSize, pOut += innerSize) {
                *pOut = *p / l2_norm;
            }
        }
    }
    return true;
}

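// Generic TENSOR_QUANT8_ASYMM implementation: accumulates the squared zero-point-adjusted
// values and rescales with TFLite's fixed-point inverse-sqrt helpers.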
inline bool l2normQuant8Impl(const uint8_t* inputData, const Shape& inputShape, int32_t axis,
                             uint8_t* outputData, const Shape& outputShape) {
    NNTRACE_TRANS("l2normQuant8");
    const uint32_t outerSize = getNumberOfElements(inputShape, 0, axis);
    const uint32_t axisSize = getSizeOfDimension(inputShape, axis);
    const uint32_t innerSize =
            getNumberOfElements(inputShape, axis + 1, getNumberOfDimensions(inputShape));
    for (uint32_t outer = 0; outer < outerSize; ++outer) {
        const uint8_t* inputBeg = inputData + outer * axisSize * innerSize;
        const uint8_t* inputEnd = inputBeg + axisSize * innerSize;
        uint8_t* outputBeg = outputData + outer * axisSize * innerSize;
        for (uint32_t inner = 0; inner < innerSize; ++inner, ++inputBeg, ++inputEnd, ++outputBeg) {
            int32_t sum = 0;
            for (const uint8_t* p = inputBeg; p < inputEnd; p += innerSize) {
                int32_t val = static_cast<int32_t>(*p) - inputShape.offset;
                sum += val * val;
            }
            int32_t invMultiplier, invShift;
            tflite::GetInvSqrtQuantizedMultiplierExp(sum, -1, &invMultiplier, &invShift);
            uint8_t* pOut = outputBeg;
            for (const uint8_t* p = inputBeg; p < inputEnd; p += innerSize, pOut += innerSize) {
                int32_t val = static_cast<int32_t>(*p) - inputShape.offset;
                int32_t scaledVal = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
                                            val * 128, invMultiplier, invShift) +
                                    128;
                *pOut = static_cast<uint8_t>(std::min(std::max(scaledVal, 0), 255));
            }
        }
    }
    return true;
}

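// Generic TENSOR_QUANT8_ASYMM_SIGNED implementation, mirroring l2normQuant8Impl but
// clamping to the signed 8-bit range.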
inline bool l2normQuant8SignedImpl(const int8_t* inputData, const Shape& inputShape, int32_t axis,
                                   int8_t* outputData, const Shape& outputShape) {
    NNTRACE_TRANS("l2normQuant8Signed");
    const uint32_t outerSize = getNumberOfElements(inputShape, 0, axis);
    const uint32_t axisSize = getSizeOfDimension(inputShape, axis);
    const uint32_t innerSize =
            getNumberOfElements(inputShape, axis + 1, getNumberOfDimensions(inputShape));
    for (uint32_t outer = 0; outer < outerSize; ++outer) {
        const int8_t* inputBeg = inputData + outer * axisSize * innerSize;
        const int8_t* inputEnd = inputBeg + axisSize * innerSize;
        int8_t* outputBeg = outputData + outer * axisSize * innerSize;
        for (uint32_t inner = 0; inner < innerSize; ++inner, ++inputBeg, ++inputEnd, ++outputBeg) {
            int32_t sum = 0;
            for (const int8_t* p = inputBeg; p < inputEnd; p += innerSize) {
                int32_t val = static_cast<int32_t>(*p) - inputShape.offset;
                sum += val * val;
            }
            int32_t invMultiplier, invShift;
            tflite::GetInvSqrtQuantizedMultiplierExp(sum, -1, &invMultiplier, &invShift);
            int8_t* pOut = outputBeg;
            for (const int8_t* p = inputBeg; p < inputEnd; p += innerSize, pOut += innerSize) {
                int32_t val = static_cast<int32_t>(*p) - inputShape.offset;
                int32_t scaledVal = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
                        val * 128, invMultiplier, invShift);
                *pOut = static_cast<int8_t>(std::min(std::max(scaledVal, -128), 127));
            }
        }
    }
    return true;
}

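// Float32 entry point: uses the TFLite optimized kernel when normalizing along the
// last axis and falls back to l2normFloat32Impl otherwise.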
bool l2normFloat32(const float* inputData, const Shape& inputShape, int32_t axis, float* outputData,
                   const Shape& outputShape) {
    int32_t ndim = getNumberOfDimensions(inputShape);
    NN_CHECK(handleNegativeAxis(inputShape, &axis));
    // TFLite optimized implementation only supports computation along the last axis
    if (axis == ndim - 1) {
        NNTRACE_COMP("optimized_ops::L2Normalization::float");
        tflite::L2NormalizationParams param = {.input_zero_point = 0};
        tflite::optimized_ops::L2Normalization(param, convertShapeToTflshape(inputShape), inputData,
                                               convertShapeToTflshape(outputShape), outputData);
        return true;
    } else {
        return l2normFloat32Impl(inputData, inputShape, axis, outputData, outputShape);
    }
}

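// Float16 entry point: converts to float32, reuses l2normFloat32, and converts back.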
bool l2normFloat16(const _Float16* inputData, const Shape& inputShape, int32_t axis,
                   _Float16* outputData, const Shape& outputShape) {
    NNTRACE_TRANS("l2normFloat16");
    std::vector<float> inputDataFloat32(getNumberOfElements(inputShape));
    convertFloat16ToFloat32(inputData, &inputDataFloat32);
    std::vector<float> outputDataFloat32(getNumberOfElements(outputShape));

    l2normFloat32(inputDataFloat32.data(), inputShape, axis, outputDataFloat32.data(), outputShape);
    convertFloat32ToFloat16(outputDataFloat32, outputData);

    return true;
}

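// TENSOR_QUANT8_ASYMM entry point: uses the TFLite optimized kernel for the last axis
// and falls back to l2normQuant8Impl otherwise.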
bool l2normQuant8(const uint8_t* inputData, const Shape& inputShape, int32_t axis,
                  uint8_t* outputData, const Shape& outputShape) {
    int32_t ndim = getNumberOfDimensions(inputShape);
    NN_CHECK(handleNegativeAxis(inputShape, &axis));
    // TFLite optimized implementation only supports computation along the last axis
    if (axis == ndim - 1) {
        NNTRACE_COMP("optimized_ops::L2Normalization::uint8");
        tflite::L2NormalizationParams param = {.input_zero_point = inputShape.offset};
        tflite::optimized_ops::L2Normalization(param, convertShapeToTflshape(inputShape), inputData,
                                               convertShapeToTflshape(outputShape), outputData);
        return true;
    } else {
        return l2normQuant8Impl(inputData, inputShape, axis, outputData, outputShape);
    }
}

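// TENSOR_QUANT8_ASYMM_SIGNED entry point: uses the TFLite reference integer kernel for
// the last axis and falls back to l2normQuant8SignedImpl otherwise.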
bool l2normQuant8Signed(const int8_t* inputData, const Shape& inputShape, int32_t axis,
                        int8_t* outputData, const Shape& outputShape) {
    int32_t ndim = getNumberOfDimensions(inputShape);
    NN_CHECK(handleNegativeAxis(inputShape, &axis));
    // TFLite implementation only supports computation along the last axis
    if (axis == ndim - 1) {
        NNTRACE_COMP("reference_integer_ops::L2Normalization");
        const int32_t outerSize = getNumberOfElements(inputShape, 0, axis);
        const int32_t axisSize = getSizeOfDimension(inputShape, axis);
        tflite::reference_integer_ops::L2Normalization(inputShape.offset, outerSize, axisSize,
                                                       inputData, outputData);
        return true;
    } else {
        return l2normQuant8SignedImpl(inputData, inputShape, axis, outputData, outputShape);
    }
}

}  // namespace
#endif  // NN_INCLUDE_CPU_IMPLEMENTATION

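// Checks input/output counts and operand types and returns the minimum version that
// supports the given configuration.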
Result<Version> validate(const IOperationValidationContext* context) {
    NN_RET_CHECK(context->getNumInputs() == kNumInputs ||
                 context->getNumInputs() == kNumInputs - 1);
    NN_RET_CHECK_EQ(context->getNumOutputs(), kNumOutputs);

    const OperandType inputType = context->getInputType(kInputTensor);
    std::vector<OperandType> inExpectedTypes = {inputType};
    auto minSupportedVersion = Version::ANDROID_OC_MR1;
    if (inputType == OperandType::TENSOR_FLOAT16 ||
        inputType == OperandType::TENSOR_QUANT8_ASYMM) {
        minSupportedVersion = Version::ANDROID_Q;
    } else if (inputType == OperandType::TENSOR_FLOAT32) {
        minSupportedVersion = Version::ANDROID_OC_MR1;
    } else if (inputType == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
        minSupportedVersion = Version::ANDROID_R;
    } else {
        NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
    }
    if (context->getNumInputs() == kNumInputs) {
        inExpectedTypes.push_back(OperandType::INT32);
        minSupportedVersion = Version::ANDROID_Q;
    } else if (context->getInputShape(kInputTensor).dimensions.size() != 4) {
        minSupportedVersion = Version::ANDROID_Q;
    }
    const Shape& input = context->getInputShape(kInputTensor);
    if (hasKnownRank(input)) {
        NN_RET_CHECK_LE(getNumberOfDimensions(input), 4);
    }
    NN_RET_CHECK(validateInputTypes(context, inExpectedTypes));
    NN_RET_CHECK(validateOutputTypes(context, {inputType}));
    return minSupportedVersion;
}

#ifdef NN_INCLUDE_CPU_IMPLEMENTATION
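// Resolves the optional axis operand, validates it against the input rank, and sets
// the output shape (quantized outputs always use scale 1/128 and a fixed zero point).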
bool prepare(IOperationExecutionContext* context) {
    const Shape& input = context->getInputShape(kInputTensor);
    int32_t numDimensions = getNumberOfDimensions(input);
    int32_t axis = context->getNumInputs() == kNumInputs
                           ? context->getInputValue<int32_t>(kAxisScalar)
                           : -1;
    NN_RET_CHECK_LE(numDimensions, 4);
    NN_RET_CHECK_GE(axis, -numDimensions);
    NN_RET_CHECK_LT(axis, numDimensions);
    Shape output = context->getOutputShape(kOutputTensor);
    output.type = input.type;
    output.dimensions = input.dimensions;
    if (output.type == OperandType::TENSOR_QUANT8_ASYMM) {
        output.scale = 1.0f / 128.0f;
        output.offset = 128;
    } else if (output.type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
        output.scale = 1.0f / 128.0f;
        output.offset = 0;
    } else {
        output.scale = 0;
        output.offset = 0;
    }
    return context->setOutputShape(kOutputTensor, output);
}

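// Resolves the axis and dispatches to the type-specific implementation.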
bool execute(IOperationExecutionContext* context) {
    int32_t axis = context->getNumInputs() == kNumInputs
                           ? context->getInputValue<int32_t>(kAxisScalar)
                           : -1;
    NN_RET_CHECK(handleNegativeAxis(context->getInputShape(kInputTensor), &axis));
    switch (context->getInputType(kInputTensor)) {
        case OperandType::TENSOR_FLOAT32:
            return l2normFloat32(context->getInputBuffer<float>(kInputTensor),
                                 context->getInputShape(kInputTensor), axis,
                                 context->getOutputBuffer<float>(kOutputTensor),
                                 context->getOutputShape(kOutputTensor));
        case OperandType::TENSOR_FLOAT16:
            return l2normFloat16(context->getInputBuffer<_Float16>(kInputTensor),
                                 context->getInputShape(kInputTensor), axis,
                                 context->getOutputBuffer<_Float16>(kOutputTensor),
                                 context->getOutputShape(kOutputTensor));
        case OperandType::TENSOR_QUANT8_ASYMM:
            return l2normQuant8(context->getInputBuffer<uint8_t>(kInputTensor),
                                context->getInputShape(kInputTensor), axis,
                                context->getOutputBuffer<uint8_t>(kOutputTensor),
                                context->getOutputShape(kOutputTensor));
        case OperandType::TENSOR_QUANT8_ASYMM_SIGNED:
            return l2normQuant8Signed(context->getInputBuffer<int8_t>(kInputTensor),
                                      context->getInputShape(kInputTensor), axis,
                                      context->getOutputBuffer<int8_t>(kOutputTensor),
                                      context->getOutputShape(kOutputTensor));
        default:
            NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
    }
}
#endif  // NN_INCLUDE_CPU_IMPLEMENTATION

}  // namespace l2_norm

NN_REGISTER_OPERATION(L2_NORMALIZATION, l2_norm::kOperationName, l2_norm::validate,
                      l2_norm::prepare, l2_norm::execute);

}  // namespace nn
}  // namespace android