1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "Operations"
18 
19 #include <algorithm>
20 #include <cfloat>
21 #include <cmath>
22 #include <vector>
23 
24 #include "OperationResolver.h"
25 #include "OperationsUtils.h"
26 #include "Tracing.h"
27 
28 #ifdef NN_INCLUDE_CPU_IMPLEMENTATION
29 #include "CpuOperationUtils.h"
30 #endif  // NN_INCLUDE_CPU_IMPLEMENTATION
31 
32 namespace android {
33 namespace nn {
34 namespace heatmap_max_keypoint {
35 
// Human-readable operation name; used in this file's error messages and passed
// to NN_REGISTER_OPERATION at the bottom of the file.
constexpr char kOperationName[] = "HEATMAP_MAX_KEYPOINT";

// Input operands: expected count (enforced in validate()) and operand indices.
constexpr uint32_t kNumInputs = 3;
// 4-D heatmap tensor; dimension 0 is the number of boxes.
constexpr uint32_t kHeatmapTensor = 0;
// 2-D boxes tensor; prepare() requires 4 values per box.
constexpr uint32_t kBoxesTensor = 1;
// Boolean layout flag: when true the heatmap is converted with
// convertNchwToNhwc() before processing, otherwise it is consumed as-is.
constexpr uint32_t kLayoutScalar = 2;

// Output operands: expected count (enforced in validate()) and operand indices.
constexpr uint32_t kNumOutputs = 2;
// Scores, shaped {numBoxes, numKeypoints} by prepare().
constexpr uint32_t kOutputScoreTensor = 0;
// Keypoint coordinates, shaped {numBoxes, numKeypoints, 2} by prepare().
constexpr uint32_t kOutputKeypointTensor = 1;
46 
47 #ifdef NN_INCLUDE_CPU_IMPLEMENTATION
48 namespace {
49 
// Refines a heatmap peak to sub-pixel precision.
//
// A 2nd order (quadratic) Taylor expansion around the center of the 3x3 grid is
// used to approximate the result of bicubic upscaling:
//     D(x) = D - b'x + 1/2 * x'Ax
// where D    = grid[1][1], the expansion center (the original score),
//       b    = the negated first derivative at the center,
//       A    = the Hessian (second derivative) at the center,
//       x    = delta, the correction applied to the peak position,
//       D(x) = deltaScore, the score after the correction.
//
// delta[0] is the correction along the grid's second index and delta[1] the
// correction along its first index. When A is judged non-invertible, i.e.
// |det(A)| < fpAtol + fpRtol * A[0][0] * A[1][1], neither delta nor deltaScore
// is written, so callers must pre-initialize both.
static void solveForDelta(const float grid[3][3], float* delta, float* deltaScore,
                          float fpAtol = 1e-5f, float fpRtol = 1e-5f) {
    const float center = grid[1][1];

    // Negated central-difference first derivatives at the center.
    const float negGrad0 = -(grid[1][2] - grid[1][0]) / 2.0f;
    const float negGrad1 = -(grid[2][1] - grid[0][1]) / 2.0f;

    // Entries of the symmetric Hessian; the off-diagonal entry is shared.
    const float hess00 = grid[1][0] - 2.0f * center + grid[1][2];
    const float hess01 = (grid[2][2] - grid[2][0] - grid[0][2] + grid[0][0]) / 4.0f;
    const float hess11 = grid[0][1] - 2.0f * center + grid[2][1];

    // Solve A * x = b for x through the closed-form 2x2 inverse.
    const float diagProduct = hess00 * hess11;
    const float offDiagProduct = hess01 * hess01;
    const float determinant = diagProduct - offDiagProduct;
    if (std::abs(determinant) < (fpAtol + fpRtol * diagProduct)) {
        // A is (numerically) singular: keep the caller-provided defaults.
        return;
    }
    delta[0] = (hess11 * negGrad0 - hess01 * negGrad1) / determinant;
    delta[1] = (hess00 * negGrad1 - hess01 * negGrad0) / determinant;

    // Keep the correction within 3/2 of a cell, scaling both components by the
    // same factor so its direction is preserved.
    const float magnitude0 = std::abs(delta[0]);
    const float magnitude1 = std::abs(delta[1]);
    if (magnitude0 > 1.5f || magnitude1 > 1.5f) {
        const float shrink = 1.5f / std::max(magnitude0, magnitude1);
        delta[0] *= shrink;
        delta[1] *= shrink;
    }

    // Evaluate the quadratic model at the (possibly clipped) correction.
    const float d0 = delta[0];
    const float d1 = delta[1];
    const float quadraticForm =
            (hess00 * d0 + hess01 * d1) * d0 + (hess01 * d0 + hess11 * d1) * d1;
    *deltaScore = center - negGrad0 * d0 - negGrad1 * d1 + quadraticForm / 2.0f;
}
88 
heatmapMaxKeypointFloat32Nhwc(const float * heatmap,const Shape & heatmapShape,const float * boxes,const Shape & boxesShape,float * outputScoreData,const Shape & outputScoreShape,float * outputKeypointData,const Shape & outputKeypointShape,float fpAtol,float fpRtol)89 inline bool heatmapMaxKeypointFloat32Nhwc(const float* heatmap, const Shape& heatmapShape,
90                                           const float* boxes, const Shape& boxesShape,
91                                           float* outputScoreData, const Shape& outputScoreShape,
92                                           float* outputKeypointData,
93                                           const Shape& outputKeypointShape, float fpAtol,
94                                           float fpRtol) {
95     NNTRACE_TRANS("HeatmapMaxKeypoint");
96 
97     uint32_t numBoxes = getSizeOfDimension(heatmapShape, 0);
98     uint32_t heatmapSize = getSizeOfDimension(heatmapShape, 1);
99     uint32_t numKeypoints = getSizeOfDimension(heatmapShape, 3);
100     uint32_t boxInfoLength = getSizeOfDimension(boxesShape, 1);
101 
102     const float* heatmapBase = heatmap;
103     const float* boxInfoBase = boxes;
104     float* outputScoreBase = outputScoreData;
105     float* outputKeypointBase = outputKeypointData;
106     for (uint32_t i = 0; i < numBoxes; i++) {
107         NN_RET_CHECK_LE(boxInfoBase[0], boxInfoBase[2]);
108         NN_RET_CHECK_LE(boxInfoBase[1], boxInfoBase[3]);
109         for (uint32_t j = 0; j < numKeypoints; j++) {
110             // find max score and its index
111             uint32_t maxIndex = 0;
112             float maxScore = -FLT_MAX;
113             for (uint32_t k = 0; k < heatmapSize * heatmapSize; k++) {
114                 float val = heatmapBase[k * numKeypoints + j];
115                 if (maxScore < val) {
116                     maxScore = val;
117                     maxIndex = k;
118                 }
119             }
120 
121             uint32_t maxIndexWidth = maxIndex % heatmapSize;
122             uint32_t maxIndexHeight = maxIndex / heatmapSize;
123 
124             // get local 3x3 grid
125             float localGrid[3][3];
126             for (int32_t dh = -1; dh <= 1; dh++) {
127                 for (int32_t dw = -1; dw <= 1; dw++) {
128                     // cast uint32_t to int32_t
129                     int32_t h = static_cast<int32_t>(maxIndexHeight) + dh;
130                     int32_t w = static_cast<int32_t>(maxIndexWidth) + dw;
131 
132                     // use mirroring for out of bound indexing
133                     // need to ensure heatmapSize >= 2
134                     h = h < 0 ? 1 : (h >= heatmapSize ? heatmapSize - 2 : h);
135                     w = w < 0 ? 1 : (w >= heatmapSize ? heatmapSize - 2 : w);
136 
137                     uint32_t heatmapIndex = static_cast<uint32_t>(h) * heatmapSize * numKeypoints +
138                                             static_cast<uint32_t>(w) * numKeypoints + j;
139                     localGrid[dh + 1][dw + 1] = heatmapBase[heatmapIndex];
140                 }
141             }
142 
143             float delta[2] = {0.0f, 0.0f}, deltaScore = maxScore;
144             solveForDelta(localGrid, delta, &deltaScore, fpAtol, fpRtol);
145 
146             float wRoiStart = boxInfoBase[0];
147             float hRoiStart = boxInfoBase[1];
148             float wRoiEnd = boxInfoBase[2];
149             float hRoiEnd = boxInfoBase[3];
150             float roiWidth = wRoiEnd - wRoiStart;
151             float roiHeight = hRoiEnd - hRoiStart;
152             float wRelativePos = (static_cast<float>(maxIndexWidth) + delta[0] + 0.5f) /
153                                  static_cast<float>(heatmapSize);
154             float hRelativePos = (static_cast<float>(maxIndexHeight) + delta[1] + 0.5f) /
155                                  static_cast<float>(heatmapSize);
156             *outputScoreBase++ = deltaScore;
157             outputKeypointBase[0] = wRelativePos * roiWidth + wRoiStart;
158             outputKeypointBase[1] = hRelativePos * roiHeight + hRoiStart;
159             outputKeypointBase += 2;
160         }
161         boxInfoBase += boxInfoLength;
162         heatmapBase += heatmapSize * heatmapSize * numKeypoints;
163     }
164 
165     return true;
166 }
167 
heatmapMaxKeypointFloat32(const float * heatmap,const Shape & heatmapShape,const float * boxes,const Shape & boxesShape,bool layout,float * outputScoreData,const Shape & outputScoreShape,float * outputKeypointData,const Shape & outputKeypointShape,float fpAtol,float fpRtol)168 inline bool heatmapMaxKeypointFloat32(const float* heatmap, const Shape& heatmapShape,
169                                       const float* boxes, const Shape& boxesShape, bool layout,
170                                       float* outputScoreData, const Shape& outputScoreShape,
171                                       float* outputKeypointData, const Shape& outputKeypointShape,
172                                       float fpAtol, float fpRtol) {
173     std::vector<float> heatmap_nhwc;
174     Shape heatmapShape_nhwc;
175     if (layout) {
176         NN_RET_CHECK(convertNchwToNhwc(heatmap, heatmapShape, &heatmap_nhwc, &heatmapShape_nhwc));
177     }
178     const float* heatmap_tmp = layout ? heatmap_nhwc.data() : heatmap;
179     const Shape& heatmapShape_tmp = layout ? heatmapShape_nhwc : heatmapShape;
180     return heatmapMaxKeypointFloat32Nhwc(heatmap_tmp, heatmapShape_tmp, boxes, boxesShape,
181                                          outputScoreData, outputScoreShape, outputKeypointData,
182                                          outputKeypointShape, fpAtol, fpRtol);
183 }
184 
heatmapMaxKeypointQuant(const uint8_t * heatmap,const Shape & heatmapShape,const uint16_t * boxes,const Shape & boxesShape,bool layout,uint8_t * outputScoreData,const Shape & outputScoreShape,uint16_t * outputKeypointData,const Shape & outputKeypointShape,float fpAtol,float fpRtol)185 inline bool heatmapMaxKeypointQuant(const uint8_t* heatmap, const Shape& heatmapShape,
186                                     const uint16_t* boxes, const Shape& boxesShape, bool layout,
187                                     uint8_t* outputScoreData, const Shape& outputScoreShape,
188                                     uint16_t* outputKeypointData, const Shape& outputKeypointShape,
189                                     float fpAtol, float fpRtol) {
190     std::vector<float> heatmap_float32(getNumberOfElements(heatmapShape));
191     convertQuantToFloat32(heatmap, heatmapShape.scale, heatmapShape.offset, &heatmap_float32);
192     std::vector<float> boxes_float32(getNumberOfElements(boxesShape));
193     convertQuantToFloat32(boxes, boxesShape.scale, boxesShape.offset, &boxes_float32);
194     std::vector<float> outputScore_float32(getNumberOfElements(outputScoreShape));
195     std::vector<float> outputKeypoint_float32(getNumberOfElements(outputKeypointShape));
196     NN_RET_CHECK(heatmapMaxKeypointFloat32(
197             heatmap_float32.data(), heatmapShape, boxes_float32.data(), boxesShape, layout,
198             outputScore_float32.data(), outputScoreShape, outputKeypoint_float32.data(),
199             outputKeypointShape, fpAtol, fpRtol));
200     convertFloat32ToQuant(outputScore_float32, outputScoreShape.scale, outputScoreShape.offset,
201                           outputScoreData);
202     convertFloat32ToQuant(outputKeypoint_float32, outputKeypointShape.scale,
203                           outputKeypointShape.offset, outputKeypointData);
204     return true;
205 }
206 
heatmapMaxKeypointQuant(const int8_t * heatmap,const Shape & heatmapShape,const uint16_t * boxes,const Shape & boxesShape,bool layout,int8_t * outputScoreData,const Shape & outputScoreShape,uint16_t * outputKeypointData,const Shape & outputKeypointShape,float fpAtol,float fpRtol)207 inline bool heatmapMaxKeypointQuant(const int8_t* heatmap, const Shape& heatmapShape,
208                                     const uint16_t* boxes, const Shape& boxesShape, bool layout,
209                                     int8_t* outputScoreData, const Shape& outputScoreShape,
210                                     uint16_t* outputKeypointData, const Shape& outputKeypointShape,
211                                     float fpAtol, float fpRtol) {
212     std::vector<float> heatmap_float32(getNumberOfElements(heatmapShape));
213     convertQuantToFloat32(heatmap, heatmapShape.scale, heatmapShape.offset, &heatmap_float32);
214     std::vector<float> boxes_float32(getNumberOfElements(boxesShape));
215     convertQuantToFloat32(boxes, boxesShape.scale, boxesShape.offset, &boxes_float32);
216     std::vector<float> outputScore_float32(getNumberOfElements(outputScoreShape));
217     std::vector<float> outputKeypoint_float32(getNumberOfElements(outputKeypointShape));
218     NN_RET_CHECK(heatmapMaxKeypointFloat32(
219             heatmap_float32.data(), heatmapShape, boxes_float32.data(), boxesShape, layout,
220             outputScore_float32.data(), outputScoreShape, outputKeypoint_float32.data(),
221             outputKeypointShape, fpAtol, fpRtol));
222     convertFloat32ToQuant(outputScore_float32, outputScoreShape.scale, outputScoreShape.offset,
223                           outputScoreData);
224     convertFloat32ToQuant(outputKeypoint_float32, outputKeypointShape.scale,
225                           outputKeypointShape.offset, outputKeypointData);
226     return true;
227 }
228 
229 }  // namespace
230 #endif  // NN_INCLUDE_CPU_IMPLEMENTATION
231 
validate(const IOperationValidationContext * context)232 Result<Version> validate(const IOperationValidationContext* context) {
233     NN_RET_CHECK_EQ(context->getNumInputs(), kNumInputs);
234     NN_RET_CHECK_EQ(context->getNumOutputs(), kNumOutputs);
235     std::vector<OperandType> inExpectedTypes;
236     std::vector<OperandType> outExpectedTypes;
237     auto inputType = context->getInputType(kHeatmapTensor);
238     auto minSupportedVersion = Version::ANDROID_Q;
239     if (inputType == OperandType::TENSOR_FLOAT32 || inputType == OperandType::TENSOR_FLOAT16) {
240         inExpectedTypes = {inputType, inputType, OperandType::BOOL};
241         outExpectedTypes = {inputType, inputType};
242     } else if (inputType == OperandType::TENSOR_QUANT8_ASYMM) {
243         inExpectedTypes = {OperandType::TENSOR_QUANT8_ASYMM, OperandType::TENSOR_QUANT16_ASYMM,
244                            OperandType::BOOL};
245         outExpectedTypes = {OperandType::TENSOR_QUANT8_ASYMM, OperandType::TENSOR_QUANT16_ASYMM};
246     } else if (inputType == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
247         inExpectedTypes = {OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
248                            OperandType::TENSOR_QUANT16_ASYMM, OperandType::BOOL};
249         outExpectedTypes = {OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
250                             OperandType::TENSOR_QUANT16_ASYMM};
251         minSupportedVersion = Version::ANDROID_R;
252     } else {
253         return NN_ERROR() << "Unsupported input tensor type for operation " << kOperationName;
254     }
255     NN_RET_CHECK(validateInputTypes(context, inExpectedTypes));
256     NN_RET_CHECK(validateOutputTypes(context, outExpectedTypes));
257     return minSupportedVersion;
258 }
259 
260 #ifdef NN_INCLUDE_CPU_IMPLEMENTATION
prepare(IOperationExecutionContext * context)261 bool prepare(IOperationExecutionContext* context) {
262     bool layout = context->getInputValue<bool>(kLayoutScalar);
263     Shape heatmapShape = context->getInputShape(kHeatmapTensor);
264     Shape boxesShape = context->getInputShape(kBoxesTensor);
265     NN_RET_CHECK_EQ(getNumberOfDimensions(heatmapShape), 4);
266     NN_RET_CHECK_EQ(getNumberOfDimensions(boxesShape), 2);
267 
268     uint32_t numBoxes = getSizeOfDimension(heatmapShape, 0);
269     uint32_t heatmapSize = getSizeOfDimension(heatmapShape, 2);
270     uint32_t numKeypoints = getSizeOfDimension(heatmapShape, layout ? 1 : 3);
271     uint32_t boxInfoLength = getSizeOfDimension(boxesShape, 1);
272     NN_RET_CHECK_EQ(getSizeOfDimension(heatmapShape, layout ? 3 : 1), heatmapSize);
273     NN_RET_CHECK_GE(heatmapSize, 2);
274     NN_RET_CHECK_EQ(getSizeOfDimension(boxesShape, 0), numBoxes);
275     NN_RET_CHECK_EQ(boxInfoLength, 4);
276 
277     if (heatmapShape.type == OperandType::TENSOR_QUANT8_ASYMM ||
278         heatmapShape.type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
279         NN_RET_CHECK_EQ(boxesShape.scale, 0.125f);
280         NN_RET_CHECK_EQ(boxesShape.offset, 0);
281     }
282 
283     Shape outputScore = context->getOutputShape(kOutputScoreTensor);
284     outputScore.type = heatmapShape.type;
285     outputScore.dimensions = {numBoxes, numKeypoints};
286     NN_RET_CHECK(context->setOutputShape(kOutputScoreTensor, outputScore));
287 
288     Shape outputKeypoint = context->getOutputShape(kOutputKeypointTensor);
289     outputKeypoint.type = boxesShape.type;
290     outputKeypoint.dimensions = {numBoxes, numKeypoints, 2};
291     outputKeypoint.offset = 0;
292     outputKeypoint.scale = 0.f;
293     if (heatmapShape.type == OperandType::TENSOR_QUANT8_ASYMM ||
294         heatmapShape.type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
295         outputKeypoint.scale = 0.125f;
296     }
297     NN_RET_CHECK(context->setOutputShape(kOutputKeypointTensor, outputKeypoint));
298     return true;
299 }
300 
execute(IOperationExecutionContext * context)301 bool execute(IOperationExecutionContext* context) {
302     bool layout = context->getInputValue<bool>(kLayoutScalar);
303     switch (context->getInputType(kHeatmapTensor)) {
304         case OperandType::TENSOR_FLOAT16: {
305             const auto heatmap = context->getInputBuffer<_Float16>(kHeatmapTensor);
306             const auto heatmapShape = context->getInputShape(kHeatmapTensor);
307             const auto boxes = context->getInputBuffer<_Float16>(kBoxesTensor);
308             const auto boxesShape = context->getInputShape(kBoxesTensor);
309             auto outputScoreData = context->getOutputBuffer<_Float16>(kOutputScoreTensor);
310             const auto outputScoreShape = context->getOutputShape(kOutputScoreTensor);
311             auto outputKeypointData = context->getOutputBuffer<_Float16>(kOutputKeypointTensor);
312             const auto outputKeypointShape = context->getOutputShape(kOutputKeypointTensor);
313             std::vector<float> heatmap_float32(getNumberOfElements(heatmapShape));
314             convertFloat16ToFloat32(heatmap, &heatmap_float32);
315             std::vector<float> boxes_float32(getNumberOfElements(boxesShape));
316             convertFloat16ToFloat32(boxes, &boxes_float32);
317             std::vector<float> outputScore_float32(getNumberOfElements(outputScoreShape));
318             std::vector<float> outputKeypoint_float32(getNumberOfElements(outputKeypointShape));
319             NN_RET_CHECK(heatmapMaxKeypointFloat32(
320                     heatmap_float32.data(), heatmapShape, boxes_float32.data(), boxesShape, layout,
321                     outputScore_float32.data(), outputScoreShape, outputKeypoint_float32.data(),
322                     outputKeypointShape, 1e-3f, 1e-3f));
323             convertFloat32ToFloat16(outputScore_float32, outputScoreData);
324             convertFloat32ToFloat16(outputKeypoint_float32, outputKeypointData);
325             return true;
326         }
327         case OperandType::TENSOR_FLOAT32: {
328             return heatmapMaxKeypointFloat32(context->getInputBuffer<float>(kHeatmapTensor),
329                                              context->getInputShape(kHeatmapTensor),
330                                              context->getInputBuffer<float>(kBoxesTensor),
331                                              context->getInputShape(kBoxesTensor), layout,
332                                              context->getOutputBuffer<float>(kOutputScoreTensor),
333                                              context->getOutputShape(kOutputScoreTensor),
334                                              context->getOutputBuffer<float>(kOutputKeypointTensor),
335                                              context->getOutputShape(kOutputKeypointTensor), 1e-5f,
336                                              1e-5f);
337         }
338         case OperandType::TENSOR_QUANT8_ASYMM: {
339             return heatmapMaxKeypointQuant(
340                     context->getInputBuffer<uint8_t>(kHeatmapTensor),
341                     context->getInputShape(kHeatmapTensor),
342                     context->getInputBuffer<uint16_t>(kBoxesTensor),
343                     context->getInputShape(kBoxesTensor), layout,
344                     context->getOutputBuffer<uint8_t>(kOutputScoreTensor),
345                     context->getOutputShape(kOutputScoreTensor),
346                     context->getOutputBuffer<uint16_t>(kOutputKeypointTensor),
347                     context->getOutputShape(kOutputKeypointTensor), 1e-5f, 1e-5f);
348         }
349         case OperandType::TENSOR_QUANT8_ASYMM_SIGNED: {
350             return heatmapMaxKeypointQuant(
351                     context->getInputBuffer<int8_t>(kHeatmapTensor),
352                     context->getInputShape(kHeatmapTensor),
353                     context->getInputBuffer<uint16_t>(kBoxesTensor),
354                     context->getInputShape(kBoxesTensor), layout,
355                     context->getOutputBuffer<int8_t>(kOutputScoreTensor),
356                     context->getOutputShape(kOutputScoreTensor),
357                     context->getOutputBuffer<uint16_t>(kOutputKeypointTensor),
358                     context->getOutputShape(kOutputKeypointTensor), 1e-5f, 1e-5f);
359         }
360         default:
361             NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
362     }
363 }
364 #endif  // NN_INCLUDE_CPU_IMPLEMENTATION
365 
366 }  // namespace heatmap_max_keypoint
367 
// Registers HEATMAP_MAX_KEYPOINT under its operation name together with the
// validate/prepare/execute entry points defined in this file.
// NOTE(review): prepare and execute are only defined when
// NN_INCLUDE_CPU_IMPLEMENTATION is set; presumably the macro accounts for
// builds without the CPU implementation -- confirm against its definition.
NN_REGISTER_OPERATION(HEATMAP_MAX_KEYPOINT, heatmap_max_keypoint::kOperationName,
                      heatmap_max_keypoint::validate, heatmap_max_keypoint::prepare,
                      heatmap_max_keypoint::execute);
371 
372 }  // namespace nn
373 }  // namespace android
374