/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "GeneratedTestHarness.h"

#include <aidl/android/hardware/neuralnetworks/ErrorStatus.h>
#include <aidl/android/hardware/neuralnetworks/RequestMemoryPool.h>
#include <android-base/logging.h>
#include <android/binder_auto_utils.h>
#include <android/sync.h>
#include <gtest/gtest.h>

#include <algorithm>
#include <chrono>
#include <iostream>
#include <iterator>
#include <numeric>
#include <vector>

#include <MemoryUtils.h>
#include <android/binder_status.h>
#include <nnapi/Result.h>
#include <nnapi/SharedMemory.h>
#include <nnapi/Types.h>
#include <nnapi/hal/aidl/Conversions.h>
#include <nnapi/hal/aidl/Utils.h>

#include "Callbacks.h"
#include "TestHarness.h"
#include "Utils.h"
#include "VtsHalNeuralnetworks.h"

namespace aidl::android::hardware::neuralnetworks::vts::functional {

namespace nn = ::android::nn;
using namespace test_helper;
using implementation::PreparedModelCallback;

namespace {

enum class OutputType { FULLY_SPECIFIED, UNSPECIFIED, INSUFFICIENT, MISSED_DEADLINE };

struct TestConfig {
    Executor executor;
    bool measureTiming;
    OutputType outputType;
    MemoryType memoryType;
    // `reportSkipping` indicates whether a test should print an info message if it
    // is skipped. It defaults to true and is set to false in quantization coupling
    // tests, which report and handle skipping themselves.
    bool reportSkipping;
    TestConfig(Executor executor, bool measureTiming, OutputType outputType, MemoryType memoryType)
        : executor(executor),
          measureTiming(measureTiming),
          outputType(outputType),
          memoryType(memoryType),
          reportSkipping(true) {}
    TestConfig(Executor executor, bool measureTiming, OutputType outputType, MemoryType memoryType,
               bool reportSkipping)
        : executor(executor),
          measureTiming(measureTiming),
          outputType(outputType),
          memoryType(memoryType),
          reportSkipping(reportSkipping) {}
};

enum class IOType { INPUT, OUTPUT };

class DeviceMemoryAllocator {
  public:
    DeviceMemoryAllocator(const std::shared_ptr<IDevice>& device,
                          const std::shared_ptr<IPreparedModel>& preparedModel,
                          const TestModel& testModel)
        : kDevice(device), kPreparedModel(preparedModel), kTestModel(testModel) {}

    // Allocate device memory for a target input/output operand.
    // Return {IBuffer object, token} if successful.
    // Return {nullptr, 0} if device memory is not supported.
    template <IOType ioType>
    std::pair<std::shared_ptr<IBuffer>, int32_t> allocate(uint32_t index) {
        std::pair<std::shared_ptr<IBuffer>, int32_t> buffer;
        allocateInternal<ioType>(index, &buffer);
        return buffer;
    }

  private:
    template <IOType ioType>
    void allocateInternal(int32_t index, std::pair<std::shared_ptr<IBuffer>, int32_t>* result) {
        ASSERT_NE(result, nullptr);

        // Prepare arguments.
        BufferRole role = {.modelIndex = 0, .ioIndex = index, .probability = 1.0f};
        std::vector<BufferRole> inputRoles, outputRoles;
        if constexpr (ioType == IOType::INPUT) {
            inputRoles = {role};
        } else {
            outputRoles = {role};
        }

        // Allocate device memory.
        DeviceBuffer buffer;
        IPreparedModelParcel parcel;
        parcel.preparedModel = kPreparedModel;
        const auto ret = kDevice->allocate({}, {parcel}, inputRoles, outputRoles, &buffer);

        // Check allocation results.
        if (ret.isOk()) {
            ASSERT_NE(buffer.buffer, nullptr);
            ASSERT_GT(buffer.token, 0);
        } else {
            ASSERT_EQ(ret.getExceptionCode(), EX_SERVICE_SPECIFIC);
            ASSERT_EQ(static_cast<ErrorStatus>(ret.getServiceSpecificError()),
                      ErrorStatus::GENERAL_FAILURE);
            buffer.buffer = nullptr;
            buffer.token = 0;
        }

        // Initialize input data from TestBuffer.
        if constexpr (ioType == IOType::INPUT) {
            if (buffer.buffer != nullptr) {
                // TestBuffer -> Shared memory.
                const auto& testBuffer =
                        kTestModel.main.operands[kTestModel.main.inputIndexes[index]].data;
                ASSERT_GT(testBuffer.size(), 0);
                const auto sharedMemory = nn::createSharedMemory(testBuffer.size()).value();
                const auto memory = utils::convert(sharedMemory).value();
                const auto mapping = nn::map(sharedMemory).value();
                uint8_t* inputPtr = static_cast<uint8_t*>(std::get<void*>(mapping.pointer));
                ASSERT_NE(inputPtr, nullptr);
                const uint8_t* begin = testBuffer.get<uint8_t>();
                const uint8_t* end = begin + testBuffer.size();
                std::copy(begin, end, inputPtr);

                // Shared memory -> IBuffer.
                auto ret = buffer.buffer->copyFrom(memory, {});
                ASSERT_TRUE(ret.isOk());
            }
        }
        *result = {std::move(buffer.buffer), buffer.token};
    }

    const std::shared_ptr<IDevice> kDevice;
    const std::shared_ptr<IPreparedModel> kPreparedModel;
    const TestModel& kTestModel;
};

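// Converts a TestSubgraph to an AIDL Subgraph. Constant data is not copied here:
// CONSTANT_COPY and CONSTANT_REFERENCE operands are assigned offsets into the
// operand-value blob and the constant-reference memory pool respectively, and
// pointers to their TestBuffers are appended to `constCopies`/`constReferences`
// while `constCopySize`/`constRefSize` accumulate the required (aligned) sizes.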
Subgraph createSubgraph(const TestSubgraph& testSubgraph, uint32_t* constCopySize,
                        std::vector<const TestBuffer*>* constCopies, uint32_t* constRefSize,
                        std::vector<const TestBuffer*>* constReferences) {
    CHECK(constCopySize != nullptr);
    CHECK(constCopies != nullptr);
    CHECK(constRefSize != nullptr);
    CHECK(constReferences != nullptr);

    // Operands.
    std::vector<Operand> operands(testSubgraph.operands.size());
    for (uint32_t i = 0; i < testSubgraph.operands.size(); i++) {
        const auto& op = testSubgraph.operands[i];

        DataLocation loc = {};
        if (op.lifetime == TestOperandLifeTime::CONSTANT_COPY) {
            loc = {
                    .poolIndex = 0,
                    .offset = *constCopySize,
                    .length = static_cast<int64_t>(op.data.size()),
            };
            constCopies->push_back(&op.data);
            *constCopySize += op.data.alignedSize();
        } else if (op.lifetime == TestOperandLifeTime::CONSTANT_REFERENCE) {
            loc = {
                    .poolIndex = 0,
                    .offset = *constRefSize,
                    .length = static_cast<int64_t>(op.data.size()),
            };
            constReferences->push_back(&op.data);
            *constRefSize += op.data.alignedSize();
        } else if (op.lifetime == TestOperandLifeTime::SUBGRAPH) {
            loc = {
                    .poolIndex = 0,
                    .offset = *op.data.get<uint32_t>(),
                    .length = 0,
            };
        }

        std::optional<OperandExtraParams> extraParams;
        if (op.type == TestOperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL) {
            using Tag = OperandExtraParams::Tag;
            extraParams = OperandExtraParams::make<Tag::channelQuant>(SymmPerChannelQuantParams{
                    .scales = op.channelQuant.scales,
                    .channelDim = static_cast<int32_t>(op.channelQuant.channelDim)});
        }

        operands[i] = {.type = static_cast<OperandType>(op.type),
                       .dimensions = utils::toSigned(op.dimensions).value(),
                       .scale = op.scale,
                       .zeroPoint = op.zeroPoint,
                       .lifetime = static_cast<OperandLifeTime>(op.lifetime),
                       .location = loc,
                       .extraParams = std::move(extraParams)};
    }

    // Operations.
    std::vector<Operation> operations(testSubgraph.operations.size());
    std::transform(testSubgraph.operations.begin(), testSubgraph.operations.end(),
                   operations.begin(), [](const TestOperation& op) -> Operation {
                       return {.type = static_cast<OperationType>(op.type),
                               .inputs = utils::toSigned(op.inputs).value(),
                               .outputs = utils::toSigned(op.outputs).value()};
                   });

    return {.operands = std::move(operands),
            .operations = std::move(operations),
            .inputIndexes = utils::toSigned(testSubgraph.inputIndexes).value(),
            .outputIndexes = utils::toSigned(testSubgraph.outputIndexes).value()};
}

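// Packs the given TestBuffers contiguously into `output`, advancing the write offset
// by each buffer's alignedSize() so the layout matches the offsets recorded above.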
void copyTestBuffers(const std::vector<const TestBuffer*>& buffers, uint8_t* output) {
    uint32_t offset = 0;
    for (const TestBuffer* buffer : buffers) {
        const uint8_t* begin = buffer->get<uint8_t>();
        const uint8_t* end = begin + buffer->size();
        std::copy(begin, end, output + offset);
        offset += buffer->alignedSize();
    }
}

}  // namespace

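// Blocks until the sync fence referred to by `syncFd` signals.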
void waitForSyncFence(int syncFd) {
    constexpr int kInfiniteTimeout = -1;
    ASSERT_GT(syncFd, 0);
    int r = sync_wait(syncFd, kInfiniteTimeout);
    ASSERT_GE(r, 0);
}

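// Builds an AIDL Model from a TestModel: converts the main and referenced subgraphs,
// packs CONSTANT_COPY data into operandValues, and places CONSTANT_REFERENCE data
// into a single shared memory pool.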
Model createModel(const TestModel& testModel) {
    uint32_t constCopySize = 0;
    uint32_t constRefSize = 0;
    std::vector<const TestBuffer*> constCopies;
    std::vector<const TestBuffer*> constReferences;

    Subgraph mainSubgraph = createSubgraph(testModel.main, &constCopySize, &constCopies,
                                           &constRefSize, &constReferences);
    std::vector<Subgraph> refSubgraphs(testModel.referenced.size());
    std::transform(testModel.referenced.begin(), testModel.referenced.end(), refSubgraphs.begin(),
                   [&constCopySize, &constCopies, &constRefSize,
                    &constReferences](const TestSubgraph& testSubgraph) {
                       return createSubgraph(testSubgraph, &constCopySize, &constCopies,
                                             &constRefSize, &constReferences);
                   });

    // Constant copies.
    std::vector<uint8_t> operandValues(constCopySize);
    copyTestBuffers(constCopies, operandValues.data());

    // Shared memory.
    std::vector<nn::SharedMemory> pools = {};
    if (constRefSize > 0) {
        const auto pool = nn::createSharedMemory(constRefSize).value();
        pools.push_back(pool);

        // load data
        const auto mappedMemory = nn::map(pool).value();
        uint8_t* mappedPtr = static_cast<uint8_t*>(std::get<void*>(mappedMemory.pointer));
        CHECK(mappedPtr != nullptr);

        copyTestBuffers(constReferences, mappedPtr);
    }

    std::vector<Memory> aidlPools;
    aidlPools.reserve(pools.size());
    for (auto& pool : pools) {
        auto aidlPool = utils::convert(pool).value();
        aidlPools.push_back(std::move(aidlPool));
    }

    return {.main = std::move(mainSubgraph),
            .referenced = std::move(refSubgraphs),
            .operandValues = std::move(operandValues),
            .pools = std::move(aidlPools),
            .relaxComputationFloat32toFloat16 = testModel.isRelaxed};
}

static bool isOutputSizeGreaterThanOne(const TestModel& testModel, uint32_t index) {
    const auto byteSize = testModel.main.operands[testModel.main.outputIndexes[index]].data.size();
    return byteSize > 1u;
}

static void makeOutputInsufficientSize(uint32_t outputIndex, Request* request) {
    auto& loc = request->outputs[outputIndex].location;
    ASSERT_GT(loc.length, 1u);
    loc.length -= 1u;
    // Test that the padding is not used for output data.
    loc.padding += 1u;
}

static void makeOutputDimensionsUnspecified(Model* model) {
    for (auto i : model->main.outputIndexes) {
        auto& dims = model->main.operands[i].dimensions;
        std::fill(dims.begin(), dims.end(), 0);
    }
}

// Manages the lifetime of memory resources used in an execution.
class ExecutionContext {
  public:
    ExecutionContext(std::shared_ptr<IDevice> device, std::shared_ptr<IPreparedModel> preparedModel)
        : kDevice(std::move(device)), kPreparedModel(std::move(preparedModel)) {}

    std::optional<Request> createRequest(const TestModel& testModel, MemoryType memoryType);
    std::vector<TestBuffer> getOutputBuffers(const TestModel& testModel,
                                             const Request& request) const;

  private:
    // Get a TestBuffer with data copied from an IBuffer object.
    void getBuffer(const std::shared_ptr<IBuffer>& buffer, size_t size,
                   TestBuffer* testBuffer) const;

    static constexpr uint32_t kInputPoolIndex = 0;
    static constexpr uint32_t kOutputPoolIndex = 1;
    static constexpr uint32_t kDeviceMemoryBeginIndex = 2;

    const std::shared_ptr<IDevice> kDevice;
    const std::shared_ptr<IPreparedModel> kPreparedModel;
    std::unique_ptr<TestMemoryBase> mInputMemory, mOutputMemory;
    std::vector<std::shared_ptr<IBuffer>> mBuffers;
};

// Returns the number of bytes needed to round up "size" to the nearest multiple of "multiple".
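// For example, roundUpBytesNeeded(5, 4) returns 3, since 5 + 3 = 8 is the next multiple of 4.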
static uint32_t roundUpBytesNeeded(uint32_t size, uint32_t multiple) {
    CHECK(multiple != 0);
    return ((size + multiple - 1) / multiple) * multiple - size;
}

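// Creates a Request for the given TestModel. Inputs and outputs are backed by the
// input/output shared memory pools (ashmem or BLOB AHardwareBuffer) unless
// MemoryType::DEVICE is requested, in which case device memories are allocated via
// IDevice::allocate where possible. Returns std::nullopt if device memory was
// requested but none could be allocated.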
std::optional<Request> ExecutionContext::createRequest(const TestModel& testModel,
                                                       MemoryType memoryType) {
    // Memory pools are organized as:
    // - 0: Input shared memory pool
    // - 1: Output shared memory pool
    // - [2, 2+i): Input device memories
    // - [2+i, 2+i+o): Output device memories
    DeviceMemoryAllocator allocator(kDevice, kPreparedModel, testModel);
    std::vector<int32_t> tokens;
    mBuffers.clear();

    // Model inputs.
    std::vector<RequestArgument> inputs(testModel.main.inputIndexes.size());
    size_t inputSize = 0;
    for (uint32_t i = 0; i < testModel.main.inputIndexes.size(); i++) {
        const auto& op = testModel.main.operands[testModel.main.inputIndexes[i]];
        if (op.data.size() == 0) {
            // Omitted input.
            inputs[i] = {.hasNoValue = true};
            continue;
        } else if (memoryType == MemoryType::DEVICE) {
            SCOPED_TRACE("Input index = " + std::to_string(i));
            auto [buffer, token] = allocator.allocate<IOType::INPUT>(i);
            if (buffer != nullptr) {
                DataLocation loc = {.poolIndex = static_cast<int32_t>(mBuffers.size() +
                                                                      kDeviceMemoryBeginIndex)};
                mBuffers.push_back(std::move(buffer));
                tokens.push_back(token);
                inputs[i] = {.hasNoValue = false, .location = loc, .dimensions = {}};
                continue;
            }
        }

        // Reserve shared memory for input.
        inputSize += roundUpBytesNeeded(inputSize, nn::kDefaultRequestMemoryAlignment);
        const auto padding = roundUpBytesNeeded(op.data.size(), nn::kDefaultRequestMemoryPadding);
        DataLocation loc = {.poolIndex = kInputPoolIndex,
                            .offset = static_cast<int64_t>(inputSize),
                            .length = static_cast<int64_t>(op.data.size()),
                            .padding = static_cast<int64_t>(padding)};
        inputSize += (op.data.size() + padding);
        inputs[i] = {.hasNoValue = false, .location = loc, .dimensions = {}};
    }

    // Model outputs.
    std::vector<RequestArgument> outputs(testModel.main.outputIndexes.size());
    size_t outputSize = 0;
    for (uint32_t i = 0; i < testModel.main.outputIndexes.size(); i++) {
        const auto& op = testModel.main.operands[testModel.main.outputIndexes[i]];
        if (memoryType == MemoryType::DEVICE) {
            SCOPED_TRACE("Output index = " + std::to_string(i));
            auto [buffer, token] = allocator.allocate<IOType::OUTPUT>(i);
            if (buffer != nullptr) {
                DataLocation loc = {.poolIndex = static_cast<int32_t>(mBuffers.size() +
                                                                      kDeviceMemoryBeginIndex)};
                mBuffers.push_back(std::move(buffer));
                tokens.push_back(token);
                outputs[i] = {.hasNoValue = false, .location = loc, .dimensions = {}};
                continue;
            }
        }

        // In the case of zero-sized output, we should at least provide a one-byte buffer.
        // This is because zero-sized tensors are only supported internally to the driver, or
        // reported in output shapes. It is illegal for the client to pre-specify a zero-sized
        // tensor as model output. Otherwise, we will have two semantic conflicts:
        // - "Zero dimension" conflicts with "unspecified dimension".
        // - "Omitted operand buffer" conflicts with "zero-sized operand buffer".
        size_t bufferSize = std::max<size_t>(op.data.size(), 1);

        // Reserve shared memory for output.
        outputSize += roundUpBytesNeeded(outputSize, nn::kDefaultRequestMemoryAlignment);
        const auto padding = roundUpBytesNeeded(bufferSize, nn::kDefaultRequestMemoryPadding);
        DataLocation loc = {.poolIndex = kOutputPoolIndex,
                            .offset = static_cast<int64_t>(outputSize),
                            .length = static_cast<int64_t>(bufferSize),
                            .padding = static_cast<int64_t>(padding)};
        outputSize += (bufferSize + padding);
        outputs[i] = {.hasNoValue = false, .location = loc, .dimensions = {}};
    }

    if (memoryType == MemoryType::DEVICE && mBuffers.empty()) {
        return std::nullopt;
    }

    // Memory pools.
    if (memoryType == MemoryType::BLOB_AHWB) {
        mInputMemory = TestBlobAHWB::create(std::max<size_t>(inputSize, 1));
        mOutputMemory = TestBlobAHWB::create(std::max<size_t>(outputSize, 1));
    } else {
        mInputMemory = TestAshmem::create(std::max<size_t>(inputSize, 1), /*aidlReadonly=*/true);
        mOutputMemory = TestAshmem::create(std::max<size_t>(outputSize, 1), /*aidlReadonly=*/false);
    }
    CHECK_NE(mInputMemory, nullptr);
    CHECK_NE(mOutputMemory, nullptr);
    std::vector<RequestMemoryPool> pools;
    pools.reserve(kDeviceMemoryBeginIndex + mBuffers.size());

    auto copiedInputMemory = utils::clone(*mInputMemory->getAidlMemory());
    CHECK(copiedInputMemory.has_value()) << copiedInputMemory.error().message;
    auto copiedOutputMemory = utils::clone(*mOutputMemory->getAidlMemory());
    CHECK(copiedOutputMemory.has_value()) << copiedOutputMemory.error().message;

    pools.push_back(RequestMemoryPool::make<RequestMemoryPool::Tag::pool>(
            std::move(copiedInputMemory).value()));
    pools.push_back(RequestMemoryPool::make<RequestMemoryPool::Tag::pool>(
            std::move(copiedOutputMemory).value()));
    for (const auto& token : tokens) {
        pools.push_back(RequestMemoryPool::make<RequestMemoryPool::Tag::token>(token));
    }

    // Copy input data to the input shared memory pool.
    uint8_t* inputPtr = mInputMemory->getPointer();
    for (uint32_t i = 0; i < testModel.main.inputIndexes.size(); i++) {
        if (!inputs[i].hasNoValue && inputs[i].location.poolIndex == kInputPoolIndex) {
            const auto& op = testModel.main.operands[testModel.main.inputIndexes[i]];
            const uint8_t* begin = op.data.get<uint8_t>();
            const uint8_t* end = begin + op.data.size();
            std::copy(begin, end, inputPtr + inputs[i].location.offset);
        }
    }
    return Request{
            .inputs = std::move(inputs), .outputs = std::move(outputs), .pools = std::move(pools)};
}

std::vector<TestBuffer> ExecutionContext::getOutputBuffers(const TestModel& testModel,
                                                           const Request& request) const {
    // Copy out output results.
    uint8_t* outputPtr = mOutputMemory->getPointer();
    std::vector<TestBuffer> outputBuffers;
    for (uint32_t i = 0; i < request.outputs.size(); i++) {
        const auto& outputLoc = request.outputs[i].location;
        if (outputLoc.poolIndex == kOutputPoolIndex) {
            outputBuffers.emplace_back(outputLoc.length, outputPtr + outputLoc.offset);
        } else {
            const auto& op = testModel.main.operands[testModel.main.outputIndexes[i]];
            if (op.data.size() == 0) {
                outputBuffers.emplace_back(0, nullptr);
            } else {
                SCOPED_TRACE("Output index = " + std::to_string(i));
                const uint32_t bufferIndex = outputLoc.poolIndex - kDeviceMemoryBeginIndex;
                TestBuffer buffer;
                getBuffer(mBuffers[bufferIndex], op.data.size(), &buffer);
                outputBuffers.push_back(std::move(buffer));
            }
        }
    }
    return outputBuffers;
}

// Get a TestBuffer with data copied from an IBuffer object.
void ExecutionContext::getBuffer(const std::shared_ptr<IBuffer>& buffer, size_t size,
                                 TestBuffer* testBuffer) const {
    // IBuffer -> Shared memory.
    auto sharedMemory = nn::createSharedMemory(size).value();
    auto aidlMemory = utils::convert(sharedMemory).value();
    const auto ret = buffer->copyTo(aidlMemory);
    ASSERT_TRUE(ret.isOk());

    // Shared memory -> TestBuffer.
    const auto outputMemory = nn::map(sharedMemory).value();
    const uint8_t* outputPtr = std::visit(
            [](auto* ptr) { return static_cast<const uint8_t*>(ptr); }, outputMemory.pointer);
    ASSERT_NE(outputPtr, nullptr);
    ASSERT_NE(testBuffer, nullptr);
    *testBuffer = TestBuffer(size, outputPtr);
}

static bool hasZeroSizedOutput(const TestModel& testModel) {
    return std::any_of(testModel.main.outputIndexes.begin(), testModel.main.outputIndexes.end(),
                       [&testModel](uint32_t index) {
                           return testModel.main.operands[index].data.size() == 0;
                       });
}

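// Executes `preparedModel` on a request built from `testModel`, using the executor,
// timing, output, and memory settings in `testConfig`, then validates the returned
// status, timing, output shapes, and output data. If `skipped` is non-null, it is set
// to true when the test is skipped because the vendor service cannot execute the model.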
void EvaluatePreparedModel(const std::shared_ptr<IDevice>& device,
                           const std::shared_ptr<IPreparedModel>& preparedModel,
                           const TestModel& testModel, const TestConfig& testConfig,
                           bool* skipped = nullptr) {
    if (skipped != nullptr) {
        *skipped = false;
    }
    // If output0 is not larger than one byte, we cannot test with an insufficient output buffer.
    if (testConfig.outputType == OutputType::INSUFFICIENT &&
        !isOutputSizeGreaterThanOne(testModel, 0)) {
        return;
    }

    ExecutionContext context(device, preparedModel);
    auto maybeRequest = context.createRequest(testModel, testConfig.memoryType);
    // Skip if testing memory domain but no device memory has been allocated.
    if (!maybeRequest.has_value()) {
        return;
    }

    Request request = std::move(maybeRequest).value();

    constexpr uint32_t kInsufficientOutputIndex = 0;
    if (testConfig.outputType == OutputType::INSUFFICIENT) {
        makeOutputInsufficientSize(kInsufficientOutputIndex, &request);
    }

    int64_t loopTimeoutDurationNs = kOmittedTimeoutDuration;
    // OutputType::MISSED_DEADLINE is only used by
    // TestKind::INTINITE_LOOP_TIMEOUT tests to verify that an infinite loop is
    // aborted after a timeout.
    if (testConfig.outputType == OutputType::MISSED_DEADLINE) {
        // Override the default loop timeout duration with a small value to
        // speed up test execution.
        constexpr int64_t kMillisecond = 1'000'000;
        loopTimeoutDurationNs = 1 * kMillisecond;
    }

    ErrorStatus executionStatus;
    std::vector<OutputShape> outputShapes;
    Timing timing = kNoTiming;
    switch (testConfig.executor) {
        case Executor::SYNC: {
            SCOPED_TRACE("synchronous");

            ExecutionResult executionResult;
            // execute
            const auto ret = preparedModel->executeSynchronously(request, testConfig.measureTiming,
                                                                 kNoDeadline, loopTimeoutDurationNs,
                                                                 &executionResult);
            ASSERT_TRUE(ret.isOk() || ret.getExceptionCode() == EX_SERVICE_SPECIFIC)
                    << ret.getDescription();
            if (ret.isOk()) {
                executionStatus = executionResult.outputSufficientSize
                                          ? ErrorStatus::NONE
                                          : ErrorStatus::OUTPUT_INSUFFICIENT_SIZE;
                outputShapes = std::move(executionResult.outputShapes);
                timing = executionResult.timing;
            } else {
                executionStatus = static_cast<ErrorStatus>(ret.getServiceSpecificError());
            }
            break;
        }
        case Executor::BURST: {
            SCOPED_TRACE("burst");

            // create burst
            std::shared_ptr<IBurst> burst;
            auto ret = preparedModel->configureExecutionBurst(&burst);
            ASSERT_TRUE(ret.isOk()) << ret.getDescription();
            ASSERT_NE(nullptr, burst.get());

            // associate a unique slot with each memory pool
            int64_t currentSlot = 0;
            std::vector<int64_t> slots;
            slots.reserve(request.pools.size());
            for (const auto& pool : request.pools) {
                if (pool.getTag() == RequestMemoryPool::Tag::pool) {
                    slots.push_back(currentSlot++);
                } else {
                    EXPECT_EQ(pool.getTag(), RequestMemoryPool::Tag::token);
                    slots.push_back(-1);
                }
            }

            ExecutionResult executionResult;
            // execute
            ret = burst->executeSynchronously(request, slots, testConfig.measureTiming, kNoDeadline,
                                              loopTimeoutDurationNs, &executionResult);
            ASSERT_TRUE(ret.isOk() || ret.getExceptionCode() == EX_SERVICE_SPECIFIC)
                    << ret.getDescription();
            if (ret.isOk()) {
                executionStatus = executionResult.outputSufficientSize
                                          ? ErrorStatus::NONE
                                          : ErrorStatus::OUTPUT_INSUFFICIENT_SIZE;
                outputShapes = std::move(executionResult.outputShapes);
                timing = executionResult.timing;
            } else {
                executionStatus = static_cast<ErrorStatus>(ret.getServiceSpecificError());
            }

            // Mark each slot as unused after the execution. This is unnecessary because the burst
            // is freed after this scope ends, but this is here to test the functionality.
            for (int64_t slot : slots) {
                ret = burst->releaseMemoryResource(slot);
                ASSERT_TRUE(ret.isOk()) << ret.getDescription();
            }

            break;
        }
        case Executor::FENCED: {
            SCOPED_TRACE("fenced");
            ErrorStatus result = ErrorStatus::NONE;
            FencedExecutionResult executionResult;
            auto ret = preparedModel->executeFenced(request, {}, testConfig.measureTiming,
                                                    kNoDeadline, loopTimeoutDurationNs, kNoDuration,
                                                    &executionResult);
            ASSERT_TRUE(ret.isOk() || ret.getExceptionCode() == EX_SERVICE_SPECIFIC)
                    << ret.getDescription();
            if (!ret.isOk()) {
                result = static_cast<ErrorStatus>(ret.getServiceSpecificError());
                executionStatus = result;
            } else if (executionResult.syncFence.get() != -1) {
                std::vector<ndk::ScopedFileDescriptor> waitFor;
                auto dupFd = dup(executionResult.syncFence.get());
                ASSERT_NE(dupFd, -1);
                waitFor.emplace_back(dupFd);
                // If a sync fence is returned, start another run that waits for the sync fence.
                ret = preparedModel->executeFenced(request, waitFor, testConfig.measureTiming,
                                                   kNoDeadline, loopTimeoutDurationNs, kNoDuration,
                                                   &executionResult);
                ASSERT_TRUE(ret.isOk());
                waitForSyncFence(executionResult.syncFence.get());
            }
            if (result == ErrorStatus::NONE) {
                ASSERT_NE(executionResult.callback, nullptr);
                Timing timingFenced;
                auto ret = executionResult.callback->getExecutionInfo(&timing, &timingFenced,
                                                                      &executionStatus);
                ASSERT_TRUE(ret.isOk());
            }
            break;
        }
        default: {
            FAIL() << "Unsupported execution mode for AIDL interface.";
        }
    }

    if (testConfig.outputType != OutputType::FULLY_SPECIFIED &&
        executionStatus == ErrorStatus::GENERAL_FAILURE) {
        if (skipped != nullptr) {
            *skipped = true;
        }
        if (!testConfig.reportSkipping) {
            return;
        }
        LOG(INFO) << "NN VTS: Early termination of test because vendor service cannot "
                     "execute model that it does not support.";
        std::cout << "[          ]   Early termination of test because vendor service cannot "
                     "execute model that it does not support."
                  << std::endl;
        GTEST_SKIP();
    }
    if (!testConfig.measureTiming) {
        EXPECT_EQ(timing, kNoTiming);
    } else {
        if (timing.timeOnDeviceNs != -1 && timing.timeInDriverNs != -1) {
            EXPECT_LE(timing.timeOnDeviceNs, timing.timeInDriverNs);
        }
    }

    switch (testConfig.outputType) {
        case OutputType::FULLY_SPECIFIED:
            if (testConfig.executor == Executor::FENCED && hasZeroSizedOutput(testModel)) {
                // Executor::FENCED does not support zero-sized output.
                ASSERT_EQ(ErrorStatus::INVALID_ARGUMENT, executionStatus);
                return;
            }
            // If the model output operands are fully specified, outputShapes must be
            // either empty, or have the same number of elements as the number of outputs.
            ASSERT_EQ(ErrorStatus::NONE, executionStatus);
            ASSERT_TRUE(outputShapes.size() == 0 ||
                        outputShapes.size() == testModel.main.outputIndexes.size());
            break;
        case OutputType::UNSPECIFIED:
            if (testConfig.executor == Executor::FENCED) {
                // For Executor::FENCED, the output shape must be fully specified.
                ASSERT_EQ(ErrorStatus::INVALID_ARGUMENT, executionStatus);
                return;
            }
            // If the model output operands are not fully specified, outputShapes must have
            // the same number of elements as the number of outputs.
            ASSERT_EQ(ErrorStatus::NONE, executionStatus);
            ASSERT_EQ(outputShapes.size(), testModel.main.outputIndexes.size());
            break;
        case OutputType::INSUFFICIENT:
            if (testConfig.executor == Executor::FENCED) {
                // For Executor::FENCED, the output shape must be fully specified.
                ASSERT_EQ(ErrorStatus::INVALID_ARGUMENT, executionStatus);
                return;
            }
            ASSERT_EQ(ErrorStatus::OUTPUT_INSUFFICIENT_SIZE, executionStatus);
            ASSERT_EQ(outputShapes.size(), testModel.main.outputIndexes.size());
            // Check that all returned output dimensions are at least as fully specified as the
            // union of the information about the corresponding operand in the model and in the
            // request. In this test, all model outputs have known rank with all dimensions
            // unspecified, and no dimensional information is provided in the request.
            for (uint32_t i = 0; i < outputShapes.size(); i++) {
                ASSERT_EQ(outputShapes[i].isSufficient, i != kInsufficientOutputIndex);
                const auto& actual = outputShapes[i].dimensions;
                const auto& golden =
                        testModel.main.operands[testModel.main.outputIndexes[i]].dimensions;
                ASSERT_EQ(actual.size(), golden.size());
                for (uint32_t j = 0; j < actual.size(); j++) {
                    if (actual[j] == 0) continue;
                    EXPECT_EQ(actual[j], golden[j]) << "index: " << j;
                }
            }
            return;
        case OutputType::MISSED_DEADLINE:
            ASSERT_TRUE(executionStatus == ErrorStatus::MISSED_DEADLINE_TRANSIENT ||
                        executionStatus == ErrorStatus::MISSED_DEADLINE_PERSISTENT)
                    << "executionStatus = " << executionStatus;
            return;
    }

    // Go through all outputs, check returned output shapes.
    for (uint32_t i = 0; i < outputShapes.size(); i++) {
        EXPECT_TRUE(outputShapes[i].isSufficient);
        const auto& expect = testModel.main.operands[testModel.main.outputIndexes[i]].dimensions;
        const auto unsignedActual = nn::toUnsigned(outputShapes[i].dimensions);
        ASSERT_TRUE(unsignedActual.has_value());
        const std::vector<uint32_t>& actual = unsignedActual.value();
        EXPECT_EQ(expect, actual);
    }

    // Retrieve execution results.
    const std::vector<TestBuffer> outputs = context.getOutputBuffers(testModel, request);

    // We want "close-enough" results.
    checkResults(testModel, outputs);
}

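// Runs EvaluatePreparedModel over every combination of output type, timing measurement,
// executor, and memory type appropriate for the given TestKind.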
void EvaluatePreparedModel(const std::shared_ptr<IDevice>& device,
                           const std::shared_ptr<IPreparedModel>& preparedModel,
                           const TestModel& testModel, TestKind testKind) {
    std::vector<OutputType> outputTypesList;
    std::vector<bool> measureTimingList;
    std::vector<Executor> executorList;
    std::vector<MemoryType> memoryTypeList;

    switch (testKind) {
        case TestKind::GENERAL: {
            outputTypesList = {OutputType::FULLY_SPECIFIED};
            measureTimingList = {false, true};
            executorList = {Executor::SYNC, Executor::BURST};
            memoryTypeList = {MemoryType::ASHMEM};
        } break;
        case TestKind::DYNAMIC_SHAPE: {
            outputTypesList = {OutputType::UNSPECIFIED, OutputType::INSUFFICIENT};
            measureTimingList = {false, true};
            executorList = {Executor::SYNC, Executor::BURST, Executor::FENCED};
            memoryTypeList = {MemoryType::ASHMEM};
        } break;
        case TestKind::MEMORY_DOMAIN: {
            outputTypesList = {OutputType::FULLY_SPECIFIED};
            measureTimingList = {false};
            executorList = {Executor::SYNC, Executor::BURST, Executor::FENCED};
            memoryTypeList = {MemoryType::BLOB_AHWB, MemoryType::DEVICE};
        } break;
        case TestKind::FENCED_COMPUTE: {
            outputTypesList = {OutputType::FULLY_SPECIFIED};
            measureTimingList = {false, true};
            executorList = {Executor::FENCED};
            memoryTypeList = {MemoryType::ASHMEM};
        } break;
        case TestKind::QUANTIZATION_COUPLING: {
            LOG(FATAL) << "Wrong TestKind for EvaluatePreparedModel";
            return;
        } break;
        case TestKind::INTINITE_LOOP_TIMEOUT: {
            outputTypesList = {OutputType::MISSED_DEADLINE};
            measureTimingList = {false, true};
            executorList = {Executor::SYNC, Executor::BURST, Executor::FENCED};
            memoryTypeList = {MemoryType::ASHMEM};
        } break;
    }

    for (const OutputType outputType : outputTypesList) {
        for (const bool measureTiming : measureTimingList) {
            for (const Executor executor : executorList) {
                for (const MemoryType memoryType : memoryTypeList) {
                    const TestConfig testConfig(executor, measureTiming, outputType, memoryType);
                    EvaluatePreparedModel(device, preparedModel, testModel, testConfig);
                }
            }
        }
    }
}

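// Used by quantization coupling tests: executes a base model and its signed-quantized
// counterpart with identical configurations and requires that either both executions
// run or both are skipped.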
void EvaluatePreparedCoupledModels(const std::shared_ptr<IDevice>& device,
                                   const std::shared_ptr<IPreparedModel>& preparedModel,
                                   const TestModel& testModel,
                                   const std::shared_ptr<IPreparedModel>& preparedCoupledModel,
                                   const TestModel& coupledModel) {
    const std::vector<OutputType> outputTypesList = {OutputType::FULLY_SPECIFIED};
    const std::vector<bool> measureTimingList = {false, true};
    const std::vector<Executor> executorList = {Executor::SYNC, Executor::BURST, Executor::FENCED};

    for (const OutputType outputType : outputTypesList) {
        for (const bool measureTiming : measureTimingList) {
            for (const Executor executor : executorList) {
                const TestConfig testConfig(executor, measureTiming, outputType, MemoryType::ASHMEM,
                                            /*reportSkipping=*/false);
                bool baseSkipped = false;
                EvaluatePreparedModel(device, preparedModel, testModel, testConfig, &baseSkipped);
                bool coupledSkipped = false;
                EvaluatePreparedModel(device, preparedCoupledModel, coupledModel, testConfig,
                                      &coupledSkipped);
                ASSERT_EQ(baseSkipped, coupledSkipped);
                if (baseSkipped) {
                    LOG(INFO) << "NN VTS: Early termination of test because vendor service cannot "
                                 "execute model that it does not support.";
                    std::cout << "[          ]   Early termination of test because vendor service "
                                 "cannot "
                                 "execute model that it does not support."
                              << std::endl;
                    GTEST_SKIP();
                }
            }
        }
    }
}

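// Entry point for a generated test: creates the AIDL Model, prepares it on the device,
// and evaluates it according to `testKind`. QUANTIZATION_COUPLING additionally prepares
// a signed-quantized copy of the model and checks that both behave consistently.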
void Execute(const std::shared_ptr<IDevice>& device, const TestModel& testModel,
             TestKind testKind) {
    Model model = createModel(testModel);
    if (testKind == TestKind::DYNAMIC_SHAPE) {
        makeOutputDimensionsUnspecified(&model);
    }

    std::shared_ptr<IPreparedModel> preparedModel;
    switch (testKind) {
        case TestKind::GENERAL:
        case TestKind::DYNAMIC_SHAPE:
        case TestKind::MEMORY_DOMAIN:
        case TestKind::FENCED_COMPUTE:
        case TestKind::INTINITE_LOOP_TIMEOUT: {
            createPreparedModel(device, model, &preparedModel);
            if (preparedModel == nullptr) return;
            EvaluatePreparedModel(device, preparedModel, testModel, testKind);
        } break;
        case TestKind::QUANTIZATION_COUPLING: {
            ASSERT_TRUE(testModel.hasQuant8CoupledOperands());
            createPreparedModel(device, model, &preparedModel,
                                /*reportSkipping*/ false);
            TestModel signedQuantizedModel = convertQuant8AsymmOperandsToSigned(testModel);
            std::shared_ptr<IPreparedModel> preparedCoupledModel;
            createPreparedModel(device, createModel(signedQuantizedModel), &preparedCoupledModel,
                                /*reportSkipping*/ false);
            // If we couldn't prepare a model with unsigned quantization, we must
            // fail to prepare a model with signed quantization as well.
            if (preparedModel == nullptr) {
                ASSERT_EQ(preparedCoupledModel, nullptr);
                // If we failed to prepare both of the models, we can safely skip
                // the test.
                LOG(INFO) << "NN VTS: Early termination of test because vendor service cannot "
                             "prepare model that it does not support.";
                std::cout
                        << "[          ]   Early termination of test because vendor service cannot "
                           "prepare model that it does not support."
                        << std::endl;
                GTEST_SKIP();
            }
            ASSERT_NE(preparedCoupledModel, nullptr);
            EvaluatePreparedCoupledModels(device, preparedModel, testModel, preparedCoupledModel,
                                          signedQuantizedModel);
        } break;
    }
}

void GeneratedTestBase::SetUp() {
    testing::TestWithParam<GeneratedTestParam>::SetUp();
    ASSERT_NE(kDevice, nullptr);
    const bool deviceIsResponsive =
            ndk::ScopedAStatus::fromStatus(AIBinder_ping(kDevice->asBinder().get())).isOk();
    ASSERT_TRUE(deviceIsResponsive);
}

std::vector<NamedModel> getNamedModels(const FilterFn& filter) {
    return TestModelManager::get().getTestModels(filter);
}

std::vector<NamedModel> getNamedModels(const FilterNameFn& filter) {
    return TestModelManager::get().getTestModels(filter);
}

std::string printGeneratedTest(const testing::TestParamInfo<GeneratedTestParam>& info) {
    const auto& [namedDevice, namedModel] = info.param;
    return gtestCompliantName(getName(namedDevice) + "_" + getName(namedModel));
}

// Tag for the generated tests
class GeneratedTest : public GeneratedTestBase {};

// Tag for the dynamic output shape tests
class DynamicOutputShapeTest : public GeneratedTest {};

// Tag for the memory domain tests
class MemoryDomainTest : public GeneratedTest {};

// Tag for the fenced compute tests
class FencedComputeTest : public GeneratedTest {};

// Tag for the quantization coupling tests
class QuantizationCouplingTest : public GeneratedTest {};

// Tag for the loop timeout tests
class InfiniteLoopTimeoutTest : public GeneratedTest {};

TEST_P(GeneratedTest, Test) {
    Execute(kDevice, kTestModel, TestKind::GENERAL);
}

TEST_P(DynamicOutputShapeTest, Test) {
    Execute(kDevice, kTestModel, TestKind::DYNAMIC_SHAPE);
}

TEST_P(MemoryDomainTest, Test) {
    Execute(kDevice, kTestModel, TestKind::MEMORY_DOMAIN);
}

TEST_P(FencedComputeTest, Test) {
    Execute(kDevice, kTestModel, TestKind::FENCED_COMPUTE);
}

TEST_P(QuantizationCouplingTest, Test) {
    Execute(kDevice, kTestModel, TestKind::QUANTIZATION_COUPLING);
}

TEST_P(InfiniteLoopTimeoutTest, Test) {
    Execute(kDevice, kTestModel, TestKind::INTINITE_LOOP_TIMEOUT);
}

INSTANTIATE_GENERATED_TEST(GeneratedTest,
                           [](const TestModel& testModel) { return !testModel.expectFailure; });

INSTANTIATE_GENERATED_TEST(DynamicOutputShapeTest, [](const TestModel& testModel) {
    return !testModel.expectFailure && !testModel.hasScalarOutputs();
});

INSTANTIATE_GENERATED_TEST(MemoryDomainTest,
                           [](const TestModel& testModel) { return !testModel.expectFailure; });

INSTANTIATE_GENERATED_TEST(FencedComputeTest,
                           [](const TestModel& testModel) { return !testModel.expectFailure; });

INSTANTIATE_GENERATED_TEST(QuantizationCouplingTest, [](const TestModel& testModel) {
    return !testModel.expectFailure && testModel.hasQuant8CoupledOperands() &&
           testModel.main.operations.size() == 1;
});

INSTANTIATE_GENERATED_TEST(InfiniteLoopTimeoutTest, [](const TestModel& testModel) {
    return testModel.isInfiniteLoopTimeoutTest();
});

}  // namespace aidl::android::hardware::neuralnetworks::vts::functional