1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <ExecutionBurstServer.h>
18 #include <HalInterfaces.h>
19 #include <SampleDriver.h>
20 #include <ValidateHal.h>
21 #include <gtest/gtest.h>
22
23 #include <algorithm>
24 #include <chrono>
25 #include <iterator>
26 #include <map>
27 #include <queue>
28 #include <set>
29 #include <string>
30 #include <thread>
31 #include <tuple>
32 #include <utility>
33 #include <vector>
34
35 #include "CompilationBuilder.h"
36 #include "HalUtils.h"
37 #include "Manager.h"
38 #include "NeuralNetworks.h"
39 #include "NeuralNetworksOEM.h"
40 #include "TestNeuralNetworksWrapper.h"
41
42 namespace {
43
44 using namespace ::android;
45 namespace V1_0 = ::android::hardware::neuralnetworks::V1_0;
46 namespace V1_1 = ::android::hardware::neuralnetworks::V1_1;
47 namespace V1_2 = ::android::hardware::neuralnetworks::V1_2;
48 namespace V1_3 = ::android::hardware::neuralnetworks::V1_3;
49
50 using CompilationBuilder = nn::CompilationBuilder;
51 using Device = nn::Device;
52 using DeviceManager = nn::DeviceManager;
53 using ExecutePreference = nn::test_wrapper::ExecutePreference;
54 using ExecutionBurstServer = nn::ExecutionBurstServer;
55 using HidlModel = V1_3::Model;
56 using Result = nn::test_wrapper::Result;
57 using SampleDriver = nn::sample_driver::SampleDriver;
58 using SamplePreparedModel = nn::sample_driver::SamplePreparedModel;
59 using SampleFencedExecutionCallback = nn::sample_driver::SampleFencedExecutionCallback;
60 using WrapperModel = nn::test_wrapper::Model;
61 using WrapperOperandType = nn::test_wrapper::OperandType;
62 using WrapperType = nn::test_wrapper::Type;
63 using nn::convertToV1_0;
64 using nn::convertToV1_3;
65
66 template <typename T>
67 using MQDescriptorSync = hardware::MQDescriptorSync<T>;
68
// Sentinel timing (both fields UINT64_MAX) meaning "no timing information available".
constexpr V1_2::Timing kBadTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX};
// Distinct, recognizable values so tests can verify that unfenced and fenced
// timings are each reported through the correct channel.
constexpr V1_2::Timing kGoodUnfencedTiming = {.timeOnDevice = 123, .timeInDriver = 456};
constexpr V1_2::Timing kGoodFencedTiming = {.timeOnDevice = 23, .timeInDriver = 56};
72
73 // This is an IDevice for testing purposes. The test driver has customized
74 // getCapabilities_1_3 and getSupportedOperations_1_3.
75 class TestDriver : public SampleDriver {
76 public:
TestDriver(const char * name,V1_3::Capabilities capabilities,const std::vector<bool> & supportedOps)77 TestDriver(const char* name, V1_3::Capabilities capabilities,
78 const std::vector<bool>& supportedOps)
79 : SampleDriver(name), mCapabilities(capabilities), mSupportedOps(supportedOps) {}
~TestDriver()80 ~TestDriver() override {}
81
getCapabilities_1_3(getCapabilities_1_3_cb cb)82 hardware::Return<void> getCapabilities_1_3(getCapabilities_1_3_cb cb) override {
83 cb(V1_3::ErrorStatus::NONE, mCapabilities);
84 return hardware::Void();
85 }
86
getSupportedOperations_1_3(const V1_3::Model & model,getSupportedOperations_1_3_cb cb)87 hardware::Return<void> getSupportedOperations_1_3(const V1_3::Model& model,
88 getSupportedOperations_1_3_cb cb) override {
89 if (!android::nn::validateModel(model)) {
90 cb(V1_3::ErrorStatus::INVALID_ARGUMENT, std::vector<bool>());
91 return hardware::Void();
92 }
93 const size_t count = model.main.operations.size();
94 std::vector<bool> supported(count);
95 std::transform(model.main.operations.begin(), model.main.operations.end(),
96 supported.begin(), [this](V1_3::Operation op) {
97 return mSupportedOps[static_cast<int32_t>(op.type)];
98 });
99 cb(V1_3::ErrorStatus::NONE, supported);
100 return hardware::Void();
101 }
102
103 private:
104 V1_3::Capabilities mCapabilities;
105 std::vector<bool> mSupportedOps;
106 };
107
108 class IntrospectionControlTest : public ::testing::Test {
109 protected:
SetUp()110 virtual void SetUp() {}
TearDown()111 virtual void TearDown() {
112 if (mEvent) {
113 ANeuralNetworksEvent_free(mEvent);
114 }
115 if (mExecution) {
116 ANeuralNetworksExecution_free(mExecution);
117 }
118 if (mCompilation) {
119 ANeuralNetworksCompilation_free(mCompilation);
120 }
121 DeviceManager::get()->forTest_reInitializeDeviceList();
122 }
123
124 struct DeviceSpecification {
DeviceSpecification__anon138cc3190110::IntrospectionControlTest::DeviceSpecification125 DeviceSpecification(const std::string& name, float perf, std::vector<bool>& supportedOps)
126 : mName(name), mSupportedOps(supportedOps) {
127 V1_0::PerformanceInfo perfInfo = {.execTime = perf, .powerUsage = perf};
128 mCapabilities = {
129 .relaxedFloat32toFloat16PerformanceScalar = perfInfo,
130 .relaxedFloat32toFloat16PerformanceTensor = perfInfo,
131 .operandPerformance =
132 nn::nonExtensionOperandPerformance<nn::HalVersion::V1_3>(perfInfo),
133 .ifPerformance = perfInfo,
134 .whilePerformance = perfInfo};
135 }
136 std::string mName;
137 V1_3::Capabilities mCapabilities;
138 std::vector<bool> mSupportedOps;
139 };
140
141 // From a vector of DeviceSpecification, register new Devices.
registerDevices(std::vector<DeviceSpecification> specifications)142 void registerDevices(std::vector<DeviceSpecification> specifications) {
143 for (const auto& specification : specifications) {
144 DeviceManager::get()->forTest_registerDevice(nn::makeSharedDevice(
145 specification.mName.c_str(),
146 new TestDriver(specification.mName.c_str(), specification.mCapabilities,
147 specification.mSupportedOps)));
148 }
149 }
150
selectDeviceByName(const std::string & name)151 bool selectDeviceByName(const std::string& name) {
152 uint32_t numDevices = 0;
153 EXPECT_EQ(ANeuralNetworks_getDeviceCount(&numDevices), ANEURALNETWORKS_NO_ERROR);
154 EXPECT_GE(numDevices, (uint32_t)1);
155
156 for (uint32_t i = 0; i < numDevices; i++) {
157 ANeuralNetworksDevice* device = nullptr;
158 EXPECT_EQ(ANeuralNetworks_getDevice(i, &device), ANEURALNETWORKS_NO_ERROR);
159 const char* buffer = nullptr;
160 int result = ANeuralNetworksDevice_getName(device, &buffer);
161 if (result == ANEURALNETWORKS_NO_ERROR && name.compare(buffer) == 0) {
162 mDevices.push_back(device);
163 return true;
164 }
165 }
166 return false;
167 }
168
isSupportedOpListExpected(const std::vector<bool> & expected)169 bool isSupportedOpListExpected(const std::vector<bool>& expected) {
170 const uint32_t kMaxNumberOperations = 256;
171 EXPECT_LE(expected.size(), kMaxNumberOperations);
172 ANeuralNetworksModel* modelHandle = mModel.getHandle();
173 bool supported[kMaxNumberOperations] = {false};
174 EXPECT_EQ(ANeuralNetworksModel_getSupportedOperationsForDevices(
175 modelHandle, mDevices.data(), mDevices.size(), supported),
176 ANEURALNETWORKS_NO_ERROR);
177 return std::equal(expected.begin(), expected.end(), supported);
178 }
179
prepareForExecution(bool measureTiming=false)180 int prepareForExecution(bool measureTiming = false) {
181 ANeuralNetworksModel* modelHandle = mModel.getHandle();
182 int result = ANeuralNetworksCompilation_createForDevices(modelHandle, mDevices.data(),
183 mDevices.size(), &mCompilation);
184 if (result != ANEURALNETWORKS_NO_ERROR) {
185 return result;
186 }
187 EXPECT_EQ(ANeuralNetworksCompilation_finish(mCompilation), ANEURALNETWORKS_NO_ERROR);
188 EXPECT_EQ(ANeuralNetworksExecution_create(mCompilation, &mExecution),
189 ANEURALNETWORKS_NO_ERROR);
190 if (measureTiming) {
191 // Don't call setMeasureTiming unless we need to -- cannot call this
192 // API unless there is exactly one device.
193 EXPECT_EQ(ANeuralNetworksExecution_setMeasureTiming(mExecution, true),
194 ANEURALNETWORKS_NO_ERROR);
195 }
196 return ANEURALNETWORKS_NO_ERROR;
197 }
198
199 std::vector<ANeuralNetworksDevice*> mDevices;
200 ANeuralNetworksEvent* mEvent = nullptr;
201 ANeuralNetworksExecution* mExecution = nullptr;
202 ANeuralNetworksCompilation* mCompilation = nullptr;
203 WrapperModel mModel;
204 };
205
createSimpleAddModel(WrapperModel * model)206 void createSimpleAddModel(WrapperModel* model) {
207 WrapperOperandType type0(WrapperType::TENSOR_FLOAT32, {2});
208 WrapperOperandType type1(WrapperType::INT32, {});
209 // Phase 1, operands
210 auto op1 = model->addOperand(&type0);
211 auto op2 = model->addOperand(&type0);
212 auto act = model->addOperand(&type1);
213 auto op3 = model->addOperand(&type0);
214 // Phase 2, operations
215 static int32_t act_init[] = {0};
216 model->setOperandValue(act, act_init, sizeof(act_init));
217 model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
218 // Phase 3, inputs and outputs
219 model->identifyInputsAndOutputs({op1, op2}, {op3});
220 model->finish();
221 ASSERT_TRUE(model->isValid());
222 }
223
// This test verifies that a simple ADD model is able to run on a single device that claims being
// able to handle all operations.
TEST_F(IntrospectionControlTest, SimpleAddModel) {
    // This is needed before we have the CPU fallback path being treated as a Device.
    // TODO(miaowang): remove once b/72506261 is fixed.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    createSimpleAddModel(&mModel);

    // Register a driver claiming support for every operation type.
    std::string driverName = "test-all";
    std::vector<bool> ops(android::nn::kNumberOfOperationTypes, true);
    registerDevices({{driverName, 0.9, ops}});

    EXPECT_TRUE(selectDeviceByName(driverName));
    EXPECT_TRUE(isSupportedOpListExpected({true}));
    EXPECT_EQ(prepareForExecution(), ANEURALNETWORKS_NO_ERROR);

    // Verify that the mCompilation is actually using the "test-all" device.
    CompilationBuilder* c = reinterpret_cast<CompilationBuilder*>(mCompilation);
    const std::string& deviceNameBuffer =
            c->forTest_getExecutionPlan().forTest_simpleGetDevice()->getName();
    EXPECT_EQ(driverName, deviceNameBuffer);

    // Run the execution asynchronously and verify the element-wise sum.
    float input1[2] = {1.0f, 2.0f};
    float input2[2] = {3.0f, 4.0f};
    float output[2];
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 0, nullptr, input1, sizeof(input1)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 1, nullptr, input2, sizeof(input2)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setOutput(mExecution, 0, nullptr, output, sizeof(output)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setMeasureTiming(mExecution, true),
              ANEURALNETWORKS_NO_ERROR);

    EXPECT_EQ(ANeuralNetworksExecution_startCompute(mExecution, &mEvent), ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksEvent_wait(mEvent), ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(output[0], input1[0] + input2[0]);
    EXPECT_EQ(output[1], input1[1] + input2[1]);

    // A duration of UINT64_MAX means the value is unavailable; only when both
    // durations are available must on-hardware time not exceed in-driver time.
    uint64_t timeOnHardware, timeInDriver;
    EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, ANEURALNETWORKS_DURATION_ON_HARDWARE,
                                                   &timeOnHardware),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, ANEURALNETWORKS_DURATION_IN_DRIVER,
                                                   &timeInDriver),
              ANEURALNETWORKS_NO_ERROR);
    if (timeOnHardware != UINT64_MAX && timeInDriver != UINT64_MAX) {
        EXPECT_LE(timeOnHardware, timeInDriver);
    }
}
277
278 /*-- Begin test drivers -------------------------------------------------------------------------*/
279
280 namespace test_drivers {
281
// Encodes how a simulated execution should behave and, for passing
// executions, which of the four timing fields should be reported as
// available. Represented as a uint32_t bitmask (see the PASS_*_BIT values).
enum class Success : uint32_t {
    // ASYNC: Return ErrorStatus::GENERAL_FAILURE; notify ErrorStatus::GENERAL_FAILURE and
    // kBadTiming
    // SYNC, BURST: Return ErrorStatus::GENERAL_FAILURE and kBadTiming
    // FENCED: Return ErrorStatus::GENERAL_FAILURE, empty hidl_handle, and a nullptr callback
    FAIL_LAUNCH,

    // ASYNC: Return ErrorStatus::NONE; notify ErrorStatus::GENERAL_FAILURE and kBadTiming
    FAIL_WAIT,

    // Bit representation for PASS: One bit set to indicate PASS rather than
    // FAIL, one bit for each of the four timing fields (Unfenced, Fenced) x
    // (OnDevice, InDriver) to distinguish between unavailable timing (bit is
    // clear) and available timing (bit is set), and one bit to call out the
    // special case of CPU.
    PASS_BIT = 1 << 4,
    PASS_UNFENCED_DEVICE_BIT = 1 << 5,
    PASS_UNFENCED_DRIVER_BIT = 1 << 6,
    PASS_FENCED_DEVICE_BIT = 1 << 7,
    PASS_FENCED_DRIVER_BIT = 1 << 8,
    PASS_CPU_BIT = 1 << 9,

    // Each of the four timing fields may be either unavailable or 0
    PASS_CPU = PASS_BIT | PASS_CPU_BIT,

    // ASYNC: Return ErrorStatus::NONE; notify ErrorStatus::NONE and timing
    // SYNC, BURST: Return ErrorStatus::NONE and timing
    // FENCED: Return ErrorStatus::NONE, empty hidl_handle, and a callback with timing.
    //
    // For each PASS other than PASS_CPU, an enum name has the form
    // PASS_${UNFENCED_TIME}_${FENCED_TIME}. For example, PASS_NEITHER_BOTH
    // means that only fenced timing is available (both timeOnDevice and
    // timeInDriver). If _${FENCED_TIME} is omitted, it is equivalent to
    // _NEITHER; so PASS_BOTH means that only unfenced timing is available (both
    // timeOnDevice and timeInDriver).
    PASS_NEITHER = PASS_BIT,
    PASS_DEVICE = PASS_BIT | PASS_UNFENCED_DEVICE_BIT,
    PASS_DRIVER = PASS_BIT | PASS_UNFENCED_DRIVER_BIT,
    PASS_BOTH = PASS_BIT | PASS_UNFENCED_DEVICE_BIT | PASS_UNFENCED_DRIVER_BIT,
    PASS_NEITHER_DEVICE = PASS_BIT | PASS_FENCED_DEVICE_BIT,
    PASS_NEITHER_DRIVER = PASS_BIT | PASS_FENCED_DRIVER_BIT,
    PASS_NEITHER_BOTH = PASS_BIT | PASS_FENCED_DEVICE_BIT | PASS_FENCED_DRIVER_BIT,
    PASS_DEVICE_DEVICE = PASS_DEVICE | PASS_NEITHER_DEVICE,
    PASS_DEVICE_DRIVER = PASS_DEVICE | PASS_NEITHER_DRIVER,
    PASS_DEVICE_BOTH = PASS_DEVICE | PASS_NEITHER_BOTH,
    PASS_DRIVER_DEVICE = PASS_DRIVER | PASS_NEITHER_DEVICE,
    PASS_DRIVER_DRIVER = PASS_DRIVER | PASS_NEITHER_DRIVER,
    PASS_DRIVER_BOTH = PASS_DRIVER | PASS_NEITHER_BOTH,
    PASS_BOTH_DEVICE = PASS_BOTH | PASS_NEITHER_DEVICE,
    PASS_BOTH_DRIVER = PASS_BOTH | PASS_NEITHER_DRIVER,
    PASS_BOTH_BOTH = PASS_BOTH | PASS_NEITHER_BOTH,
};
334
hasBit(Success mask,Success bit)335 bool hasBit(Success mask, Success bit) {
336 const uint32_t bitAsInt = static_cast<uint32_t>(bit);
337 CHECK(bitAsInt && (bitAsInt & (bitAsInt - 1)) == 0)
338 << "second argument must be a single bit rather than " << static_cast<uint32_t>(bit);
339 return static_cast<uint32_t>(mask) & bitAsInt;
340 }
341
clearBit(Success mask,Success bit)342 Success clearBit(Success mask, Success bit) {
343 const uint32_t bitAsInt = static_cast<uint32_t>(bit);
344 CHECK(bitAsInt && (bitAsInt & (bitAsInt - 1)) == 0)
345 << "second argument must be a single bit rather than " << static_cast<uint32_t>(bit);
346 return static_cast<Success>(static_cast<uint32_t>(mask) & ~bitAsInt);
347 }
348
operator <<(std::ostream & os,Success success)349 std::ostream& operator<<(std::ostream& os, Success success) {
350 switch (success) {
351 case Success::FAIL_LAUNCH:
352 return os << "FAIL_LAUNCH";
353 case Success::FAIL_WAIT:
354 return os << "FAIL_WAIT";
355 case Success::PASS_CPU:
356 return os << "PASS_CPU";
357 default:
358 break;
359 }
360
361 static const std::vector<std::pair<Success, const char*>> bits = {
362 {Success::PASS_BIT, "PASS"},
363 {Success::PASS_UNFENCED_DEVICE_BIT, "UNFENCED_DEVICE"},
364 {Success::PASS_UNFENCED_DRIVER_BIT, "UNFENCED_DRIVER"},
365 {Success::PASS_FENCED_DEVICE_BIT, "FENCED_DEVICE"},
366 {Success::PASS_FENCED_DRIVER_BIT, "FENCED_DRIVER"},
367 };
368 bool gotOutput = false;
369 for (const auto& b : bits) {
370 if (hasBit(success, b.first)) {
371 if (gotOutput) {
372 os << '|';
373 } else {
374 gotOutput = true;
375 }
376 os << b.second;
377 success = clearBit(success, b.first);
378 }
379 }
380 if (uint32_t successAsInt = static_cast<uint32_t>(success)) {
381 if (gotOutput) {
382 os << '|';
383 }
384 os << successAsInt;
385 }
386 return os;
387 }
388
389 // Returns (unfenced timing, fenced timing).
390 // Not for PASS_CPU.
getExpectedTiming(Success s,bool fencedExecution)391 std::pair<V1_2::Timing, V1_2::Timing> getExpectedTiming(Success s, bool fencedExecution) {
392 CHECK_NE(s, Success::PASS_CPU);
393
394 if (!hasBit(s, Success::PASS_BIT)) {
395 return {kBadTiming, kBadTiming};
396 }
397
398 std::pair<V1_2::Timing, V1_2::Timing> result;
399 result.first.timeOnDevice = hasBit(s, Success::PASS_UNFENCED_DEVICE_BIT)
400 ? kGoodUnfencedTiming.timeOnDevice
401 : UINT64_MAX;
402 result.first.timeInDriver = hasBit(s, Success::PASS_UNFENCED_DRIVER_BIT)
403 ? kGoodUnfencedTiming.timeInDriver
404 : UINT64_MAX;
405 if (fencedExecution) {
406 result.second.timeOnDevice = hasBit(s, Success::PASS_FENCED_DEVICE_BIT)
407 ? kGoodFencedTiming.timeOnDevice
408 : UINT64_MAX;
409 result.second.timeInDriver = hasBit(s, Success::PASS_FENCED_DRIVER_BIT)
410 ? kGoodFencedTiming.timeInDriver
411 : UINT64_MAX;
412 } else {
413 result.second = result.first;
414 }
415 return result;
416 }
417
// For these tests we don't care about actually running an inference -- we
// just want to mock up execution status and timing results, and control
// when the execution finishes.
class TestPreparedModelLatest : public SamplePreparedModel {
   public:
    TestPreparedModelLatest(const HidlModel& model, const SampleDriver* driver, Success success)
        : SamplePreparedModel(model, driver, V1_1::ExecutionPreference::FAST_SINGLE_ANSWER, uid_t{},
                              nn::kDefaultPriority13),
          mSuccess(success) {}

    // 1.0 asynchronous execution path. Passing and FAIL_WAIT cases complete on
    // a detached thread so that dummyExecution() can block while the test
    // holds executions paused; FAIL_LAUNCH fails immediately on this thread.
    hardware::Return<V1_0::ErrorStatus> execute(
            const V1_0::Request&, const sp<V1_0::IExecutionCallback>& callback) override {
        switch (mSuccess) {
            case Success::PASS_NEITHER:
                std::thread([callback] {
                    dummyExecution();
                    callback->notify(V1_0::ErrorStatus::NONE);
                }).detach();
                return V1_0::ErrorStatus::NONE;
            case Success::FAIL_LAUNCH:
                dummyExecution();
                callback->notify(V1_0::ErrorStatus::GENERAL_FAILURE);
                return V1_0::ErrorStatus::GENERAL_FAILURE;
            case Success::FAIL_WAIT:
                // Launch reports success; the failure is delivered to the waiter.
                std::thread([callback] {
                    dummyExecution();
                    callback->notify(V1_0::ErrorStatus::GENERAL_FAILURE);
                }).detach();
                return V1_0::ErrorStatus::NONE;
            default:
                ADD_FAILURE() << "Unexpected Success kind";
                return V1_0::ErrorStatus::GENERAL_FAILURE;
        }
    }

    // 1.2 asynchronous execution path. Passing cases report the unfenced
    // timing encoded in mSuccess via notify_1_2.
    hardware::Return<V1_0::ErrorStatus> execute_1_2(
            const V1_0::Request&, V1_2::MeasureTiming measure,
            const sp<V1_2::IExecutionCallback>& callback) override {
        EXPECT_EQ(measure, V1_2::MeasureTiming::YES);
        switch (mSuccess) {
            case Success::PASS_NEITHER:
            case Success::PASS_DEVICE:
            case Success::PASS_DRIVER:
            case Success::PASS_BOTH:
                std::thread([this, callback] {
                    dummyExecution();
                    callback->notify_1_2(V1_0::ErrorStatus::NONE, {},
                                         getExpectedTiming(mSuccess, false).first);
                }).detach();
                return V1_0::ErrorStatus::NONE;
            case Success::FAIL_LAUNCH:
                dummyExecution();
                callback->notify(V1_0::ErrorStatus::GENERAL_FAILURE);
                return V1_0::ErrorStatus::GENERAL_FAILURE;
            case Success::FAIL_WAIT:
                std::thread([callback] {
                    dummyExecution();
                    callback->notify(V1_0::ErrorStatus::GENERAL_FAILURE);
                }).detach();
                return V1_0::ErrorStatus::NONE;
            default:
                ADD_FAILURE() << "Unexpected Success kind";
                return V1_0::ErrorStatus::GENERAL_FAILURE;
        }
    }

    // 1.3 asynchronous execution path -- delegates to execute_1_2.
    hardware::Return<V1_3::ErrorStatus> execute_1_3(
            const V1_3::Request&, V1_2::MeasureTiming measure, const V1_3::OptionalTimePoint&,
            const V1_3::OptionalTimeoutDuration&,
            const sp<V1_3::IExecutionCallback>& callback) override {
        // Use a placeholder V1_0::Request because execute_1_2 ignores request entirely.
        const V1_0::ErrorStatus status = execute_1_2(V1_0::Request{}, measure, callback);
        return convertToV1_3(status);
    }

    // 1.2 synchronous execution path.
    hardware::Return<void> executeSynchronously(const V1_0::Request&, V1_2::MeasureTiming measure,
                                                executeSynchronously_cb cb) override {
        EXPECT_EQ(measure, V1_2::MeasureTiming::YES);
        switch (mSuccess) {
            case Success::PASS_NEITHER:
            case Success::PASS_DEVICE:
            case Success::PASS_DRIVER:
            case Success::PASS_BOTH:
                dummyExecution();
                cb(V1_0::ErrorStatus::NONE, {}, getExpectedTiming(mSuccess, false).first);
                return hardware::Void();
            case Success::FAIL_WAIT:
                // While this is a synchronous execution method, the NNAPI
                // runtime may call it even for asynchronous execution, so we
                // need to tolerate Success::FAIL_WAIT here, not just
                // Success::FAIL_LAUNCH.
                FALLTHROUGH_INTENDED;
            case Success::FAIL_LAUNCH:
                dummyExecution();
                cb(V1_0::ErrorStatus::GENERAL_FAILURE, {}, kBadTiming);
                return hardware::Void();
            default:
                ADD_FAILURE() << "Unexpected Success kind";
                cb(V1_0::ErrorStatus::GENERAL_FAILURE, {}, kBadTiming);
                return hardware::Void();
        }
    }

    // 1.3 synchronous execution path -- adapts the 1.3 callback to the 1.2
    // signature (upgrading the error status) and delegates.
    hardware::Return<void> executeSynchronously_1_3(const V1_3::Request&,
                                                    V1_2::MeasureTiming measure,
                                                    const V1_3::OptionalTimePoint&,
                                                    const V1_3::OptionalTimeoutDuration&,
                                                    executeSynchronously_1_3_cb cb) override {
        const auto wrappedCb = [&cb](V1_0::ErrorStatus status,
                                     const hardware::hidl_vec<V1_2::OutputShape>& outputShapes,
                                     V1_2::Timing timing) {
            cb(convertToV1_3(status), outputShapes, timing);
        };
        // Use a placeholder V1_0::Request because executeSynchronously ignores request entirely.
        return executeSynchronously(V1_0::Request{}, measure, wrappedCb);
    }

    // ExecutionBurstServer::create has an overload that will use
    // IPreparedModel::executeSynchronously(), so we can rely on that, rather
    // than having to implement ExecutionBurstServer::IExecutorWithCache.
    hardware::Return<void> configureExecutionBurst(
            const sp<V1_2::IBurstCallback>& callback,
            const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
            const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
            configureExecutionBurst_cb cb) override {
        const sp<V1_2::IBurstContext> burst = ExecutionBurstServer::create(
                callback, requestChannel, resultChannel, this, std::chrono::microseconds{0});

        cb(burst == nullptr ? V1_0::ErrorStatus::GENERAL_FAILURE : V1_0::ErrorStatus::NONE, burst);
        return hardware::Void();
    }

    // Fenced execution path. Passing cases return an empty handle (no real
    // sync fence) plus a callback carrying both unfenced and fenced timing.
    hardware::Return<void> executeFenced(const V1_3::Request&,
                                         const hardware::hidl_vec<hardware::hidl_handle>&,
                                         V1_2::MeasureTiming measure,
                                         const V1_3::OptionalTimePoint&,
                                         const V1_3::OptionalTimeoutDuration&,
                                         const V1_3::OptionalTimeoutDuration&,
                                         executeFenced_cb callback) override {
        EXPECT_EQ(measure, V1_2::MeasureTiming::YES);
        if (hasBit(mSuccess, Success::PASS_BIT)) {
            dummyExecution();
            const auto expectedTiming = getExpectedTiming(mSuccess, true);
            sp<SampleFencedExecutionCallback> fencedExecutionCallback =
                    new SampleFencedExecutionCallback(expectedTiming.first, expectedTiming.second,
                                                     V1_3::ErrorStatus::NONE);
            callback(V1_3::ErrorStatus::NONE, hardware::hidl_handle(nullptr),
                     fencedExecutionCallback);
            return hardware::Void();
        }
        switch (mSuccess) {
            case Success::FAIL_WAIT:
                // Due to the limitation of the SampleDriver,
                // FAIL_WAIT behaves the same as FAIL_LAUNCH.
                // If the SampleDriver is updated to return real
                // sync fences, this must be updated.
                FALLTHROUGH_INTENDED;
            case Success::FAIL_LAUNCH:
                dummyExecution();
                callback(V1_3::ErrorStatus::GENERAL_FAILURE, hardware::hidl_handle(nullptr),
                         nullptr);
                return hardware::Void();
            default:
                ADD_FAILURE() << "Unexpected Success kind";
                return hardware::Void();
        }
    }

    // We can place the TestPreparedModelLatest system in a "pause" mode where
    // no execution will complete until the system is taken out of that mode.
    // Initially, the system is not in that mode.
    static void pauseExecutions(bool v) { mPauseExecutions.store(v); }

    // This function is only guaranteed to work in the following pattern:
    // Consider thread A as primary thread
    // - thread A: pauseExecutions(true);
    // - thread A: launch execution (as thread B)
    // - thread A: waitForExecutionToBegin(), block until call to dummyExecution by
    //             thread B makes mExecutionsInFlight nonzero
    // - thread B: dummyExecution(), which makes mExecutionsInFlight nonzero and blocks
    //             until thread A calls pauseExecutions(false)
    // - thread A: waitForExecutionToBegin() returns
    // - thread A: pauseExecutions(false), allowing dummyExecution() on thread B to continue
    // - thread B: dummyExecution() zeroes mExecutionsInFlight and returns
    // - thread B: thread exits
    static void waitForExecutionToBegin() {
        CHECK(mPauseExecutions.load());
        // Busy-wait; executions are expected to begin promptly.
        while (mExecutionsInFlight.load() == 0) {
        }
    }

   private:
    Success mSuccess;

    static std::atomic<bool> mPauseExecutions;
    static std::atomic<unsigned int> mExecutionsInFlight;

    // Simulates the body of an execution: registers this execution as in
    // flight, spins while executions are paused, then deregisters it. At most
    // one execution may be in flight at a time.
    static void dummyExecution() {
        CHECK_EQ(mExecutionsInFlight.fetch_add(1), 0u) << "We do not support concurrent executions";
        while (mPauseExecutions.load()) {
        }
        mExecutionsInFlight.fetch_sub(1);
    }
};
std::atomic<bool> TestPreparedModelLatest::mPauseExecutions = false;
std::atomic<unsigned int> TestPreparedModelLatest::mExecutionsInFlight = 0;

// 1.3 is the latest HAL version here, so the latest prepared model serves as
// the 1.3 implementation unchanged.
using TestPreparedModel13 = TestPreparedModelLatest;
626
627 // Like TestPreparedModelLatest, but implementing 1.2
628 class TestPreparedModel12 : public V1_2::IPreparedModel {
629 public:
TestPreparedModel12(const HidlModel & model,const SampleDriver * driver,Success success)630 TestPreparedModel12(const HidlModel& model, const SampleDriver* driver, Success success)
631 : mLatestPreparedModel(new TestPreparedModelLatest(model, driver, success)) {}
632
execute(const V1_0::Request & request,const sp<V1_0::IExecutionCallback> & callback)633 hardware::Return<V1_0::ErrorStatus> execute(
634 const V1_0::Request& request, const sp<V1_0::IExecutionCallback>& callback) override {
635 return mLatestPreparedModel->execute(request, callback);
636 }
637
execute_1_2(const V1_0::Request & request,V1_2::MeasureTiming measure,const sp<V1_2::IExecutionCallback> & callback)638 hardware::Return<V1_0::ErrorStatus> execute_1_2(
639 const V1_0::Request& request, V1_2::MeasureTiming measure,
640 const sp<V1_2::IExecutionCallback>& callback) override {
641 return mLatestPreparedModel->execute_1_2(request, measure, callback);
642 }
643
executeSynchronously(const V1_0::Request & request,V1_2::MeasureTiming measure,executeSynchronously_cb cb)644 hardware::Return<void> executeSynchronously(const V1_0::Request& request,
645 V1_2::MeasureTiming measure,
646 executeSynchronously_cb cb) override {
647 return mLatestPreparedModel->executeSynchronously(request, measure, cb);
648 }
649
configureExecutionBurst(const sp<V1_2::IBurstCallback> & callback,const MQDescriptorSync<V1_2::FmqRequestDatum> & requestChannel,const MQDescriptorSync<V1_2::FmqResultDatum> & resultChannel,configureExecutionBurst_cb cb)650 hardware::Return<void> configureExecutionBurst(
651 const sp<V1_2::IBurstCallback>& callback,
652 const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
653 const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
654 configureExecutionBurst_cb cb) override {
655 return mLatestPreparedModel->configureExecutionBurst(callback, requestChannel,
656 resultChannel, cb);
657 }
658
659 private:
660 const sp<V1_3::IPreparedModel> mLatestPreparedModel;
661 };
662
663 // Like TestPreparedModelLatest, but implementing 1.0
664 class TestPreparedModel10 : public V1_0::IPreparedModel {
665 public:
TestPreparedModel10(const HidlModel & model,const SampleDriver * driver,Success success)666 TestPreparedModel10(const HidlModel& model, const SampleDriver* driver, Success success)
667 : mLatestPreparedModel(new TestPreparedModelLatest(model, driver, success)) {}
668
execute(const V1_0::Request & request,const sp<V1_0::IExecutionCallback> & callback)669 hardware::Return<V1_0::ErrorStatus> execute(
670 const V1_0::Request& request, const sp<V1_0::IExecutionCallback>& callback) override {
671 return mLatestPreparedModel->execute(request, callback);
672 }
673
674 private:
675 const sp<V1_3::IPreparedModel> mLatestPreparedModel;
676 };
677
678 // Behaves like SampleDriver, except that it produces customized IPrepareModel.
679 class TestDriver13 : public SampleDriver {
680 public:
TestDriver13(const std::string & name,Success success)681 TestDriver13(const std::string& name, Success success)
682 : SampleDriver(name.c_str()), mSuccess(success) {}
683
getCapabilities_1_3(getCapabilities_1_3_cb _hidl_cb)684 hardware::Return<void> getCapabilities_1_3(getCapabilities_1_3_cb _hidl_cb) override {
685 android::nn::initVLogMask();
686 V1_3::Capabilities capabilities = nn::makeCapabilities(0.75f);
687 _hidl_cb(V1_3::ErrorStatus::NONE, capabilities);
688 return hardware::Void();
689 }
690
getSupportedOperations_1_3(const HidlModel & model,getSupportedOperations_1_3_cb cb)691 hardware::Return<void> getSupportedOperations_1_3(const HidlModel& model,
692 getSupportedOperations_1_3_cb cb) override {
693 if (nn::validateModel(model)) {
694 std::vector<bool> supported(model.main.operations.size(), true);
695 cb(V1_3::ErrorStatus::NONE, supported);
696 } else {
697 cb(V1_3::ErrorStatus::INVALID_ARGUMENT, {});
698 }
699 return hardware::Void();
700 }
701
getSupportedOperations_1_2(const V1_2::Model & model,getSupportedOperations_1_2_cb cb)702 hardware::Return<void> getSupportedOperations_1_2(const V1_2::Model& model,
703 getSupportedOperations_1_2_cb cb) override {
704 if (nn::validateModel(model)) {
705 std::vector<bool> supported(model.operations.size(), true);
706 cb(V1_0::ErrorStatus::NONE, supported);
707 } else {
708 std::vector<bool> supported;
709 cb(V1_0::ErrorStatus::INVALID_ARGUMENT, supported);
710 }
711 return hardware::Void();
712 }
713
prepareModel_1_3(const HidlModel & model,V1_1::ExecutionPreference,V1_3::Priority,const V1_3::OptionalTimePoint &,const hardware::hidl_vec<hardware::hidl_handle> &,const hardware::hidl_vec<hardware::hidl_handle> &,const nn::HalCacheToken &,const sp<V1_3::IPreparedModelCallback> & callback)714 hardware::Return<V1_3::ErrorStatus> prepareModel_1_3(
715 const HidlModel& model, V1_1::ExecutionPreference, V1_3::Priority,
716 const V1_3::OptionalTimePoint&, const hardware::hidl_vec<hardware::hidl_handle>&,
717 const hardware::hidl_vec<hardware::hidl_handle>&, const nn::HalCacheToken&,
718 const sp<V1_3::IPreparedModelCallback>& callback) override {
719 callback->notify_1_3(V1_3::ErrorStatus::NONE,
720 new TestPreparedModel13(model, this, mSuccess));
721 return V1_3::ErrorStatus::NONE;
722 }
723
prepareModel_1_2(const V1_2::Model & model,V1_1::ExecutionPreference,const hardware::hidl_vec<hardware::hidl_handle> &,const hardware::hidl_vec<hardware::hidl_handle> &,const nn::HalCacheToken &,const sp<V1_2::IPreparedModelCallback> & callback)724 hardware::Return<V1_0::ErrorStatus> prepareModel_1_2(
725 const V1_2::Model& model, V1_1::ExecutionPreference,
726 const hardware::hidl_vec<hardware::hidl_handle>&,
727 const hardware::hidl_vec<hardware::hidl_handle>&, const nn::HalCacheToken&,
728 const sp<V1_2::IPreparedModelCallback>& callback) override {
729 callback->notify_1_2(V1_0::ErrorStatus::NONE,
730 new TestPreparedModel12(nn::convertToV1_3(model), this, mSuccess));
731 return V1_0::ErrorStatus::NONE;
732 }
733
prepareModel_1_1(const V1_1::Model & model,V1_1::ExecutionPreference,const sp<V1_0::IPreparedModelCallback> & callback)734 hardware::Return<V1_0::ErrorStatus> prepareModel_1_1(
735 const V1_1::Model& model, V1_1::ExecutionPreference,
736 const sp<V1_0::IPreparedModelCallback>& callback) override {
737 callback->notify(V1_0::ErrorStatus::NONE,
738 new TestPreparedModel10(nn::convertToV1_3(model), this, mSuccess));
739 return V1_0::ErrorStatus::NONE;
740 }
741
prepareModel(const V1_0::Model & model,const sp<V1_0::IPreparedModelCallback> & callback)742 hardware::Return<V1_0::ErrorStatus> prepareModel(
743 const V1_0::Model& model, const sp<V1_0::IPreparedModelCallback>& callback) override {
744 return prepareModel_1_1(nn::convertToV1_1(model),
745 V1_1::ExecutionPreference::FAST_SINGLE_ANSWER, callback);
746 }
747
748 private:
749 Success mSuccess;
750 };
751
752 // Like TestDriver, but implementing 1.1
753 class TestDriver11 : public V1_1::IDevice {
754 public:
TestDriver11(const std::string & name,Success success)755 TestDriver11(const std::string& name, Success success)
756 : mLatestDriver(new TestDriver13(name, success)) {}
getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb)757 hardware::Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
758 return mLatestDriver->getCapabilities_1_1(_hidl_cb);
759 }
getSupportedOperations_1_1(const V1_1::Model & model,getSupportedOperations_1_1_cb _hidl_cb)760 hardware::Return<void> getSupportedOperations_1_1(
761 const V1_1::Model& model, getSupportedOperations_1_1_cb _hidl_cb) override {
762 return mLatestDriver->getSupportedOperations_1_1(model, _hidl_cb);
763 }
prepareModel_1_1(const V1_1::Model & model,V1_1::ExecutionPreference preference,const sp<V1_0::IPreparedModelCallback> & actualCallback)764 hardware::Return<V1_0::ErrorStatus> prepareModel_1_1(
765 const V1_1::Model& model, V1_1::ExecutionPreference preference,
766 const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
767 return mLatestDriver->prepareModel_1_1(model, preference, actualCallback);
768 }
getStatus()769 hardware::Return<V1_0::DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }
getCapabilities(getCapabilities_cb _hidl_cb)770 hardware::Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
771 return mLatestDriver->getCapabilities(_hidl_cb);
772 }
getSupportedOperations(const V1_0::Model & model,getSupportedOperations_cb _hidl_cb)773 hardware::Return<void> getSupportedOperations(const V1_0::Model& model,
774 getSupportedOperations_cb _hidl_cb) override {
775 return mLatestDriver->getSupportedOperations(model, _hidl_cb);
776 }
prepareModel(const V1_0::Model & model,const sp<V1_0::IPreparedModelCallback> & actualCallback)777 hardware::Return<V1_0::ErrorStatus> prepareModel(
778 const V1_0::Model& model,
779 const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
780 return mLatestDriver->prepareModel(model, actualCallback);
781 }
782
783 private:
784 const sp<V1_3::IDevice> mLatestDriver;
785 };
786
787 } // namespace test_drivers
788
789 /*-- End test drivers -------------------------------------------------------------------------*/
790
791 /*-- Begin timing tests -------------------------------------------------------------------------*/
792
793 namespace timing_tests {
794
795 using namespace test_drivers;
796
// Which kind of driver a TimingTest instance registers and selects.
enum class DriverKind {
    CPU,
    OLD,  // too old to support timing (1.1 or earlier)
    NEW   // new enough to support timing (1.2 or later)
};
802
operator <<(std::ostream & os,DriverKind kind)803 std::ostream& operator<<(std::ostream& os, DriverKind kind) {
804 const char* names[] = {"CPU", "OLD", "NEW"};
805 const uint32_t index = static_cast<uint32_t>(kind);
806 CHECK(index < std::size(names));
807 return os << names[index];
808 }
809
// Which NDK execution path a TimingTest instance exercises.
enum class Compute { ASYNC, SYNC, BURST, FENCED };
811
operator <<(std::ostream & os,Compute compute)812 std::ostream& operator<<(std::ostream& os, Compute compute) {
813 const char* names[] = {"ASYNC", "SYNC", "BURST", "FENCED"};
814 const uint32_t index = static_cast<uint32_t>(compute);
815 CHECK(index < std::size(names));
816 return os << names[index];
817 }
818
// Parameterized over (driver kind, expected success mode, execution path).
// IntrospectionControlTest supplies the model/compilation/execution plumbing.
class TimingTest : public IntrospectionControlTest,
                   public ::testing::WithParamInterface<std::tuple<DriverKind, Success, Compute>> {
   public:
    TimingTest()
        : kDriverKind(std::get<0>(GetParam())),
          kSuccess(std::get<1>(GetParam())),
          kCompute(std::get<2>(GetParam())) {}

   protected:
    const DriverKind kDriverKind;  // which driver gets registered/selected
    const Success kSuccess;        // how the test driver is told to behave
    const Compute kCompute;        // which NDK execution path is exercised
};
832
// Verifies ANeuralNetworksExecution_getDuration() across driver kinds (CPU,
// 1.1, 1.3), execution paths (async, sync, burst, fenced), and success/failure
// modes: querying a duration must fail with BAD_STATE while an execution is in
// flight or after it fails, and successful executions must report durations
// consistent with what the test driver injected.
TEST_P(TimingTest, Test) {
    // There's no straightforward way to force CPU execution to fail.
    ASSERT_EQ(kDriverKind == DriverKind::CPU, kSuccess == Success::PASS_CPU);

    // FAIL_WAIT only makes sense for ASYNC and FENCED.
    ASSERT_TRUE(kCompute == Compute::ASYNC || kCompute == Compute::FENCED ||
                kSuccess != Success::FAIL_WAIT);

    if (DeviceManager::get()->getUseCpuOnly() != (kDriverKind == DriverKind::CPU)) {
        // We don't have an elegant way to request the CPU driver. Therefore,
        // we rely on our test framework to make the choice between CPU and
        // non-CPU.
        GTEST_SKIP();
    }

    createSimpleAddModel(&mModel);

    switch (kDriverKind) {
        case DriverKind::CPU: {
            // There should be only one driver -- the CPU
            const std::string& name = DeviceManager::get()->getDrivers()[0]->getName();
            ASSERT_TRUE(selectDeviceByName(name));
            break;
        }
        case DriverKind::OLD: {
            static const char name[] = "old";
            DeviceManager::get()->forTest_registerDevice(
                    nn::makeSharedDevice(name, new TestDriver11(name, kSuccess)));
            ASSERT_TRUE(selectDeviceByName(name));
            break;
        }
        case DriverKind::NEW: {
            static const char name[] = "new";
            DeviceManager::get()->forTest_registerDevice(
                    nn::makeSharedDevice(name, new TestDriver13(name, kSuccess)));
            ASSERT_TRUE(selectDeviceByName(name));
            break;
        }
        default:
            FAIL() << "Unexpected DriverKind";
    }

    EXPECT_EQ(prepareForExecution(true /*measureTiming*/), ANEURALNETWORKS_NO_ERROR);

    float input1[2] = {1.0f, 2.0f};
    float input2[2] = {3.0f, 4.0f};
    float output[2];
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 0, nullptr, input1, sizeof(input1)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 1, nullptr, input2, sizeof(input2)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setOutput(mExecution, 0, nullptr, output, sizeof(output)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setMeasureTiming(mExecution, true),
              ANEURALNETWORKS_NO_ERROR);

    // Asserts that `result` is (or is not) ANEURALNETWORKS_NO_ERROR,
    // depending on whether this step is expected to pass.
    auto Check = [](bool expectPass, int result) {
        if (expectPass) {
            ASSERT_EQ(result, ANEURALNETWORKS_NO_ERROR);
        } else {
            ASSERT_NE(result, ANEURALNETWORKS_NO_ERROR);
        }
    };

    const bool isPass = hasBit(kSuccess, Success::PASS_BIT);
    const int expectedGetDurationResultCode =
            isPass ? ANEURALNETWORKS_NO_ERROR : ANEURALNETWORKS_BAD_STATE;

    // While the (paused) execution is still running, every duration query must
    // report BAD_STATE.
    const auto getDurationWhileRunning = [this] {
        if (kDriverKind == DriverKind::CPU) {
            // Testing DriverKind::CPU would require modifying the CPU execution
            // path to control execution completion, similarly to how this test
            // case does with TestPreparedModel::dummyExecution(). This does not
            // seem worthwhile -- it's intrusive into the runtime code solely
            // for the sake of testing, and we do not expect that the code paths
            // needed to ensure correct behavior of
            // ANeuralNetworksExecution_getDuration() on a running execution
            // would be any different for CPU than for actual drivers.
            return;
        }
        TestPreparedModelLatest::waitForExecutionToBegin();
        for (int durationCode :
             std::vector{ANEURALNETWORKS_DURATION_ON_HARDWARE, ANEURALNETWORKS_DURATION_IN_DRIVER,
                         ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE,
                         ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER}) {
            uint64_t time;
            // Cannot query duration while execution is running
            EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, durationCode, &time),
                      ANEURALNETWORKS_BAD_STATE);
        }
    };

    switch (kCompute) {
        case Compute::ASYNC: {
            // Ideally what we'd like to do here is
            //
            //     Check(kSuccess != Success::FAIL_LAUNCH,
            //           ANeuralNetworksExecution_startCompute(mExecution, &mEvent));
            //     Check(isPass, ANeuralNetworksEvent_wait(mEvent));
            //
            // However, in the current implementation of the runtime, a launch
            // failure at the HAL level does not show up as a launch failure at
            // the NDK level ("startCompute"): The NNAPI runtime does not call a
            // driver until it (the runtime) begins execution, so a launch
            // failure at the HAL level looks like an execution failure at the
            // NDK level ("wait").
            SCOPED_TRACE("ASYNC startCompute");
            TestPreparedModelLatest::pauseExecutions(true);
            Check(true,  // rather than kSuccess != Success::FAIL_LAUNCH
                  ANeuralNetworksExecution_startCompute(mExecution, &mEvent));
            getDurationWhileRunning();
            TestPreparedModelLatest::pauseExecutions(false);
            SCOPED_TRACE("ASYNC wait");
            Check(isPass, ANeuralNetworksEvent_wait(mEvent));
            break;
        }
        case Compute::SYNC: {
            SCOPED_TRACE("SYNC");
            TestPreparedModelLatest::pauseExecutions(true);
            std::thread run([this, Check, isPass] {
                Check(isPass, ANeuralNetworksExecution_compute(mExecution));
            });
            getDurationWhileRunning();
            TestPreparedModelLatest::pauseExecutions(false);
            run.join();
            break;
        }
        case Compute::BURST: {
            SCOPED_TRACE("BURST");
            ANeuralNetworksBurst* burst;
            ASSERT_EQ(ANeuralNetworksBurst_create(mCompilation, &burst), ANEURALNETWORKS_NO_ERROR);
            TestPreparedModelLatest::pauseExecutions(true);
            std::thread run([this, Check, isPass, burst] {
                Check(isPass, ANeuralNetworksExecution_burstCompute(mExecution, burst));
            });
            getDurationWhileRunning();
            TestPreparedModelLatest::pauseExecutions(false);
            run.join();
            ANeuralNetworksBurst_free(burst);
            break;
        }
        case Compute::FENCED: {
            SCOPED_TRACE("FENCED startComputeWithDependencies");
            TestPreparedModelLatest::pauseExecutions(true);

            // Note, due to the limitation of SampleDriver implementation, the call is synchronous.
            // If the SampleDriver is updated to return real sync fence, this must be updated.
            std::thread run([this, Check, isPass] {
                Check(isPass, ANeuralNetworksExecution_startComputeWithDependencies(
                                      mExecution, nullptr, 0, 0, &mEvent));
            });
            getDurationWhileRunning();
            TestPreparedModelLatest::pauseExecutions(false);
            run.join();
            SCOPED_TRACE("FENCED wait");
            Check(isPass, ANeuralNetworksEvent_wait(mEvent));
            break;
        }
        default:
            FAIL() << "unreachable";
    }

    // ANeuralNetworksExecution_getDuration() is documented to set the duration
    // to UINT64_MAX on failure; initialize defensively so the checks below
    // never read an indeterminate value even if that contract were violated.
    uint64_t timeOnHardware = UINT64_MAX, timeInDriver = UINT64_MAX,
             timeOnHardwareFenced = UINT64_MAX, timeInDriverFenced = UINT64_MAX;
    EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, ANEURALNETWORKS_DURATION_ON_HARDWARE,
                                                   &timeOnHardware),
              expectedGetDurationResultCode);
    EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, ANEURALNETWORKS_DURATION_IN_DRIVER,
                                                   &timeInDriver),
              expectedGetDurationResultCode);
    EXPECT_EQ(
            ANeuralNetworksExecution_getDuration(
                    mExecution, ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE, &timeOnHardwareFenced),
            expectedGetDurationResultCode);
    EXPECT_EQ(ANeuralNetworksExecution_getDuration(
                      mExecution, ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER, &timeInDriverFenced),
              expectedGetDurationResultCode);
    switch (kDriverKind) {
        case DriverKind::CPU: {
            // TODO: Should we require timing to be reported as 0?
            EXPECT_TRUE(timeOnHardware == 0 || timeOnHardware == UINT64_MAX)
                    << "timeOnHardware = " << timeOnHardware;
            // Fixed: the failure message previously streamed timeOnHardware here.
            EXPECT_TRUE(timeInDriver == 0 || timeInDriver == UINT64_MAX)
                    << "timeInDriver = " << timeInDriver;
            EXPECT_TRUE(timeOnHardwareFenced == 0 || timeOnHardwareFenced == UINT64_MAX)
                    << "timeOnHardwareFenced = " << timeOnHardwareFenced;
            // Fixed: the failure message previously used the label "timeInDriver".
            EXPECT_TRUE(timeInDriverFenced == 0 || timeInDriverFenced == UINT64_MAX)
                    << "timeInDriverFenced = " << timeInDriverFenced;
            break;
        }
        case DriverKind::OLD: {
            EXPECT_EQ(timeOnHardware, UINT64_MAX);
            EXPECT_EQ(timeInDriver, UINT64_MAX);
            EXPECT_EQ(timeOnHardwareFenced, UINT64_MAX);
            EXPECT_EQ(timeInDriverFenced, UINT64_MAX);
            break;
        }
        case DriverKind::NEW: {
            // The driver reports timing in microseconds; the NDK API reports
            // it in nanoseconds (UINT64_MAX means "unavailable" in both).
            auto microsToNanos = [](uint64_t micros) {
                constexpr uint64_t kNanosPerMicro = 1000;
                return micros == UINT64_MAX ? UINT64_MAX : kNanosPerMicro * micros;
            };
            auto expectedTiming = getExpectedTiming(kSuccess, kCompute == Compute::FENCED);
            EXPECT_EQ(timeOnHardware, microsToNanos(expectedTiming.first.timeOnDevice));
            EXPECT_EQ(timeInDriver, microsToNanos(expectedTiming.first.timeInDriver));
            EXPECT_EQ(timeOnHardwareFenced, microsToNanos(expectedTiming.second.timeOnDevice));
            EXPECT_EQ(timeInDriverFenced, microsToNanos(expectedTiming.second.timeInDriver));
            break;
        }
        default:
            FAIL() << "unreachable";
    }
    if (kCompute != Compute::FENCED) {
        // For unfenced paths the fenced queries must mirror the unfenced ones.
        EXPECT_EQ(timeOnHardware, timeOnHardwareFenced);
        EXPECT_EQ(timeInDriver, timeInDriverFenced);
    }
    // Compares two durations only when both are available.
    auto expectTimingLe = [](uint64_t a, const char* aName, uint64_t b, const char* bName) {
        if (a != UINT64_MAX && b != UINT64_MAX) {
            EXPECT_LE(a, b) << aName << " exceeds " << bName;
        }
    };
#define EXPECT_TIMING_LE(a, b) expectTimingLe(a, #a, b, #b)
    // Hardware time is a subset of driver time.
    EXPECT_TIMING_LE(timeOnHardware, timeInDriver);
    EXPECT_TIMING_LE(timeOnHardwareFenced, timeInDriverFenced);

    // Fenced durations cover at most the unfenced work.
    EXPECT_TIMING_LE(timeOnHardwareFenced, timeOnHardware);
    EXPECT_TIMING_LE(timeInDriverFenced, timeInDriver);
#undef EXPECT_TIMING_LE
}
1061
// Parameter tuples (driver kind, success mode, compute path) for the unfenced
// execution paths (ASYNC, SYNC, BURST).
auto kTimingTestUnfencedValues = ::testing::Values(
        // NOTE: We cannot force CPU execution to fail
        std::make_tuple(DriverKind::CPU, Success::PASS_CPU, Compute::ASYNC),
        std::make_tuple(DriverKind::CPU, Success::PASS_CPU, Compute::SYNC),
        std::make_tuple(DriverKind::CPU, Success::PASS_CPU, Compute::BURST),

        // NOTE: OLD driver does not provide timing
        std::make_tuple(DriverKind::OLD, Success::PASS_NEITHER, Compute::ASYNC),
        std::make_tuple(DriverKind::OLD, Success::PASS_NEITHER, Compute::SYNC),
        std::make_tuple(DriverKind::OLD, Success::PASS_NEITHER, Compute::BURST),

        std::make_tuple(DriverKind::OLD, Success::FAIL_LAUNCH, Compute::ASYNC),
        std::make_tuple(DriverKind::OLD, Success::FAIL_LAUNCH, Compute::SYNC),
        std::make_tuple(DriverKind::OLD, Success::FAIL_LAUNCH, Compute::BURST),

        // NOTE: Only ASYNC is paired with a wait
        std::make_tuple(DriverKind::OLD, Success::FAIL_WAIT, Compute::ASYNC),

        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER, Compute::ASYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER, Compute::SYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER, Compute::BURST),

        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE, Compute::ASYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE, Compute::SYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE, Compute::BURST),

        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER, Compute::ASYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER, Compute::SYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER, Compute::BURST),

        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH, Compute::ASYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH, Compute::SYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH, Compute::BURST),

        std::make_tuple(DriverKind::NEW, Success::FAIL_LAUNCH, Compute::ASYNC),
        std::make_tuple(DriverKind::NEW, Success::FAIL_LAUNCH, Compute::SYNC),
        std::make_tuple(DriverKind::NEW, Success::FAIL_LAUNCH, Compute::BURST),

        // NOTE: Only ASYNC is paired with a wait
        std::make_tuple(DriverKind::NEW, Success::FAIL_WAIT, Compute::ASYNC));

// Parameter tuples for the FENCED path, including the Success modes that
// carry distinct unfenced/fenced timing combinations.
auto kTimingTestFencedValues = ::testing::Values(
        // NOTE: We cannot force CPU execution to fail
        std::make_tuple(DriverKind::CPU, Success::PASS_CPU, Compute::FENCED),

        // NOTE: OLD driver does not provide timing
        std::make_tuple(DriverKind::OLD, Success::PASS_NEITHER, Compute::FENCED),

        std::make_tuple(DriverKind::OLD, Success::FAIL_LAUNCH, Compute::FENCED),

        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER_DEVICE, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER_DRIVER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER_BOTH, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE_DEVICE, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE_DRIVER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE_BOTH, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER_DEVICE, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER_DRIVER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER_BOTH, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH_DEVICE, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH_DRIVER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH_BOTH, Compute::FENCED),

        std::make_tuple(DriverKind::NEW, Success::FAIL_LAUNCH, Compute::FENCED));

// Instantiate TimingTest once per parameter list.
INSTANTIATE_TEST_SUITE_P(Unfenced, TimingTest, kTimingTestUnfencedValues);
INSTANTIATE_TEST_SUITE_P(Fenced, TimingTest, kTimingTestFencedValues);
1133
1134 } // namespace timing_tests
1135
1136 /*-- End timing tests -------------------------------------------------------------------------*/
1137
// Ceiling fed to the MAXIMUM operand of createAddMaxModel().
const float kSimpleCeiling = 2.0f;
1139
createAddMaxModel(WrapperModel * model,bool reverseOrder)1140 void createAddMaxModel(WrapperModel* model, bool reverseOrder) {
1141 WrapperOperandType type0(WrapperType::TENSOR_FLOAT32, {2});
1142 WrapperOperandType type1(WrapperType::INT32, {});
1143 // Phase 1, operands
1144 auto op1 = model->addOperand(&type0);
1145 auto op2 = model->addOperand(&type0);
1146 auto act = model->addOperand(&type1);
1147 auto op3 = model->addOperand(&type0);
1148 auto op4 = model->addOperand(&type0);
1149 auto op5 = model->addOperand(&type0);
1150 // Phase 2, operations
1151 static int32_t act_init[] = {0};
1152 model->setOperandValue(act, act_init, sizeof(act_init));
1153 static float ceiling[] = {kSimpleCeiling, kSimpleCeiling};
1154 model->setOperandValue(op4, ceiling, sizeof(ceiling));
1155 if (reverseOrder) {
1156 // In this case, add MAXIMUM first, but the execution order is still ADD -> MAXIMUM.
1157 model->addOperation(ANEURALNETWORKS_MAXIMUM, {op3, op4}, {op5});
1158 model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
1159 } else {
1160 model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
1161 model->addOperation(ANEURALNETWORKS_MAXIMUM, {op3, op4}, {op5});
1162 }
1163 // Phase 3, inputs and outputs
1164 model->identifyInputsAndOutputs({op1, op2}, {op5});
1165 model->finish();
1166 ASSERT_TRUE(model->isValid());
1167 }
1168
// A 1.1 driver is expected to report ADD supported but MAXIMUM unsupported
// when the runtime slices the ADD->MAXIMUM model down to its HAL version.
TEST_F(IntrospectionControlTest, SlicingAddMax) {
    // This is needed before we have the CPU fallback path being treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    using namespace test_drivers;

    static const char kDriverName[] = "driver11";
    DeviceManager::get()->forTest_registerDevice(
            nn::makeSharedDevice(kDriverName, new TestDriver11(kDriverName, Success::PASS_BOTH)));
    ASSERT_TRUE(selectDeviceByName(kDriverName));

    createAddMaxModel(&mModel, /*reverseOrder=*/false);
    EXPECT_TRUE(isSupportedOpListExpected({true, false}));
}
1185
// Same as SlicingAddMax, but the operations are added in reverse order; the
// supported-op list must follow the user's add order (MAXIMUM first here).
TEST_F(IntrospectionControlTest, SlicingMaxAdd) {
    // This is needed before we have the CPU fallback path being treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    using namespace test_drivers;

    static const char kDriverName[] = "driver11";
    DeviceManager::get()->forTest_registerDevice(
            nn::makeSharedDevice(kDriverName, new TestDriver11(kDriverName, Success::PASS_BOTH)));
    ASSERT_TRUE(selectDeviceByName(kDriverName));

    createAddMaxModel(&mModel, /*reverseOrder=*/true);
    EXPECT_TRUE(isSupportedOpListExpected({false, true}));
}
1202
// Multiplier fed to the MUL operand of createAddMulModel().
const float kSimpleMultiplier = 2.0f;
1204
createAddMulModel(WrapperModel * model,bool reverseOrder)1205 void createAddMulModel(WrapperModel* model, bool reverseOrder) {
1206 WrapperOperandType type0(WrapperType::TENSOR_FLOAT32, {2});
1207 WrapperOperandType type1(WrapperType::INT32, {});
1208 // Phase 1, operands
1209 auto op1 = model->addOperand(&type0);
1210 auto op2 = model->addOperand(&type0);
1211 auto act = model->addOperand(&type1);
1212 auto op3 = model->addOperand(&type0);
1213 auto op4 = model->addOperand(&type0);
1214 auto op5 = model->addOperand(&type0);
1215 // Phase 2, operations
1216 static int32_t act_init[] = {0};
1217 model->setOperandValue(act, act_init, sizeof(act_init));
1218 static float multiplier[] = {kSimpleMultiplier, kSimpleMultiplier};
1219 model->setOperandValue(op4, multiplier, sizeof(multiplier));
1220 if (reverseOrder) {
1221 // In this case, add MUL first, but the execution order is still ADD -> MUL.
1222 model->addOperation(ANEURALNETWORKS_MUL, {op3, op4, act}, {op5});
1223 model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
1224 } else {
1225 model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
1226 model->addOperation(ANEURALNETWORKS_MUL, {op3, op4, act}, {op5});
1227 }
1228 // Phase 3, inputs and outputs
1229 model->identifyInputsAndOutputs({op1, op2}, {op5});
1230 model->finish();
1231 ASSERT_TRUE(model->isValid());
1232 }
1233
// Both ADD and MUL exist in HAL 1.1, so a 1.1 driver must report the whole
// ADD->MUL model as supported.
TEST_F(IntrospectionControlTest, SlicingFullySupported) {
    // This is needed before we have the CPU fallback path being treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    using namespace test_drivers;

    static const char kDriverName[] = "driver11";
    DeviceManager::get()->forTest_registerDevice(
            nn::makeSharedDevice(kDriverName, new TestDriver11(kDriverName, Success::PASS_BOTH)));
    ASSERT_TRUE(selectDeviceByName(kDriverName));

    createAddMulModel(&mModel, /*reverseOrder=*/false);
    EXPECT_TRUE(isSupportedOpListExpected({true, true}));
}
1250
createCondModel(WrapperModel * model,bool dynamicRank)1251 void createCondModel(WrapperModel* model, bool dynamicRank) {
1252 const auto dimensions = dynamicRank ? std::vector<uint32_t>{} : std::vector<uint32_t>{1};
1253 WrapperOperandType floatType(WrapperType::TENSOR_FLOAT32, dimensions);
1254 WrapperOperandType boolType(WrapperType::TENSOR_BOOL8, {1});
1255 // Phase 1, operands
1256 auto op1 = model->addOperand(&floatType);
1257 auto op2 = model->addOperand(&boolType);
1258 // Phase 2, operations
1259 model->addOperation(ANEURALNETWORKS_LESS, {op1, op1}, {op2});
1260 // Phase 3, inputs and outputs
1261 model->identifyInputsAndOutputs({op1}, {op2});
1262 model->finish();
1263 }
1264
addReluOperation(WrapperModel * model,std::vector<uint32_t> * modelInputIndexes,std::vector<uint32_t> * modelOutputIndexes,bool dynamicRank)1265 void addReluOperation(WrapperModel* model, std::vector<uint32_t>* modelInputIndexes,
1266 std::vector<uint32_t>* modelOutputIndexes, bool dynamicRank) {
1267 const auto dimensions = dynamicRank ? std::vector<uint32_t>{} : std::vector<uint32_t>{1};
1268 WrapperOperandType type(WrapperType::TENSOR_FLOAT32, dimensions);
1269 // Phase 1, operands
1270 auto op1 = model->addOperand(&type);
1271 auto op2 = model->addOperand(&type);
1272 // Phase 2, operations
1273 model->addOperation(ANEURALNETWORKS_RELU, {op1}, {op2});
1274 // Phase 3, inputs and outputs
1275 modelInputIndexes->push_back(op1);
1276 modelOutputIndexes->push_back(op2);
1277 }
1278
createReluModel(WrapperModel * model,bool dynamicRank)1279 void createReluModel(WrapperModel* model, bool dynamicRank) {
1280 std::vector<uint32_t> modelInputIndexes, modelOutputIndexes;
1281 addReluOperation(model, &modelInputIndexes, &modelOutputIndexes, dynamicRank);
1282 model->identifyInputsAndOutputs(modelInputIndexes, modelOutputIndexes);
1283 model->finish();
1284 }
1285
addWhileOperation(std::vector<WrapperModel> * extraModels,WrapperModel * mainModel,std::vector<uint32_t> * modelInputIndexes,std::vector<uint32_t> * modelOutputIndexes,bool dynamicRank)1286 void addWhileOperation(std::vector<WrapperModel>* extraModels, WrapperModel* mainModel,
1287 std::vector<uint32_t>* modelInputIndexes,
1288 std::vector<uint32_t>* modelOutputIndexes, bool dynamicRank) {
1289 const auto dimensions = dynamicRank ? std::vector<uint32_t>{} : std::vector<uint32_t>{1};
1290 WrapperOperandType floatType(WrapperType::TENSOR_FLOAT32, dimensions);
1291 WrapperOperandType modelType(WrapperType::MODEL, {});
1292
1293 extraModels->emplace_back();
1294 extraModels->emplace_back();
1295 WrapperModel* condModel = &extraModels->at(extraModels->size() - 2);
1296 WrapperModel* bodyModel = &extraModels->at(extraModels->size() - 1);
1297 createCondModel(condModel, dynamicRank);
1298 createReluModel(bodyModel, dynamicRank);
1299 ASSERT_TRUE(condModel->isValid());
1300 ASSERT_TRUE(bodyModel->isValid());
1301
1302 // Phase 1, operands
1303 const uint32_t op1 = mainModel->addOperand(&modelType);
1304 const uint32_t op2 = mainModel->addOperand(&modelType);
1305 const uint32_t op3 = mainModel->addOperand(&floatType);
1306 const uint32_t op4 = mainModel->addOperand(&floatType);
1307 mainModel->setOperandValueFromModel(op1, condModel);
1308 mainModel->setOperandValueFromModel(op2, bodyModel);
1309 // Phase 2, operations
1310 mainModel->addOperation(ANEURALNETWORKS_WHILE, {op1, op2, op3}, {op4});
1311 // Phase 3, inputs and outputs
1312 modelInputIndexes->push_back(op3);
1313 modelOutputIndexes->push_back(op4);
1314 }
1315
createReluStaticWhileModel(std::vector<WrapperModel> * extraModels,WrapperModel * mainModel)1316 void createReluStaticWhileModel(std::vector<WrapperModel>* extraModels, WrapperModel* mainModel) {
1317 std::vector<uint32_t> modelInputIndexes, modelOutputIndexes;
1318
1319 // Operation supported in Android API level 27
1320 addReluOperation(mainModel, &modelInputIndexes, &modelOutputIndexes, /*dynamicRank=*/false);
1321 // Operation supported in Android API level 30
1322 addWhileOperation(extraModels, mainModel, &modelInputIndexes, &modelOutputIndexes,
1323 /*dynamicRank=*/false);
1324
1325 mainModel->identifyInputsAndOutputs(modelInputIndexes, modelOutputIndexes);
1326 mainModel->finish();
1327 ASSERT_TRUE(mainModel->isValid());
1328 }
1329
// A 1.1 driver is expected to support the RELU but not the 1.3 control-flow
// WHILE operation.
TEST_F(IntrospectionControlTest, ControlFlowNotSupported) {
    // This is needed before we have the CPU fallback path being treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    using namespace test_drivers;

    static const char kDriverName[] = "driver11";
    DeviceManager::get()->forTest_registerDevice(
            nn::makeSharedDevice(kDriverName, new TestDriver11(kDriverName, Success::PASS_BOTH)));
    ASSERT_TRUE(selectDeviceByName(kDriverName));

    std::vector<WrapperModel> extraModels;
    createReluStaticWhileModel(&extraModels, &mModel);
    EXPECT_TRUE(isSupportedOpListExpected({true, false}));

    // Clear mModel early because it may reference `extraModels`.
    mModel = WrapperModel{};
}
1350
// A 1.3 driver is expected to support both the RELU and the control-flow
// WHILE operation.
TEST_F(IntrospectionControlTest, ControlFlowSupported) {
    // This is needed before we have the CPU fallback path being treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    using namespace test_drivers;

    static const char kDriverName[] = "driver13";
    DeviceManager::get()->forTest_registerDevice(
            nn::makeSharedDevice(kDriverName, new TestDriver13(kDriverName, Success::PASS_BOTH)));
    ASSERT_TRUE(selectDeviceByName(kDriverName));

    std::vector<WrapperModel> extraModels;
    createReluStaticWhileModel(&extraModels, &mModel);
    EXPECT_TRUE(isSupportedOpListExpected({true, true}));

    // Clear mModel early because it may reference `extraModels`.
    mModel = WrapperModel{};
}
1371
createStaticWhileDynamicWhileModel(std::vector<WrapperModel> * extraModels,WrapperModel * mainModel)1372 void createStaticWhileDynamicWhileModel(std::vector<WrapperModel>* extraModels,
1373 WrapperModel* mainModel) {
1374 std::vector<uint32_t> modelInputIndexes, modelOutputIndexes;
1375
1376 // Operation supported in Android API level 30
1377 addWhileOperation(extraModels, mainModel, &modelInputIndexes, &modelOutputIndexes,
1378 /*dynamicRank=*/false);
1379 // Operation supported only by NNAPI runtime
1380 addWhileOperation(extraModels, mainModel, &modelInputIndexes, &modelOutputIndexes,
1381 /*dynamicRank=*/true);
1382
1383 mainModel->identifyInputsAndOutputs(modelInputIndexes, modelOutputIndexes);
1384 mainModel->finish();
1385 ASSERT_TRUE(mainModel->isValid());
1386 }
1387
// When the model contains an operation only the runtime can run (dynamic-rank
// WHILE), slicing is expected to report neither WHILE as driver-supported.
TEST_F(IntrospectionControlTest, ControlFlowFailedToSlice) {
    // This is needed before we have the CPU fallback path being treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    using namespace test_drivers;

    static const char kDriverName[] = "driver13";
    DeviceManager::get()->forTest_registerDevice(
            nn::makeSharedDevice(kDriverName, new TestDriver13(kDriverName, Success::PASS_BOTH)));
    ASSERT_TRUE(selectDeviceByName(kDriverName));

    std::vector<WrapperModel> extraModels;
    createStaticWhileDynamicWhileModel(&extraModels, &mModel);
    EXPECT_TRUE(isSupportedOpListExpected({false, false}));

    // Clear mModel early because it may reference `extraModels`.
    mModel = WrapperModel{};
}
1408
1409 // TODO(miaowang): add a test to make sure ANNCompilation_create() has CPU
1410 // fallback.
1411 // This test verifies that a device that could only handle ADD would correctly report that an
1412 // ADD->MUL model could not be fully supported.
// An ADD-only device must report the MUL of an ADD->MUL model as unsupported,
// and compiling for that single device must then fail (no CPU fallback with
// the Introspection API).
TEST_F(IntrospectionControlTest, PartialModelNotSupported) {
    // This is needed before we have the CPU fallback path being treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    createAddMulModel(&mModel, /*reverseOrder=*/false);

    // Register a driver that claims support for nothing but ADD.
    const std::string addOnlyDriver = "test-onlyAdd";
    std::vector<bool> addOnlyOp(android::nn::kNumberOfOperationTypes, false);
    addOnlyOp[ANEURALNETWORKS_ADD] = true;
    registerDevices({{addOnlyDriver, 0.9, addOnlyOp}});

    EXPECT_TRUE(selectDeviceByName(addOnlyDriver));
    EXPECT_TRUE(isSupportedOpListExpected({true, false}));

    ANeuralNetworksModel* modelHandle = mModel.getHandle();
    EXPECT_EQ(ANeuralNetworksCompilation_createForDevices(modelHandle, mDevices.data(),
                                                          mDevices.size(), &mCompilation),
              ANEURALNETWORKS_NO_ERROR);
    // The compilation must fail as there is no fallback when using
    // Introspection API.
    EXPECT_NE(ANeuralNetworksCompilation_finish(mCompilation), ANEURALNETWORKS_NO_ERROR);
}
1438
1439 // This test verifies that a device that could only handle ADD would correctly report that an
1440 // ADD->MUL model could not be fully supported. Also verifies that the indices of returned
1441 // supported op list correctly map to the order of operations being added by the user.
// Same scenario as PartialModelNotSupported, but with the operations added in
// reversed order, confirming that the supported-op list indices follow the
// order in which the user added the operations (here MUL first, then ADD).
TEST_F(IntrospectionControlTest, PartialModelNotSupportedOrder) {
    // This is needed before we have the CPU fallback path being treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    createAddMulModel(&mModel, true);

    const std::string driverName = "test-onlyAdd";
    std::vector<bool> supportedOps(android::nn::kNumberOfOperationTypes, false);
    supportedOps[ANEURALNETWORKS_ADD] = true;

    registerDevices({{driverName, 0.9, supportedOps}});

    EXPECT_TRUE(selectDeviceByName(driverName));
    // With reversed insertion order, index 0 is the unsupported op and
    // index 1 is the supported ADD.
    EXPECT_TRUE(isSupportedOpListExpected({false, true}));
}
1459
1460 // TODO(miaowang): update the test to make sure the model is actually running on the test devices.
1461 // This test verifies that an ADD->MUL model is able to run on two selected devices that together
1462 // can handle all operations.
// Verifies that an ADD->MUL model runs end-to-end when two selected devices
// (one supporting only ADD, one supporting only MUL) together cover all
// operations, and that the computed outputs match MUL(ADD(in1, in2), k).
TEST_F(IntrospectionControlTest, ModelNeedTwoDevices) {
    // This is needed before we have the CPU fallback path being treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    createAddMulModel(&mModel, false);

    const std::string addDriverName = "test-onlyAdd";
    std::vector<bool> addSupport(android::nn::kNumberOfOperationTypes, false);
    addSupport[ANEURALNETWORKS_ADD] = true;

    const std::string mulDriverName = "test-onlyMul";
    std::vector<bool> mulSupport(android::nn::kNumberOfOperationTypes, false);
    mulSupport[ANEURALNETWORKS_MUL] = true;

    registerDevices({
            {addDriverName, 0.9, addSupport},
            {mulDriverName, 0.9, mulSupport},
    });

    EXPECT_TRUE(selectDeviceByName(addDriverName));
    EXPECT_TRUE(selectDeviceByName(mulDriverName));
    // Together the two devices cover both operations.
    EXPECT_TRUE(isSupportedOpListExpected({true, true}));
    EXPECT_EQ(prepareForExecution(), ANEURALNETWORKS_NO_ERROR);

    float input1[2] = {1.0f, 2.0f};
    float input2[2] = {3.0f, 4.0f};
    float output[2];
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 0, nullptr, input1, sizeof(input1)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 1, nullptr, input2, sizeof(input2)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setOutput(mExecution, 0, nullptr, output, sizeof(output)),
              ANEURALNETWORKS_NO_ERROR);

    EXPECT_EQ(ANeuralNetworksExecution_startCompute(mExecution, &mEvent), ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksEvent_wait(mEvent), ANEURALNETWORKS_NO_ERROR);
    // Each output element is the elementwise sum scaled by the MUL constant.
    for (int i = 0; i < 2; ++i) {
        EXPECT_EQ(output[i], kSimpleMultiplier * (input1[i] + input2[i]));
    }
}
1504 } // namespace
1505