1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <ExecutionBurstServer.h>
18 #include <HalInterfaces.h>
19 #include <SampleDriver.h>
20 #include <ValidateHal.h>
21 #include <gtest/gtest.h>
22 
23 #include <algorithm>
24 #include <chrono>
25 #include <iterator>
26 #include <map>
27 #include <queue>
28 #include <set>
29 #include <string>
30 #include <thread>
31 #include <tuple>
32 #include <utility>
33 #include <vector>
34 
35 #include "CompilationBuilder.h"
36 #include "HalUtils.h"
37 #include "Manager.h"
38 #include "NeuralNetworks.h"
39 #include "NeuralNetworksOEM.h"
40 #include "TestNeuralNetworksWrapper.h"
41 
42 namespace {
43 
44 using namespace ::android;
45 namespace V1_0 = ::android::hardware::neuralnetworks::V1_0;
46 namespace V1_1 = ::android::hardware::neuralnetworks::V1_1;
47 namespace V1_2 = ::android::hardware::neuralnetworks::V1_2;
48 namespace V1_3 = ::android::hardware::neuralnetworks::V1_3;
49 
50 using CompilationBuilder = nn::CompilationBuilder;
51 using Device = nn::Device;
52 using DeviceManager = nn::DeviceManager;
53 using ExecutePreference = nn::test_wrapper::ExecutePreference;
54 using ExecutionBurstServer = nn::ExecutionBurstServer;
55 using HidlModel = V1_3::Model;
56 using Result = nn::test_wrapper::Result;
57 using SampleDriver = nn::sample_driver::SampleDriver;
58 using SamplePreparedModel = nn::sample_driver::SamplePreparedModel;
59 using SampleFencedExecutionCallback = nn::sample_driver::SampleFencedExecutionCallback;
60 using WrapperModel = nn::test_wrapper::Model;
61 using WrapperOperandType = nn::test_wrapper::OperandType;
62 using WrapperType = nn::test_wrapper::Type;
63 using nn::convertToV1_0;
64 using nn::convertToV1_3;
65 
66 template <typename T>
67 using MQDescriptorSync = hardware::MQDescriptorSync<T>;
68 
69 constexpr V1_2::Timing kBadTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX};
70 constexpr V1_2::Timing kGoodUnfencedTiming = {.timeOnDevice = 123, .timeInDriver = 456};
71 constexpr V1_2::Timing kGoodFencedTiming = {.timeOnDevice = 23, .timeInDriver = 56};
72 
73 // This is an IDevice for testing purposes. The test driver has customized
74 // getCapabilities_1_3 and getSupportedOperations_1_3.
75 class TestDriver : public SampleDriver {
76    public:
TestDriver(const char * name,V1_3::Capabilities capabilities,const std::vector<bool> & supportedOps)77     TestDriver(const char* name, V1_3::Capabilities capabilities,
78                const std::vector<bool>& supportedOps)
79         : SampleDriver(name), mCapabilities(capabilities), mSupportedOps(supportedOps) {}
~TestDriver()80     ~TestDriver() override {}
81 
getCapabilities_1_3(getCapabilities_1_3_cb cb)82     hardware::Return<void> getCapabilities_1_3(getCapabilities_1_3_cb cb) override {
83         cb(V1_3::ErrorStatus::NONE, mCapabilities);
84         return hardware::Void();
85     }
86 
getSupportedOperations_1_3(const V1_3::Model & model,getSupportedOperations_1_3_cb cb)87     hardware::Return<void> getSupportedOperations_1_3(const V1_3::Model& model,
88                                                       getSupportedOperations_1_3_cb cb) override {
89         if (!android::nn::validateModel(model)) {
90             cb(V1_3::ErrorStatus::INVALID_ARGUMENT, std::vector<bool>());
91             return hardware::Void();
92         }
93         const size_t count = model.main.operations.size();
94         std::vector<bool> supported(count);
95         std::transform(model.main.operations.begin(), model.main.operations.end(),
96                        supported.begin(), [this](V1_3::Operation op) {
97                            return mSupportedOps[static_cast<int32_t>(op.type)];
98                        });
99         cb(V1_3::ErrorStatus::NONE, supported);
100         return hardware::Void();
101     }
102 
103    private:
104     V1_3::Capabilities mCapabilities;
105     std::vector<bool> mSupportedOps;
106 };
107 
108 class IntrospectionControlTest : public ::testing::Test {
109    protected:
SetUp()110     virtual void SetUp() {}
TearDown()111     virtual void TearDown() {
112         if (mEvent) {
113             ANeuralNetworksEvent_free(mEvent);
114         }
115         if (mExecution) {
116             ANeuralNetworksExecution_free(mExecution);
117         }
118         if (mCompilation) {
119             ANeuralNetworksCompilation_free(mCompilation);
120         }
121         DeviceManager::get()->forTest_reInitializeDeviceList();
122     }
123 
124     struct DeviceSpecification {
DeviceSpecification__anon138cc3190110::IntrospectionControlTest::DeviceSpecification125         DeviceSpecification(const std::string& name, float perf, std::vector<bool>& supportedOps)
126             : mName(name), mSupportedOps(supportedOps) {
127             V1_0::PerformanceInfo perfInfo = {.execTime = perf, .powerUsage = perf};
128             mCapabilities = {
129                     .relaxedFloat32toFloat16PerformanceScalar = perfInfo,
130                     .relaxedFloat32toFloat16PerformanceTensor = perfInfo,
131                     .operandPerformance =
132                             nn::nonExtensionOperandPerformance<nn::HalVersion::V1_3>(perfInfo),
133                     .ifPerformance = perfInfo,
134                     .whilePerformance = perfInfo};
135         }
136         std::string mName;
137         V1_3::Capabilities mCapabilities;
138         std::vector<bool> mSupportedOps;
139     };
140 
141     // From a vector of DeviceSpecification, register new Devices.
registerDevices(std::vector<DeviceSpecification> specifications)142     void registerDevices(std::vector<DeviceSpecification> specifications) {
143         for (const auto& specification : specifications) {
144             DeviceManager::get()->forTest_registerDevice(nn::makeSharedDevice(
145                     specification.mName.c_str(),
146                     new TestDriver(specification.mName.c_str(), specification.mCapabilities,
147                                    specification.mSupportedOps)));
148         }
149     }
150 
selectDeviceByName(const std::string & name)151     bool selectDeviceByName(const std::string& name) {
152         uint32_t numDevices = 0;
153         EXPECT_EQ(ANeuralNetworks_getDeviceCount(&numDevices), ANEURALNETWORKS_NO_ERROR);
154         EXPECT_GE(numDevices, (uint32_t)1);
155 
156         for (uint32_t i = 0; i < numDevices; i++) {
157             ANeuralNetworksDevice* device = nullptr;
158             EXPECT_EQ(ANeuralNetworks_getDevice(i, &device), ANEURALNETWORKS_NO_ERROR);
159             const char* buffer = nullptr;
160             int result = ANeuralNetworksDevice_getName(device, &buffer);
161             if (result == ANEURALNETWORKS_NO_ERROR && name.compare(buffer) == 0) {
162                 mDevices.push_back(device);
163                 return true;
164             }
165         }
166         return false;
167     }
168 
isSupportedOpListExpected(const std::vector<bool> & expected)169     bool isSupportedOpListExpected(const std::vector<bool>& expected) {
170         const uint32_t kMaxNumberOperations = 256;
171         EXPECT_LE(expected.size(), kMaxNumberOperations);
172         ANeuralNetworksModel* modelHandle = mModel.getHandle();
173         bool supported[kMaxNumberOperations] = {false};
174         EXPECT_EQ(ANeuralNetworksModel_getSupportedOperationsForDevices(
175                           modelHandle, mDevices.data(), mDevices.size(), supported),
176                   ANEURALNETWORKS_NO_ERROR);
177         return std::equal(expected.begin(), expected.end(), supported);
178     }
179 
prepareForExecution(bool measureTiming=false)180     int prepareForExecution(bool measureTiming = false) {
181         ANeuralNetworksModel* modelHandle = mModel.getHandle();
182         int result = ANeuralNetworksCompilation_createForDevices(modelHandle, mDevices.data(),
183                                                                  mDevices.size(), &mCompilation);
184         if (result != ANEURALNETWORKS_NO_ERROR) {
185             return result;
186         }
187         EXPECT_EQ(ANeuralNetworksCompilation_finish(mCompilation), ANEURALNETWORKS_NO_ERROR);
188         EXPECT_EQ(ANeuralNetworksExecution_create(mCompilation, &mExecution),
189                   ANEURALNETWORKS_NO_ERROR);
190         if (measureTiming) {
191             // Don't call setMeasureTiming unless we need to -- cannot call this
192             // API unless there is exactly one device.
193             EXPECT_EQ(ANeuralNetworksExecution_setMeasureTiming(mExecution, true),
194                       ANEURALNETWORKS_NO_ERROR);
195         }
196         return ANEURALNETWORKS_NO_ERROR;
197     }
198 
199     std::vector<ANeuralNetworksDevice*> mDevices;
200     ANeuralNetworksEvent* mEvent = nullptr;
201     ANeuralNetworksExecution* mExecution = nullptr;
202     ANeuralNetworksCompilation* mCompilation = nullptr;
203     WrapperModel mModel;
204 };
205 
createSimpleAddModel(WrapperModel * model)206 void createSimpleAddModel(WrapperModel* model) {
207     WrapperOperandType type0(WrapperType::TENSOR_FLOAT32, {2});
208     WrapperOperandType type1(WrapperType::INT32, {});
209     // Phase 1, operands
210     auto op1 = model->addOperand(&type0);
211     auto op2 = model->addOperand(&type0);
212     auto act = model->addOperand(&type1);
213     auto op3 = model->addOperand(&type0);
214     // Phase 2, operations
215     static int32_t act_init[] = {0};
216     model->setOperandValue(act, act_init, sizeof(act_init));
217     model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
218     // Phase 3, inputs and outputs
219     model->identifyInputsAndOutputs({op1, op2}, {op3});
220     model->finish();
221     ASSERT_TRUE(model->isValid());
222 }
223 
// This test verifies that a simple ADD model is able to run on a single device that claims being
// able to handle all operations.
TEST_F(IntrospectionControlTest, SimpleAddModel) {
    // This is needed before we have the CPU fallback path being treated as a Device.
    // TODO(miaowang): remove once b/72506261 is fixed.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    createSimpleAddModel(&mModel);

    // Register a single test driver that claims support for every operation type.
    std::string driverName = "test-all";
    std::vector<bool> ops(android::nn::kNumberOfOperationTypes, true);
    registerDevices({{driverName, 0.9, ops}});

    // The model has one operation, and the device should support it.
    EXPECT_TRUE(selectDeviceByName(driverName));
    EXPECT_TRUE(isSupportedOpListExpected({true}));
    EXPECT_EQ(prepareForExecution(), ANEURALNETWORKS_NO_ERROR);

    // Verify that the mCompilation is actually using the "test-all" device.
    CompilationBuilder* c = reinterpret_cast<CompilationBuilder*>(mCompilation);
    const std::string& deviceNameBuffer =
            c->forTest_getExecutionPlan().forTest_simpleGetDevice()->getName();
    EXPECT_EQ(driverName, deviceNameBuffer);

    // Bind inputs/outputs and request timing measurement for this execution.
    float input1[2] = {1.0f, 2.0f};
    float input2[2] = {3.0f, 4.0f};
    float output[2];
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 0, nullptr, input1, sizeof(input1)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 1, nullptr, input2, sizeof(input2)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setOutput(mExecution, 0, nullptr, output, sizeof(output)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setMeasureTiming(mExecution, true),
              ANEURALNETWORKS_NO_ERROR);

    // Run asynchronously, wait for completion, and check the element-wise sums.
    EXPECT_EQ(ANeuralNetworksExecution_startCompute(mExecution, &mEvent), ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksEvent_wait(mEvent), ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(output[0], input1[0] + input2[0]);
    EXPECT_EQ(output[1], input1[1] + input2[1]);

    // Either duration may be reported as unavailable (UINT64_MAX); when both
    // are available, on-hardware time must not exceed in-driver time.
    uint64_t timeOnHardware, timeInDriver;
    EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, ANEURALNETWORKS_DURATION_ON_HARDWARE,
                                                   &timeOnHardware),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, ANEURALNETWORKS_DURATION_IN_DRIVER,
                                                   &timeInDriver),
              ANEURALNETWORKS_NO_ERROR);
    if (timeOnHardware != UINT64_MAX && timeInDriver != UINT64_MAX) {
        EXPECT_LE(timeOnHardware, timeInDriver);
    }
}
277 
278 /*-- Begin test drivers -------------------------------------------------------------------------*/
279 
280 namespace test_drivers {
281 
// Encodes the desired outcome of a fake execution. FAIL_* values are plain
// enumerators; PASS values are bitmasks over uint32_t, decomposed by
// hasBit()/clearBit() below.
enum class Success : uint32_t {
    // ASYNC: Return ErrorStatus::GENERAL_FAILURE; notify ErrorStatus::GENERAL_FAILURE and
    // kBadTiming
    // SYNC, BURST: Return ErrorStatus::GENERAL_FAILURE and kBadTiming
    // FENCED: Return ErrorStatus::GENERAL_FAILURE, empty hidl_handle, and a nullptr callback
    FAIL_LAUNCH,

    // ASYNC: Return ErrorStatus::NONE; notify ErrorStatus::GENERAL_FAILURE and kBadTiming
    FAIL_WAIT,

    // Bit representation for PASS: One bit set to indicate PASS rather than
    // FAIL, one bit for each of the four timing fields (Unfenced, Fenced) x
    // (OnDevice, InDriver) to distinguish between unavailable timing (bit is
    // clear) and available timing (bit is set), and one bit to call out the
    // special case of CPU.
    PASS_BIT = 1 << 4,
    PASS_UNFENCED_DEVICE_BIT = 1 << 5,
    PASS_UNFENCED_DRIVER_BIT = 1 << 6,
    PASS_FENCED_DEVICE_BIT = 1 << 7,
    PASS_FENCED_DRIVER_BIT = 1 << 8,
    PASS_CPU_BIT = 1 << 9,

    // Each of the four timing fields may be either unavailable or 0
    PASS_CPU = PASS_BIT | PASS_CPU_BIT,

    // ASYNC: Return ErrorStatus::NONE; notify ErrorStatus::NONE and timing
    // SYNC, BURST: Return ErrorStatus::NONE and timing
    // FENCED: Return ErrorStatus::NONE, empty hidl_handle, and a callback with timing.
    //
    // For each PASS other than PASS_CPU, an enum name has the form
    // PASS_${UNFENCED_TIME}_${FENCED_TIME}.  For example, PASS_NEITHER_BOTH
    // means that only fenced timing is available (both timeOnDevice and
    // timeInDriver).  If _${FENCED_TIME} is omitted, it is equivalent to
    // _NEITHER; so PASS_BOTH means that only unfenced timing is available (both
    // timeOnDevice and timeInDriver).
    PASS_NEITHER = PASS_BIT,
    PASS_DEVICE = PASS_BIT | PASS_UNFENCED_DEVICE_BIT,
    PASS_DRIVER = PASS_BIT | PASS_UNFENCED_DRIVER_BIT,
    PASS_BOTH = PASS_BIT | PASS_UNFENCED_DEVICE_BIT | PASS_UNFENCED_DRIVER_BIT,
    PASS_NEITHER_DEVICE = PASS_BIT | PASS_FENCED_DEVICE_BIT,
    PASS_NEITHER_DRIVER = PASS_BIT | PASS_FENCED_DRIVER_BIT,
    PASS_NEITHER_BOTH = PASS_BIT | PASS_FENCED_DEVICE_BIT | PASS_FENCED_DRIVER_BIT,
    PASS_DEVICE_DEVICE = PASS_DEVICE | PASS_NEITHER_DEVICE,
    PASS_DEVICE_DRIVER = PASS_DEVICE | PASS_NEITHER_DRIVER,
    PASS_DEVICE_BOTH = PASS_DEVICE | PASS_NEITHER_BOTH,
    PASS_DRIVER_DEVICE = PASS_DRIVER | PASS_NEITHER_DEVICE,
    PASS_DRIVER_DRIVER = PASS_DRIVER | PASS_NEITHER_DRIVER,
    PASS_DRIVER_BOTH = PASS_DRIVER | PASS_NEITHER_BOTH,
    PASS_BOTH_DEVICE = PASS_BOTH | PASS_NEITHER_DEVICE,
    PASS_BOTH_DRIVER = PASS_BOTH | PASS_NEITHER_DRIVER,
    PASS_BOTH_BOTH = PASS_BOTH | PASS_NEITHER_BOTH,
};
334 
hasBit(Success mask,Success bit)335 bool hasBit(Success mask, Success bit) {
336     const uint32_t bitAsInt = static_cast<uint32_t>(bit);
337     CHECK(bitAsInt && (bitAsInt & (bitAsInt - 1)) == 0)
338             << "second argument must be a single bit rather than " << static_cast<uint32_t>(bit);
339     return static_cast<uint32_t>(mask) & bitAsInt;
340 }
341 
clearBit(Success mask,Success bit)342 Success clearBit(Success mask, Success bit) {
343     const uint32_t bitAsInt = static_cast<uint32_t>(bit);
344     CHECK(bitAsInt && (bitAsInt & (bitAsInt - 1)) == 0)
345             << "second argument must be a single bit rather than " << static_cast<uint32_t>(bit);
346     return static_cast<Success>(static_cast<uint32_t>(mask) & ~bitAsInt);
347 }
348 
operator <<(std::ostream & os,Success success)349 std::ostream& operator<<(std::ostream& os, Success success) {
350     switch (success) {
351         case Success::FAIL_LAUNCH:
352             return os << "FAIL_LAUNCH";
353         case Success::FAIL_WAIT:
354             return os << "FAIL_WAIT";
355         case Success::PASS_CPU:
356             return os << "PASS_CPU";
357         default:
358             break;
359     }
360 
361     static const std::vector<std::pair<Success, const char*>> bits = {
362             {Success::PASS_BIT, "PASS"},
363             {Success::PASS_UNFENCED_DEVICE_BIT, "UNFENCED_DEVICE"},
364             {Success::PASS_UNFENCED_DRIVER_BIT, "UNFENCED_DRIVER"},
365             {Success::PASS_FENCED_DEVICE_BIT, "FENCED_DEVICE"},
366             {Success::PASS_FENCED_DRIVER_BIT, "FENCED_DRIVER"},
367     };
368     bool gotOutput = false;
369     for (const auto& b : bits) {
370         if (hasBit(success, b.first)) {
371             if (gotOutput) {
372                 os << '|';
373             } else {
374                 gotOutput = true;
375             }
376             os << b.second;
377             success = clearBit(success, b.first);
378         }
379     }
380     if (uint32_t successAsInt = static_cast<uint32_t>(success)) {
381         if (gotOutput) {
382             os << '|';
383         }
384         os << successAsInt;
385     }
386     return os;
387 }
388 
389 // Returns (unfenced timing, fenced timing).
390 // Not for PASS_CPU.
getExpectedTiming(Success s,bool fencedExecution)391 std::pair<V1_2::Timing, V1_2::Timing> getExpectedTiming(Success s, bool fencedExecution) {
392     CHECK_NE(s, Success::PASS_CPU);
393 
394     if (!hasBit(s, Success::PASS_BIT)) {
395         return {kBadTiming, kBadTiming};
396     }
397 
398     std::pair<V1_2::Timing, V1_2::Timing> result;
399     result.first.timeOnDevice = hasBit(s, Success::PASS_UNFENCED_DEVICE_BIT)
400                                         ? kGoodUnfencedTiming.timeOnDevice
401                                         : UINT64_MAX;
402     result.first.timeInDriver = hasBit(s, Success::PASS_UNFENCED_DRIVER_BIT)
403                                         ? kGoodUnfencedTiming.timeInDriver
404                                         : UINT64_MAX;
405     if (fencedExecution) {
406         result.second.timeOnDevice = hasBit(s, Success::PASS_FENCED_DEVICE_BIT)
407                                              ? kGoodFencedTiming.timeOnDevice
408                                              : UINT64_MAX;
409         result.second.timeInDriver = hasBit(s, Success::PASS_FENCED_DRIVER_BIT)
410                                              ? kGoodFencedTiming.timeInDriver
411                                              : UINT64_MAX;
412     } else {
413         result.second = result.first;
414     }
415     return result;
416 }
417 
// For these tests we don't care about actually running an inference -- we
// just want to fake the execution status and timing results, and control
// when the execution finishes.
// Test double for the latest (1.3) IPreparedModel interface. Each execution
// entry point consults mSuccess to decide what status and timing to report,
// and calls dummyExecution(), whose pause mechanism lets a test control when
// the fake execution completes.
class TestPreparedModelLatest : public SamplePreparedModel {
   public:
    TestPreparedModelLatest(const HidlModel& model, const SampleDriver* driver, Success success)
        : SamplePreparedModel(model, driver, V1_1::ExecutionPreference::FAST_SINGLE_ANSWER, uid_t{},
                              nn::kDefaultPriority13),
          mSuccess(success) {}

    // 1.0 asynchronous execution; this interface reports no timing.
    hardware::Return<V1_0::ErrorStatus> execute(
            const V1_0::Request&, const sp<V1_0::IExecutionCallback>& callback) override {
        switch (mSuccess) {
            case Success::PASS_NEITHER:
                // Launch succeeds; completion is signaled from a detached thread.
                std::thread([callback] {
                    dummyExecution();
                    callback->notify(V1_0::ErrorStatus::NONE);
                }).detach();
                return V1_0::ErrorStatus::NONE;
            case Success::FAIL_LAUNCH:
                dummyExecution();
                callback->notify(V1_0::ErrorStatus::GENERAL_FAILURE);
                return V1_0::ErrorStatus::GENERAL_FAILURE;
            case Success::FAIL_WAIT:
                // Launch succeeds, but the execution itself later fails.
                std::thread([callback] {
                    dummyExecution();
                    callback->notify(V1_0::ErrorStatus::GENERAL_FAILURE);
                }).detach();
                return V1_0::ErrorStatus::NONE;
            default:
                ADD_FAILURE() << "Unexpected Success kind";
                return V1_0::ErrorStatus::GENERAL_FAILURE;
        }
    }

    // 1.2 asynchronous execution; on success, notifies the unfenced timing
    // derived from mSuccess.
    hardware::Return<V1_0::ErrorStatus> execute_1_2(
            const V1_0::Request&, V1_2::MeasureTiming measure,
            const sp<V1_2::IExecutionCallback>& callback) override {
        EXPECT_EQ(measure, V1_2::MeasureTiming::YES);
        switch (mSuccess) {
            case Success::PASS_NEITHER:
            case Success::PASS_DEVICE:
            case Success::PASS_DRIVER:
            case Success::PASS_BOTH:
                std::thread([this, callback] {
                    dummyExecution();
                    callback->notify_1_2(V1_0::ErrorStatus::NONE, {},
                                         getExpectedTiming(mSuccess, false).first);
                }).detach();
                return V1_0::ErrorStatus::NONE;
            case Success::FAIL_LAUNCH:
                dummyExecution();
                callback->notify(V1_0::ErrorStatus::GENERAL_FAILURE);
                return V1_0::ErrorStatus::GENERAL_FAILURE;
            case Success::FAIL_WAIT:
                std::thread([callback] {
                    dummyExecution();
                    callback->notify(V1_0::ErrorStatus::GENERAL_FAILURE);
                }).detach();
                return V1_0::ErrorStatus::NONE;
            default:
                ADD_FAILURE() << "Unexpected Success kind";
                return V1_0::ErrorStatus::GENERAL_FAILURE;
        }
    }

    // 1.3 asynchronous execution; delegates to execute_1_2 and ignores the
    // deadline/timeout arguments.
    hardware::Return<V1_3::ErrorStatus> execute_1_3(
            const V1_3::Request&, V1_2::MeasureTiming measure, const V1_3::OptionalTimePoint&,
            const V1_3::OptionalTimeoutDuration&,
            const sp<V1_3::IExecutionCallback>& callback) override {
        // Use a placeholder V1_0::Request because execute_1_2 ignores request entirely.
        const V1_0::ErrorStatus status = execute_1_2(V1_0::Request{}, measure, callback);
        return convertToV1_3(status);
    }

    // 1.2 synchronous execution; reports status and unfenced timing through cb.
    hardware::Return<void> executeSynchronously(const V1_0::Request&, V1_2::MeasureTiming measure,
                                                executeSynchronously_cb cb) override {
        EXPECT_EQ(measure, V1_2::MeasureTiming::YES);
        switch (mSuccess) {
            case Success::PASS_NEITHER:
            case Success::PASS_DEVICE:
            case Success::PASS_DRIVER:
            case Success::PASS_BOTH:
                dummyExecution();
                cb(V1_0::ErrorStatus::NONE, {}, getExpectedTiming(mSuccess, false).first);
                return hardware::Void();
            case Success::FAIL_WAIT:
                // While this is a synchronous execution method, the NNAPI
                // runtime may call it even for asynchronous execution, so we
                // need to tolerate Success::FAIL_WAIT here, not just
                // Success::FAIL_LAUNCH.
                FALLTHROUGH_INTENDED;
            case Success::FAIL_LAUNCH:
                dummyExecution();
                cb(V1_0::ErrorStatus::GENERAL_FAILURE, {}, kBadTiming);
                return hardware::Void();
            default:
                ADD_FAILURE() << "Unexpected Success kind";
                cb(V1_0::ErrorStatus::GENERAL_FAILURE, {}, kBadTiming);
                return hardware::Void();
        }
    }

    // 1.3 synchronous execution; wraps cb to upconvert the status and then
    // delegates to executeSynchronously, ignoring the deadline/timeout args.
    hardware::Return<void> executeSynchronously_1_3(const V1_3::Request&,
                                                    V1_2::MeasureTiming measure,
                                                    const V1_3::OptionalTimePoint&,
                                                    const V1_3::OptionalTimeoutDuration&,
                                                    executeSynchronously_1_3_cb cb) override {
        const auto wrappedCb = [&cb](V1_0::ErrorStatus status,
                                     const hardware::hidl_vec<V1_2::OutputShape>& outputShapes,
                                     V1_2::Timing timing) {
            cb(convertToV1_3(status), outputShapes, timing);
        };
        // Use a placeholder V1_0::Request because executeSynchronously ignores request entirely.
        return executeSynchronously(V1_0::Request{}, measure, wrappedCb);
    }

    // ExecutionBurstServer::create has an overload that will use
    // IPreparedModel::executeSynchronously(), so we can rely on that, rather
    // than having to implement ExecutionBurstServer::IExecutorWithCache.
    hardware::Return<void> configureExecutionBurst(
            const sp<V1_2::IBurstCallback>& callback,
            const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
            const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
            configureExecutionBurst_cb cb) override {
        const sp<V1_2::IBurstContext> burst = ExecutionBurstServer::create(
                callback, requestChannel, resultChannel, this, std::chrono::microseconds{0});

        cb(burst == nullptr ? V1_0::ErrorStatus::GENERAL_FAILURE : V1_0::ErrorStatus::NONE, burst);
        return hardware::Void();
    }

    // Fenced execution; on a PASS, returns an empty handle plus a callback
    // that reports both the unfenced and fenced expected timings.
    hardware::Return<void> executeFenced(const V1_3::Request&,
                                         const hardware::hidl_vec<hardware::hidl_handle>&,
                                         V1_2::MeasureTiming measure,
                                         const V1_3::OptionalTimePoint&,
                                         const V1_3::OptionalTimeoutDuration&,
                                         const V1_3::OptionalTimeoutDuration&,
                                         executeFenced_cb callback) override {
        EXPECT_EQ(measure, V1_2::MeasureTiming::YES);
        if (hasBit(mSuccess, Success::PASS_BIT)) {
            dummyExecution();
            const auto expectedTiming = getExpectedTiming(mSuccess, true);
            sp<SampleFencedExecutionCallback> fencedExecutionCallback =
                    new SampleFencedExecutionCallback(expectedTiming.first, expectedTiming.second,
                                                      V1_3::ErrorStatus::NONE);
            callback(V1_3::ErrorStatus::NONE, hardware::hidl_handle(nullptr),
                     fencedExecutionCallback);
            return hardware::Void();
        }
        switch (mSuccess) {
            case Success::FAIL_WAIT:
                // Due to the limitation of the SampleDriver,
                // FAIL_WAIT behaves the same as FAIL_LAUNCH.
                // If the SampleDriver is updated to return real
                // sync fences, this must be updated.
                FALLTHROUGH_INTENDED;
            case Success::FAIL_LAUNCH:
                dummyExecution();
                callback(V1_3::ErrorStatus::GENERAL_FAILURE, hardware::hidl_handle(nullptr),
                         nullptr);
                return hardware::Void();
            default:
                ADD_FAILURE() << "Unexpected Success kind";
                return hardware::Void();
        }
    }

    // We can place the TestPreparedModelLatest system in a "pause" mode where
    // no execution will complete until the system is taken out of that mode.
    // Initially, the system is not in that mode.
    static void pauseExecutions(bool v) { mPauseExecutions.store(v); }

    // This function is only guaranteed to work in the following pattern:
    // Consider thread A as primary thread
    // - thread A: pauseExecutions(true);
    // - thread A: launch execution (as thread B)
    // - thread A: waitForExecutionToBegin(), block until call to dummyExecution by
    //                                        thread B makes mExecutionsInFlight nonzero
    // - thread B: dummyExecution(), which makes mExecutionsInFlight nonzero and blocks
    //                               until thread A calls pauseExecutions(false)
    // - thread A: waitForExecutionToBegin() returns
    // - thread A: pauseExecutions(false), allowing dummyExecution() on thread B to continue
    // - thread B: dummyExecution() zeroes mExecutionsInFlight and returns
    // - thread B: thread exits
    static void waitForExecutionToBegin() {
        CHECK(mPauseExecutions.load());
        // Busy-wait until an execution registers itself in dummyExecution().
        while (mExecutionsInFlight.load() == 0) {
        }
    }

   private:
    Success mSuccess;  // which outcome every execution path should simulate

    // Shared across all instances: the pause flag and the count of executions
    // currently inside dummyExecution().
    static std::atomic<bool> mPauseExecutions;
    static std::atomic<unsigned int> mExecutionsInFlight;

    // Stand-in for real work: registers the in-flight execution, then
    // busy-waits while the system is paused. At most one execution may be in
    // flight at a time.
    static void dummyExecution() {
        CHECK_EQ(mExecutionsInFlight.fetch_add(1), 0u) << "We do not support concurrent executions";
        while (mPauseExecutions.load()) {
        }
        mExecutionsInFlight.fetch_sub(1);
    }
};
// Definitions of the static pause-protocol state declared in
// TestPreparedModelLatest; initially not paused, with no execution in flight.
std::atomic<bool> TestPreparedModelLatest::mPauseExecutions = false;
std::atomic<unsigned int> TestPreparedModelLatest::mExecutionsInFlight = 0;

// The latest class already implements the 1.3 interface, so no wrapper is needed.
using TestPreparedModel13 = TestPreparedModelLatest;
626 
627 // Like TestPreparedModelLatest, but implementing 1.2
628 class TestPreparedModel12 : public V1_2::IPreparedModel {
629    public:
TestPreparedModel12(const HidlModel & model,const SampleDriver * driver,Success success)630     TestPreparedModel12(const HidlModel& model, const SampleDriver* driver, Success success)
631         : mLatestPreparedModel(new TestPreparedModelLatest(model, driver, success)) {}
632 
execute(const V1_0::Request & request,const sp<V1_0::IExecutionCallback> & callback)633     hardware::Return<V1_0::ErrorStatus> execute(
634             const V1_0::Request& request, const sp<V1_0::IExecutionCallback>& callback) override {
635         return mLatestPreparedModel->execute(request, callback);
636     }
637 
execute_1_2(const V1_0::Request & request,V1_2::MeasureTiming measure,const sp<V1_2::IExecutionCallback> & callback)638     hardware::Return<V1_0::ErrorStatus> execute_1_2(
639             const V1_0::Request& request, V1_2::MeasureTiming measure,
640             const sp<V1_2::IExecutionCallback>& callback) override {
641         return mLatestPreparedModel->execute_1_2(request, measure, callback);
642     }
643 
executeSynchronously(const V1_0::Request & request,V1_2::MeasureTiming measure,executeSynchronously_cb cb)644     hardware::Return<void> executeSynchronously(const V1_0::Request& request,
645                                                 V1_2::MeasureTiming measure,
646                                                 executeSynchronously_cb cb) override {
647         return mLatestPreparedModel->executeSynchronously(request, measure, cb);
648     }
649 
configureExecutionBurst(const sp<V1_2::IBurstCallback> & callback,const MQDescriptorSync<V1_2::FmqRequestDatum> & requestChannel,const MQDescriptorSync<V1_2::FmqResultDatum> & resultChannel,configureExecutionBurst_cb cb)650     hardware::Return<void> configureExecutionBurst(
651             const sp<V1_2::IBurstCallback>& callback,
652             const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
653             const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
654             configureExecutionBurst_cb cb) override {
655         return mLatestPreparedModel->configureExecutionBurst(callback, requestChannel,
656                                                              resultChannel, cb);
657     }
658 
659    private:
660     const sp<V1_3::IPreparedModel> mLatestPreparedModel;
661 };
662 
663 // Like TestPreparedModelLatest, but implementing 1.0
664 class TestPreparedModel10 : public V1_0::IPreparedModel {
665    public:
TestPreparedModel10(const HidlModel & model,const SampleDriver * driver,Success success)666     TestPreparedModel10(const HidlModel& model, const SampleDriver* driver, Success success)
667         : mLatestPreparedModel(new TestPreparedModelLatest(model, driver, success)) {}
668 
execute(const V1_0::Request & request,const sp<V1_0::IExecutionCallback> & callback)669     hardware::Return<V1_0::ErrorStatus> execute(
670             const V1_0::Request& request, const sp<V1_0::IExecutionCallback>& callback) override {
671         return mLatestPreparedModel->execute(request, callback);
672     }
673 
674    private:
675     const sp<V1_3::IPreparedModel> mLatestPreparedModel;
676 };
677 
678 // Behaves like SampleDriver, except that it produces customized IPrepareModel.
679 class TestDriver13 : public SampleDriver {
680    public:
TestDriver13(const std::string & name,Success success)681     TestDriver13(const std::string& name, Success success)
682         : SampleDriver(name.c_str()), mSuccess(success) {}
683 
getCapabilities_1_3(getCapabilities_1_3_cb _hidl_cb)684     hardware::Return<void> getCapabilities_1_3(getCapabilities_1_3_cb _hidl_cb) override {
685         android::nn::initVLogMask();
686         V1_3::Capabilities capabilities = nn::makeCapabilities(0.75f);
687         _hidl_cb(V1_3::ErrorStatus::NONE, capabilities);
688         return hardware::Void();
689     }
690 
getSupportedOperations_1_3(const HidlModel & model,getSupportedOperations_1_3_cb cb)691     hardware::Return<void> getSupportedOperations_1_3(const HidlModel& model,
692                                                       getSupportedOperations_1_3_cb cb) override {
693         if (nn::validateModel(model)) {
694             std::vector<bool> supported(model.main.operations.size(), true);
695             cb(V1_3::ErrorStatus::NONE, supported);
696         } else {
697             cb(V1_3::ErrorStatus::INVALID_ARGUMENT, {});
698         }
699         return hardware::Void();
700     }
701 
getSupportedOperations_1_2(const V1_2::Model & model,getSupportedOperations_1_2_cb cb)702     hardware::Return<void> getSupportedOperations_1_2(const V1_2::Model& model,
703                                                       getSupportedOperations_1_2_cb cb) override {
704         if (nn::validateModel(model)) {
705             std::vector<bool> supported(model.operations.size(), true);
706             cb(V1_0::ErrorStatus::NONE, supported);
707         } else {
708             std::vector<bool> supported;
709             cb(V1_0::ErrorStatus::INVALID_ARGUMENT, supported);
710         }
711         return hardware::Void();
712     }
713 
prepareModel_1_3(const HidlModel & model,V1_1::ExecutionPreference,V1_3::Priority,const V1_3::OptionalTimePoint &,const hardware::hidl_vec<hardware::hidl_handle> &,const hardware::hidl_vec<hardware::hidl_handle> &,const nn::HalCacheToken &,const sp<V1_3::IPreparedModelCallback> & callback)714     hardware::Return<V1_3::ErrorStatus> prepareModel_1_3(
715             const HidlModel& model, V1_1::ExecutionPreference, V1_3::Priority,
716             const V1_3::OptionalTimePoint&, const hardware::hidl_vec<hardware::hidl_handle>&,
717             const hardware::hidl_vec<hardware::hidl_handle>&, const nn::HalCacheToken&,
718             const sp<V1_3::IPreparedModelCallback>& callback) override {
719         callback->notify_1_3(V1_3::ErrorStatus::NONE,
720                              new TestPreparedModel13(model, this, mSuccess));
721         return V1_3::ErrorStatus::NONE;
722     }
723 
prepareModel_1_2(const V1_2::Model & model,V1_1::ExecutionPreference,const hardware::hidl_vec<hardware::hidl_handle> &,const hardware::hidl_vec<hardware::hidl_handle> &,const nn::HalCacheToken &,const sp<V1_2::IPreparedModelCallback> & callback)724     hardware::Return<V1_0::ErrorStatus> prepareModel_1_2(
725             const V1_2::Model& model, V1_1::ExecutionPreference,
726             const hardware::hidl_vec<hardware::hidl_handle>&,
727             const hardware::hidl_vec<hardware::hidl_handle>&, const nn::HalCacheToken&,
728             const sp<V1_2::IPreparedModelCallback>& callback) override {
729         callback->notify_1_2(V1_0::ErrorStatus::NONE,
730                              new TestPreparedModel12(nn::convertToV1_3(model), this, mSuccess));
731         return V1_0::ErrorStatus::NONE;
732     }
733 
prepareModel_1_1(const V1_1::Model & model,V1_1::ExecutionPreference,const sp<V1_0::IPreparedModelCallback> & callback)734     hardware::Return<V1_0::ErrorStatus> prepareModel_1_1(
735             const V1_1::Model& model, V1_1::ExecutionPreference,
736             const sp<V1_0::IPreparedModelCallback>& callback) override {
737         callback->notify(V1_0::ErrorStatus::NONE,
738                          new TestPreparedModel10(nn::convertToV1_3(model), this, mSuccess));
739         return V1_0::ErrorStatus::NONE;
740     }
741 
prepareModel(const V1_0::Model & model,const sp<V1_0::IPreparedModelCallback> & callback)742     hardware::Return<V1_0::ErrorStatus> prepareModel(
743             const V1_0::Model& model, const sp<V1_0::IPreparedModelCallback>& callback) override {
744         return prepareModel_1_1(nn::convertToV1_1(model),
745                                 V1_1::ExecutionPreference::FAST_SINGLE_ANSWER, callback);
746     }
747 
748    private:
749     Success mSuccess;
750 };
751 
752 // Like TestDriver, but implementing 1.1
753 class TestDriver11 : public V1_1::IDevice {
754    public:
TestDriver11(const std::string & name,Success success)755     TestDriver11(const std::string& name, Success success)
756         : mLatestDriver(new TestDriver13(name, success)) {}
getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb)757     hardware::Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
758         return mLatestDriver->getCapabilities_1_1(_hidl_cb);
759     }
getSupportedOperations_1_1(const V1_1::Model & model,getSupportedOperations_1_1_cb _hidl_cb)760     hardware::Return<void> getSupportedOperations_1_1(
761             const V1_1::Model& model, getSupportedOperations_1_1_cb _hidl_cb) override {
762         return mLatestDriver->getSupportedOperations_1_1(model, _hidl_cb);
763     }
prepareModel_1_1(const V1_1::Model & model,V1_1::ExecutionPreference preference,const sp<V1_0::IPreparedModelCallback> & actualCallback)764     hardware::Return<V1_0::ErrorStatus> prepareModel_1_1(
765             const V1_1::Model& model, V1_1::ExecutionPreference preference,
766             const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
767         return mLatestDriver->prepareModel_1_1(model, preference, actualCallback);
768     }
getStatus()769     hardware::Return<V1_0::DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }
getCapabilities(getCapabilities_cb _hidl_cb)770     hardware::Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
771         return mLatestDriver->getCapabilities(_hidl_cb);
772     }
getSupportedOperations(const V1_0::Model & model,getSupportedOperations_cb _hidl_cb)773     hardware::Return<void> getSupportedOperations(const V1_0::Model& model,
774                                                   getSupportedOperations_cb _hidl_cb) override {
775         return mLatestDriver->getSupportedOperations(model, _hidl_cb);
776     }
prepareModel(const V1_0::Model & model,const sp<V1_0::IPreparedModelCallback> & actualCallback)777     hardware::Return<V1_0::ErrorStatus> prepareModel(
778             const V1_0::Model& model,
779             const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
780         return mLatestDriver->prepareModel(model, actualCallback);
781     }
782 
783    private:
784     const sp<V1_3::IDevice> mLatestDriver;
785 };
786 
787 }  // namespace test_drivers
788 
789 /*-- End   test drivers -------------------------------------------------------------------------*/
790 
791 /*-- Begin timing tests -------------------------------------------------------------------------*/
792 
793 namespace timing_tests {
794 
795 using namespace test_drivers;
796 
// Kind of driver a test case registers and selects.  NOTE: the enumerator
// order must match the names[] table in operator<<(std::ostream&, DriverKind).
enum class DriverKind {
    CPU,
    OLD,  // too old to support timing (1.1 or earlier)
    NEW   // new enough to support timing (1.2 or later)
};
802 
operator <<(std::ostream & os,DriverKind kind)803 std::ostream& operator<<(std::ostream& os, DriverKind kind) {
804     const char* names[] = {"CPU", "OLD", "NEW"};
805     const uint32_t index = static_cast<uint32_t>(kind);
806     CHECK(index < std::size(names));
807     return os << names[index];
808 }
809 
810 enum class Compute { ASYNC, SYNC, BURST, FENCED };
811 
operator <<(std::ostream & os,Compute compute)812 std::ostream& operator<<(std::ostream& os, Compute compute) {
813     const char* names[] = {"ASYNC", "SYNC", "BURST", "FENCED"};
814     const uint32_t index = static_cast<uint32_t>(compute);
815     CHECK(index < std::size(names));
816     return os << names[index];
817 }
818 
819 class TimingTest : public IntrospectionControlTest,
820                    public ::testing::WithParamInterface<std::tuple<DriverKind, Success, Compute>> {
821    public:
TimingTest()822     TimingTest()
823         : kDriverKind(std::get<0>(GetParam())),
824           kSuccess(std::get<1>(GetParam())),
825           kCompute(std::get<2>(GetParam())) {}
826 
827    protected:
828     const DriverKind kDriverKind;
829     const Success kSuccess;
830     const Compute kCompute;
831 };
832 
TEST_P(TimingTest,Test)833 TEST_P(TimingTest, Test) {
834     // There's no straightforward way to force CPU execution to fail.
835     ASSERT_EQ(kDriverKind == DriverKind::CPU, kSuccess == Success::PASS_CPU);
836 
837     // FAIL_WAIT only makes sense for ASYNC and FENCED.
838     ASSERT_TRUE(kCompute == Compute::ASYNC || kCompute == Compute::FENCED ||
839                 kSuccess != Success::FAIL_WAIT);
840 
841     if (DeviceManager::get()->getUseCpuOnly() != (kDriverKind == DriverKind::CPU)) {
842         // We don't have an elegant way to request the CPU driver.  Therefore,
843         // we rely on our test framework to make the choice between CPU and
844         // non-CPU.
845         GTEST_SKIP();
846     }
847 
848     createSimpleAddModel(&mModel);
849 
850     switch (kDriverKind) {
851         case DriverKind::CPU: {
852             // There should be only one driver -- the CPU
853             const std::string& name = DeviceManager::get()->getDrivers()[0]->getName();
854             ASSERT_TRUE(selectDeviceByName(name));
855             break;
856         }
857         case DriverKind::OLD: {
858             static const char name[] = "old";
859             DeviceManager::get()->forTest_registerDevice(
860                     nn::makeSharedDevice(name, new TestDriver11(name, kSuccess)));
861             ASSERT_TRUE(selectDeviceByName(name));
862             break;
863         }
864         case DriverKind::NEW: {
865             static const char name[] = "new";
866             DeviceManager::get()->forTest_registerDevice(
867                     nn::makeSharedDevice(name, new TestDriver13(name, kSuccess)));
868             ASSERT_TRUE(selectDeviceByName(name));
869             break;
870         }
871         default:
872             FAIL() << "Unexpected DriverKind";
873     }
874 
875     EXPECT_EQ(prepareForExecution(true /*measureTiming*/), ANEURALNETWORKS_NO_ERROR);
876 
877     float input1[2] = {1.0f, 2.0f};
878     float input2[2] = {3.0f, 4.0f};
879     float output[2];
880     EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 0, nullptr, input1, sizeof(input1)),
881               ANEURALNETWORKS_NO_ERROR);
882     EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 1, nullptr, input2, sizeof(input2)),
883               ANEURALNETWORKS_NO_ERROR);
884     EXPECT_EQ(ANeuralNetworksExecution_setOutput(mExecution, 0, nullptr, output, sizeof(output)),
885               ANEURALNETWORKS_NO_ERROR);
886     EXPECT_EQ(ANeuralNetworksExecution_setMeasureTiming(mExecution, true),
887               ANEURALNETWORKS_NO_ERROR);
888 
889     auto Check = [](bool expectPass, int result) {
890         if (expectPass) {
891             ASSERT_EQ(result, ANEURALNETWORKS_NO_ERROR);
892         } else {
893             ASSERT_NE(result, ANEURALNETWORKS_NO_ERROR);
894         }
895     };
896 
897     const bool isPass = hasBit(kSuccess, Success::PASS_BIT);
898     const int expectedGetDurationResultCode =
899             isPass ? ANEURALNETWORKS_NO_ERROR : ANEURALNETWORKS_BAD_STATE;
900 
901     const auto getDurationWhileRunning = [this] {
902         if (kDriverKind == DriverKind::CPU) {
903             // Testing DriverKind::CPU would require modifying the CPU execution
904             // path to control execution completion, similarly to how this test
905             // case does with TestPreparedModel::dummyExecution(). This does not
906             // seem worthwhile -- it's intrusive into the runtime code solely
907             // for the sake of testing, and we do not expect that the code paths
908             // needed to ensure correct behavior of
909             // ANeuralNetworksExecution_getDuration() on a running execution
910             // would be any different for CPU than for actual drivers.
911             return;
912         }
913         TestPreparedModelLatest::waitForExecutionToBegin();
914         for (int durationCode :
915              std::vector{ANEURALNETWORKS_DURATION_ON_HARDWARE, ANEURALNETWORKS_DURATION_IN_DRIVER,
916                          ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE,
917                          ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER}) {
918             uint64_t time;
919             // Cannot query duration while execution is running
920             EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, durationCode, &time),
921                       ANEURALNETWORKS_BAD_STATE);
922         }
923     };
924 
925     switch (kCompute) {
926         case Compute::ASYNC: {
927             // Ideally what we'd like to do here is
928             //
929             //     Check(kSuccess != Success::FAIL_LAUNCH,
930             //         ANeuralNetworksExecution_startCompute(mExecution, &mEvent));
931             //     Check(isPass, ANeuralNetworksEvent_wait(mEvent));
932             //
933             // However, in the current implementation of the runtime, a launch
934             // failure at the HAL level does not show up as a launch failure at
935             // the NDK level ("startCompute"): The NNAPI runtime does not call a
936             // driver until it (the runtime) begins execution, so a launch
937             // failure at the HAL level looks like an execution failure at the
938             // NDK level ("wait").
939             SCOPED_TRACE("ASYNC startCompute");
940             TestPreparedModelLatest::pauseExecutions(true);
941             Check(true,  // rather than kSuccess != Success::FAIL_LAUNCH
942                   ANeuralNetworksExecution_startCompute(mExecution, &mEvent));
943             getDurationWhileRunning();
944             TestPreparedModelLatest::pauseExecutions(false);
945             SCOPED_TRACE("ASYNC wait");
946             Check(isPass, ANeuralNetworksEvent_wait(mEvent));
947             break;
948         }
949         case Compute::SYNC: {
950             SCOPED_TRACE("SYNC");
951             TestPreparedModelLatest::pauseExecutions(true);
952             std::thread run([this, Check, isPass] {
953                 Check(isPass, ANeuralNetworksExecution_compute(mExecution));
954             });
955             getDurationWhileRunning();
956             TestPreparedModelLatest::pauseExecutions(false);
957             run.join();
958             break;
959         }
960         case Compute::BURST: {
961             SCOPED_TRACE("BURST");
962             ANeuralNetworksBurst* burst;
963             ASSERT_EQ(ANeuralNetworksBurst_create(mCompilation, &burst), ANEURALNETWORKS_NO_ERROR);
964             TestPreparedModelLatest::pauseExecutions(true);
965             std::thread run([this, Check, isPass, burst] {
966                 Check(isPass, ANeuralNetworksExecution_burstCompute(mExecution, burst));
967             });
968             getDurationWhileRunning();
969             TestPreparedModelLatest::pauseExecutions(false);
970             run.join();
971             ANeuralNetworksBurst_free(burst);
972             break;
973         }
974         case Compute::FENCED: {
975             SCOPED_TRACE("FENCED startComputeWithDependencies");
976             TestPreparedModelLatest::pauseExecutions(true);
977 
978             // Note, due to the limitation of SampleDriver implementation, the call is synchronous.
979             // If the SampleDriver is updated to return real sync fence, this must be updated.
980             std::thread run([this, Check, isPass] {
981                 Check(isPass, ANeuralNetworksExecution_startComputeWithDependencies(
982                                       mExecution, nullptr, 0, 0, &mEvent));
983             });
984             getDurationWhileRunning();
985             TestPreparedModelLatest::pauseExecutions(false);
986             run.join();
987             SCOPED_TRACE("FENCED wait");
988             Check(isPass, ANeuralNetworksEvent_wait(mEvent));
989             break;
990         }
991         default:
992             FAIL() << "unreachable";
993     }
994 
995     uint64_t timeOnHardware, timeInDriver, timeOnHardwareFenced, timeInDriverFenced;
996     EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, ANEURALNETWORKS_DURATION_ON_HARDWARE,
997                                                    &timeOnHardware),
998               expectedGetDurationResultCode);
999     EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, ANEURALNETWORKS_DURATION_IN_DRIVER,
1000                                                    &timeInDriver),
1001               expectedGetDurationResultCode);
1002     EXPECT_EQ(
1003             ANeuralNetworksExecution_getDuration(
1004                     mExecution, ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE, &timeOnHardwareFenced),
1005             expectedGetDurationResultCode);
1006     EXPECT_EQ(ANeuralNetworksExecution_getDuration(
1007                       mExecution, ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER, &timeInDriverFenced),
1008               expectedGetDurationResultCode);
1009     switch (kDriverKind) {
1010         case DriverKind::CPU: {
1011             // TODO: Should we require timing to be reported as 0?
1012             EXPECT_TRUE(timeOnHardware == 0 || timeOnHardware == UINT64_MAX)
1013                     << "timeOnHardware = " << timeOnHardware;
1014             EXPECT_TRUE(timeInDriver == 0 || timeInDriver == UINT64_MAX)
1015                     << "timeInDriver = " << timeOnHardware;
1016             EXPECT_TRUE(timeOnHardwareFenced == 0 || timeOnHardwareFenced == UINT64_MAX)
1017                     << "timeOnHardwareFenced = " << timeOnHardwareFenced;
1018             EXPECT_TRUE(timeInDriverFenced == 0 || timeInDriverFenced == UINT64_MAX)
1019                     << "timeInDriver = " << timeInDriverFenced;
1020             break;
1021         }
1022         case DriverKind::OLD: {
1023             EXPECT_EQ(timeOnHardware, UINT64_MAX);
1024             EXPECT_EQ(timeInDriver, UINT64_MAX);
1025             EXPECT_EQ(timeOnHardwareFenced, UINT64_MAX);
1026             EXPECT_EQ(timeInDriverFenced, UINT64_MAX);
1027             break;
1028         }
1029         case DriverKind::NEW: {
1030             auto microsToNanos = [](uint64_t micros) {
1031                 constexpr uint64_t kNanosPerMicro = 1000;
1032                 return micros == UINT64_MAX ? UINT64_MAX : kNanosPerMicro * micros;
1033             };
1034             auto expectedTiming = getExpectedTiming(kSuccess, kCompute == Compute::FENCED);
1035             EXPECT_EQ(timeOnHardware, microsToNanos(expectedTiming.first.timeOnDevice));
1036             EXPECT_EQ(timeInDriver, microsToNanos(expectedTiming.first.timeInDriver));
1037             EXPECT_EQ(timeOnHardwareFenced, microsToNanos(expectedTiming.second.timeOnDevice));
1038             EXPECT_EQ(timeInDriverFenced, microsToNanos(expectedTiming.second.timeInDriver));
1039             break;
1040         }
1041         default:
1042             FAIL() << "unreachable";
1043     }
1044     if (kCompute != Compute::FENCED) {
1045         EXPECT_EQ(timeOnHardware, timeOnHardwareFenced);
1046         EXPECT_EQ(timeInDriver, timeInDriverFenced);
1047     }
1048     auto expectTimingLe = [](uint64_t a, const char* aName, uint64_t b, const char* bName) {
1049         if (a != UINT64_MAX && b != UINT64_MAX) {
1050             EXPECT_LE(a, b) << aName << " exceeds " << bName;
1051         }
1052     };
1053 #define EXPECT_TIMING_LE(a, b) expectTimingLe(a, #a, b, #b)
1054     EXPECT_TIMING_LE(timeOnHardware, timeInDriver);
1055     EXPECT_TIMING_LE(timeOnHardwareFenced, timeInDriverFenced);
1056 
1057     EXPECT_TIMING_LE(timeOnHardwareFenced, timeOnHardware);
1058     EXPECT_TIMING_LE(timeInDriverFenced, timeInDriver);
1059 #undef EXPECT_TIMING_LE
1060 }
1061 
// Parameter combinations for the unfenced compute paths (ASYNC/SYNC/BURST).
auto kTimingTestUnfencedValues = ::testing::Values(
        // NOTE: We cannot force CPU execution to fail
        std::make_tuple(DriverKind::CPU, Success::PASS_CPU, Compute::ASYNC),
        std::make_tuple(DriverKind::CPU, Success::PASS_CPU, Compute::SYNC),
        std::make_tuple(DriverKind::CPU, Success::PASS_CPU, Compute::BURST),

        // NOTE: OLD driver does not provide timing
        std::make_tuple(DriverKind::OLD, Success::PASS_NEITHER, Compute::ASYNC),
        std::make_tuple(DriverKind::OLD, Success::PASS_NEITHER, Compute::SYNC),
        std::make_tuple(DriverKind::OLD, Success::PASS_NEITHER, Compute::BURST),

        std::make_tuple(DriverKind::OLD, Success::FAIL_LAUNCH, Compute::ASYNC),
        std::make_tuple(DriverKind::OLD, Success::FAIL_LAUNCH, Compute::SYNC),
        std::make_tuple(DriverKind::OLD, Success::FAIL_LAUNCH, Compute::BURST),

        // NOTE: Only ASYNC is paired with a wait
        std::make_tuple(DriverKind::OLD, Success::FAIL_WAIT, Compute::ASYNC),

        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER, Compute::ASYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER, Compute::SYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER, Compute::BURST),

        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE, Compute::ASYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE, Compute::SYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE, Compute::BURST),

        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER, Compute::ASYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER, Compute::SYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER, Compute::BURST),

        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH, Compute::ASYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH, Compute::SYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH, Compute::BURST),

        std::make_tuple(DriverKind::NEW, Success::FAIL_LAUNCH, Compute::ASYNC),
        std::make_tuple(DriverKind::NEW, Success::FAIL_LAUNCH, Compute::SYNC),
        std::make_tuple(DriverKind::NEW, Success::FAIL_LAUNCH, Compute::BURST),

        // NOTE: Only ASYNC is paired with a wait
        std::make_tuple(DriverKind::NEW, Success::FAIL_WAIT, Compute::ASYNC));
1102 
// Parameter combinations for the FENCED compute path.
auto kTimingTestFencedValues = ::testing::Values(
        // NOTE: We cannot force CPU execution to fail
        std::make_tuple(DriverKind::CPU, Success::PASS_CPU, Compute::FENCED),

        // NOTE: OLD driver does not provide timing
        std::make_tuple(DriverKind::OLD, Success::PASS_NEITHER, Compute::FENCED),

        std::make_tuple(DriverKind::OLD, Success::FAIL_LAUNCH, Compute::FENCED),

        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER_DEVICE, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER_DRIVER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER_BOTH, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE_DEVICE, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE_DRIVER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE_BOTH, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER_DEVICE, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER_DRIVER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER_BOTH, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH_DEVICE, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH_DRIVER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH_BOTH, Compute::FENCED),

        std::make_tuple(DriverKind::NEW, Success::FAIL_LAUNCH, Compute::FENCED));
1130 
// Register both parameter sets with the TimingTest fixture.
INSTANTIATE_TEST_SUITE_P(Unfenced, TimingTest, kTimingTestUnfencedValues);
INSTANTIATE_TEST_SUITE_P(Fenced, TimingTest, kTimingTestFencedValues);
1133 
1134 }  // namespace timing_tests
1135 
1136 /*-- End   timing tests -------------------------------------------------------------------------*/
1137 
1138 const float kSimpleCeiling = 2.0f;
1139 
createAddMaxModel(WrapperModel * model,bool reverseOrder)1140 void createAddMaxModel(WrapperModel* model, bool reverseOrder) {
1141     WrapperOperandType type0(WrapperType::TENSOR_FLOAT32, {2});
1142     WrapperOperandType type1(WrapperType::INT32, {});
1143     // Phase 1, operands
1144     auto op1 = model->addOperand(&type0);
1145     auto op2 = model->addOperand(&type0);
1146     auto act = model->addOperand(&type1);
1147     auto op3 = model->addOperand(&type0);
1148     auto op4 = model->addOperand(&type0);
1149     auto op5 = model->addOperand(&type0);
1150     // Phase 2, operations
1151     static int32_t act_init[] = {0};
1152     model->setOperandValue(act, act_init, sizeof(act_init));
1153     static float ceiling[] = {kSimpleCeiling, kSimpleCeiling};
1154     model->setOperandValue(op4, ceiling, sizeof(ceiling));
1155     if (reverseOrder) {
1156         // In this case, add MAXIMUM first, but the execution order is still ADD -> MAXIMUM.
1157         model->addOperation(ANEURALNETWORKS_MAXIMUM, {op3, op4}, {op5});
1158         model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
1159     } else {
1160         model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
1161         model->addOperation(ANEURALNETWORKS_MAXIMUM, {op3, op4}, {op5});
1162     }
1163     // Phase 3, inputs and outputs
1164     model->identifyInputsAndOutputs({op1, op2}, {op5});
1165     model->finish();
1166     ASSERT_TRUE(model->isValid());
1167 }
1168 
TEST_F(IntrospectionControlTest,SlicingAddMax)1169 TEST_F(IntrospectionControlTest, SlicingAddMax) {
1170     // This is needed before we have the CPU fallback path being treated as a Device.
1171     if (DeviceManager::get()->getUseCpuOnly()) {
1172         GTEST_SKIP();
1173     }
1174 
1175     using namespace test_drivers;
1176 
1177     static const char name[] = "driver11";
1178     DeviceManager::get()->forTest_registerDevice(
1179             nn::makeSharedDevice(name, new TestDriver11(name, Success::PASS_BOTH)));
1180     ASSERT_TRUE(selectDeviceByName(name));
1181 
1182     createAddMaxModel(&mModel, false);
1183     EXPECT_TRUE(isSupportedOpListExpected({true, false}));
1184 }
1185 
TEST_F(IntrospectionControlTest,SlicingMaxAdd)1186 TEST_F(IntrospectionControlTest, SlicingMaxAdd) {
1187     // This is needed before we have the CPU fallback path being treated as a Device.
1188     if (DeviceManager::get()->getUseCpuOnly()) {
1189         GTEST_SKIP();
1190     }
1191 
1192     using namespace test_drivers;
1193 
1194     static const char name[] = "driver11";
1195     DeviceManager::get()->forTest_registerDevice(
1196             nn::makeSharedDevice(name, new TestDriver11(name, Success::PASS_BOTH)));
1197     ASSERT_TRUE(selectDeviceByName(name));
1198 
1199     createAddMaxModel(&mModel, true);
1200     EXPECT_TRUE(isSupportedOpListExpected({false, true}));
1201 }
1202 
1203 const float kSimpleMultiplier = 2.0f;
1204 
createAddMulModel(WrapperModel * model,bool reverseOrder)1205 void createAddMulModel(WrapperModel* model, bool reverseOrder) {
1206     WrapperOperandType type0(WrapperType::TENSOR_FLOAT32, {2});
1207     WrapperOperandType type1(WrapperType::INT32, {});
1208     // Phase 1, operands
1209     auto op1 = model->addOperand(&type0);
1210     auto op2 = model->addOperand(&type0);
1211     auto act = model->addOperand(&type1);
1212     auto op3 = model->addOperand(&type0);
1213     auto op4 = model->addOperand(&type0);
1214     auto op5 = model->addOperand(&type0);
1215     // Phase 2, operations
1216     static int32_t act_init[] = {0};
1217     model->setOperandValue(act, act_init, sizeof(act_init));
1218     static float multiplier[] = {kSimpleMultiplier, kSimpleMultiplier};
1219     model->setOperandValue(op4, multiplier, sizeof(multiplier));
1220     if (reverseOrder) {
1221         // In this case, add MUL first, but the execution order is still ADD -> MUL.
1222         model->addOperation(ANEURALNETWORKS_MUL, {op3, op4, act}, {op5});
1223         model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
1224     } else {
1225         model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
1226         model->addOperation(ANEURALNETWORKS_MUL, {op3, op4, act}, {op5});
1227     }
1228     // Phase 3, inputs and outputs
1229     model->identifyInputsAndOutputs({op1, op2}, {op5});
1230     model->finish();
1231     ASSERT_TRUE(model->isValid());
1232 }
1233 
// Verifies that an ADD->MUL model is reported as fully supported by the 1.1
// test driver (both operations exist in HAL 1.1).
TEST_F(IntrospectionControlTest, SlicingFullySupported) {
    // Required until the CPU fallback path is treated as a regular Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    using namespace test_drivers;

    // Register a 1.1 driver and route the compilation to it exclusively.
    static const char kDriverName[] = "driver11";
    DeviceManager::get()->forTest_registerDevice(
            nn::makeSharedDevice(kDriverName, new TestDriver11(kDriverName, Success::PASS_BOTH)));
    ASSERT_TRUE(selectDeviceByName(kDriverName));

    createAddMulModel(&mModel, false);
    EXPECT_TRUE(isSupportedOpListExpected({true, true}));
}
1250 
createCondModel(WrapperModel * model,bool dynamicRank)1251 void createCondModel(WrapperModel* model, bool dynamicRank) {
1252     const auto dimensions = dynamicRank ? std::vector<uint32_t>{} : std::vector<uint32_t>{1};
1253     WrapperOperandType floatType(WrapperType::TENSOR_FLOAT32, dimensions);
1254     WrapperOperandType boolType(WrapperType::TENSOR_BOOL8, {1});
1255     // Phase 1, operands
1256     auto op1 = model->addOperand(&floatType);
1257     auto op2 = model->addOperand(&boolType);
1258     // Phase 2, operations
1259     model->addOperation(ANEURALNETWORKS_LESS, {op1, op1}, {op2});
1260     // Phase 3, inputs and outputs
1261     model->identifyInputsAndOutputs({op1}, {op2});
1262     model->finish();
1263 }
1264 
addReluOperation(WrapperModel * model,std::vector<uint32_t> * modelInputIndexes,std::vector<uint32_t> * modelOutputIndexes,bool dynamicRank)1265 void addReluOperation(WrapperModel* model, std::vector<uint32_t>* modelInputIndexes,
1266                       std::vector<uint32_t>* modelOutputIndexes, bool dynamicRank) {
1267     const auto dimensions = dynamicRank ? std::vector<uint32_t>{} : std::vector<uint32_t>{1};
1268     WrapperOperandType type(WrapperType::TENSOR_FLOAT32, dimensions);
1269     // Phase 1, operands
1270     auto op1 = model->addOperand(&type);
1271     auto op2 = model->addOperand(&type);
1272     // Phase 2, operations
1273     model->addOperation(ANEURALNETWORKS_RELU, {op1}, {op2});
1274     // Phase 3, inputs and outputs
1275     modelInputIndexes->push_back(op1);
1276     modelOutputIndexes->push_back(op2);
1277 }
1278 
createReluModel(WrapperModel * model,bool dynamicRank)1279 void createReluModel(WrapperModel* model, bool dynamicRank) {
1280     std::vector<uint32_t> modelInputIndexes, modelOutputIndexes;
1281     addReluOperation(model, &modelInputIndexes, &modelOutputIndexes, dynamicRank);
1282     model->identifyInputsAndOutputs(modelInputIndexes, modelOutputIndexes);
1283     model->finish();
1284 }
1285 
addWhileOperation(std::vector<WrapperModel> * extraModels,WrapperModel * mainModel,std::vector<uint32_t> * modelInputIndexes,std::vector<uint32_t> * modelOutputIndexes,bool dynamicRank)1286 void addWhileOperation(std::vector<WrapperModel>* extraModels, WrapperModel* mainModel,
1287                        std::vector<uint32_t>* modelInputIndexes,
1288                        std::vector<uint32_t>* modelOutputIndexes, bool dynamicRank) {
1289     const auto dimensions = dynamicRank ? std::vector<uint32_t>{} : std::vector<uint32_t>{1};
1290     WrapperOperandType floatType(WrapperType::TENSOR_FLOAT32, dimensions);
1291     WrapperOperandType modelType(WrapperType::MODEL, {});
1292 
1293     extraModels->emplace_back();
1294     extraModels->emplace_back();
1295     WrapperModel* condModel = &extraModels->at(extraModels->size() - 2);
1296     WrapperModel* bodyModel = &extraModels->at(extraModels->size() - 1);
1297     createCondModel(condModel, dynamicRank);
1298     createReluModel(bodyModel, dynamicRank);
1299     ASSERT_TRUE(condModel->isValid());
1300     ASSERT_TRUE(bodyModel->isValid());
1301 
1302     // Phase 1, operands
1303     const uint32_t op1 = mainModel->addOperand(&modelType);
1304     const uint32_t op2 = mainModel->addOperand(&modelType);
1305     const uint32_t op3 = mainModel->addOperand(&floatType);
1306     const uint32_t op4 = mainModel->addOperand(&floatType);
1307     mainModel->setOperandValueFromModel(op1, condModel);
1308     mainModel->setOperandValueFromModel(op2, bodyModel);
1309     // Phase 2, operations
1310     mainModel->addOperation(ANEURALNETWORKS_WHILE, {op1, op2, op3}, {op4});
1311     // Phase 3, inputs and outputs
1312     modelInputIndexes->push_back(op3);
1313     modelOutputIndexes->push_back(op4);
1314 }
1315 
createReluStaticWhileModel(std::vector<WrapperModel> * extraModels,WrapperModel * mainModel)1316 void createReluStaticWhileModel(std::vector<WrapperModel>* extraModels, WrapperModel* mainModel) {
1317     std::vector<uint32_t> modelInputIndexes, modelOutputIndexes;
1318 
1319     // Operation supported in Android API level 27
1320     addReluOperation(mainModel, &modelInputIndexes, &modelOutputIndexes, /*dynamicRank=*/false);
1321     // Operation supported in Android API level 30
1322     addWhileOperation(extraModels, mainModel, &modelInputIndexes, &modelOutputIndexes,
1323                       /*dynamicRank=*/false);
1324 
1325     mainModel->identifyInputsAndOutputs(modelInputIndexes, modelOutputIndexes);
1326     mainModel->finish();
1327     ASSERT_TRUE(mainModel->isValid());
1328 }
1329 
// A 1.1 driver predates control flow, so WHILE must be reported unsupported
// while the plain RELU remains supported.
TEST_F(IntrospectionControlTest, ControlFlowNotSupported) {
    // Required until the CPU fallback path is treated as a regular Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    using namespace test_drivers;

    // Register a 1.1 driver and route the compilation to it exclusively.
    static const char kDriverName[] = "driver11";
    DeviceManager::get()->forTest_registerDevice(
            nn::makeSharedDevice(kDriverName, new TestDriver11(kDriverName, Success::PASS_BOTH)));
    ASSERT_TRUE(selectDeviceByName(kDriverName));

    std::vector<WrapperModel> extraModels;
    createReluStaticWhileModel(&extraModels, &mModel);
    EXPECT_TRUE(isSupportedOpListExpected({true, false}));

    // Drop mModel before `extraModels` goes out of scope, since the main
    // model may reference the condition/body models stored there.
    mModel = WrapperModel{};
}
1350 
// A 1.3 driver supports control flow, so both RELU and the static-rank WHILE
// must be reported as supported.
TEST_F(IntrospectionControlTest, ControlFlowSupported) {
    // Required until the CPU fallback path is treated as a regular Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    using namespace test_drivers;

    // Register a 1.3 driver and route the compilation to it exclusively.
    static const char kDriverName[] = "driver13";
    DeviceManager::get()->forTest_registerDevice(
            nn::makeSharedDevice(kDriverName, new TestDriver13(kDriverName, Success::PASS_BOTH)));
    ASSERT_TRUE(selectDeviceByName(kDriverName));

    std::vector<WrapperModel> extraModels;
    createReluStaticWhileModel(&extraModels, &mModel);
    EXPECT_TRUE(isSupportedOpListExpected({true, true}));

    // Drop mModel before `extraModels` goes out of scope, since the main
    // model may reference the condition/body models stored there.
    mModel = WrapperModel{};
}
1371 
createStaticWhileDynamicWhileModel(std::vector<WrapperModel> * extraModels,WrapperModel * mainModel)1372 void createStaticWhileDynamicWhileModel(std::vector<WrapperModel>* extraModels,
1373                                         WrapperModel* mainModel) {
1374     std::vector<uint32_t> modelInputIndexes, modelOutputIndexes;
1375 
1376     // Operation supported in Android API level 30
1377     addWhileOperation(extraModels, mainModel, &modelInputIndexes, &modelOutputIndexes,
1378                       /*dynamicRank=*/false);
1379     // Operation supported only by NNAPI runtime
1380     addWhileOperation(extraModels, mainModel, &modelInputIndexes, &modelOutputIndexes,
1381                       /*dynamicRank=*/true);
1382 
1383     mainModel->identifyInputsAndOutputs(modelInputIndexes, modelOutputIndexes);
1384     mainModel->finish();
1385     ASSERT_TRUE(mainModel->isValid());
1386 }
1387 
// With one static-rank and one dynamic-rank WHILE, slicing is expected to
// fail for both operations even on a 1.3 driver.
TEST_F(IntrospectionControlTest, ControlFlowFailedToSlice) {
    // Required until the CPU fallback path is treated as a regular Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    using namespace test_drivers;

    // Register a 1.3 driver and route the compilation to it exclusively.
    static const char kDriverName[] = "driver13";
    DeviceManager::get()->forTest_registerDevice(
            nn::makeSharedDevice(kDriverName, new TestDriver13(kDriverName, Success::PASS_BOTH)));
    ASSERT_TRUE(selectDeviceByName(kDriverName));

    std::vector<WrapperModel> extraModels;
    createStaticWhileDynamicWhileModel(&extraModels, &mModel);
    EXPECT_TRUE(isSupportedOpListExpected({false, false}));

    // Drop mModel before `extraModels` goes out of scope, since the main
    // model may reference the condition/body models stored there.
    mModel = WrapperModel{};
}
1408 
1409 // TODO(miaowang): add a test to make sure ANNCompilation_create() has CPU
1410 // fallback.
1411 // This test verifies that a device that could only handle ADD would correctly report that an
1412 // ADD->MUL model could not be fully supported.
TEST_F(IntrospectionControlTest, PartialModelNotSupported) {
    // Required until the CPU fallback path is treated as a regular Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    createAddMulModel(&mModel, false);

    // Register a driver that supports ADD and nothing else.
    const std::string addOnlyDriver = "test-onlyAdd";
    std::vector<bool> addOnlyOp(android::nn::kNumberOfOperationTypes, false);
    addOnlyOp[ANEURALNETWORKS_ADD] = true;
    registerDevices({{addOnlyDriver, 0.9, addOnlyOp}});

    EXPECT_TRUE(selectDeviceByName(addOnlyDriver));
    // ADD is supported, MUL is not.
    EXPECT_TRUE(isSupportedOpListExpected({true, false}));

    // Creating the compilation succeeds...
    ANeuralNetworksModel* modelHandle = mModel.getHandle();
    EXPECT_EQ(ANeuralNetworksCompilation_createForDevices(modelHandle, mDevices.data(),
                                                          mDevices.size(), &mCompilation),
              ANEURALNETWORKS_NO_ERROR);
    // ...but finishing it must fail: the Introspection API provides no
    // fallback for the unsupported MUL operation.
    EXPECT_NE(ANeuralNetworksCompilation_finish(mCompilation), ANEURALNETWORKS_NO_ERROR);
}
1438 
1439 // This test verifies that a device that could only handle ADD would correctly report that an
1440 // ADD->MUL model could not be fully supported. Also verifies that the indices of returned
1441 // supported op list correctly map to the order of operations being added by the user.
TEST_F(IntrospectionControlTest, PartialModelNotSupportedOrder) {
    // Required until the CPU fallback path is treated as a regular Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    // MUL is added to the model before ADD.
    createAddMulModel(&mModel, true);

    // Register a driver that supports ADD and nothing else.
    const std::string addOnlyDriver = "test-onlyAdd";
    std::vector<bool> addOnlyOp(android::nn::kNumberOfOperationTypes, false);
    addOnlyOp[ANEURALNETWORKS_ADD] = true;
    registerDevices({{addOnlyDriver, 0.9, addOnlyOp}});

    EXPECT_TRUE(selectDeviceByName(addOnlyDriver));
    // The supported list mirrors insertion order: MUL (unsupported) first,
    // ADD (supported) second.
    EXPECT_TRUE(isSupportedOpListExpected({false, true}));
}
1459 
1460 // TODO(miaowang): update the test to make sure the model is actually running on the test devices.
1461 // This test verifies that an ADD->MUL model is able to run on two selected devices that together
1462 // can handle all operations.
TEST_F(IntrospectionControlTest, ModelNeedTwoDevices) {
    // Required until the CPU fallback path is treated as a regular Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    createAddMulModel(&mModel, false);

    // One driver that handles only ADD...
    const std::string addOnlyDriver = "test-onlyAdd";
    std::vector<bool> addOnlyOp(android::nn::kNumberOfOperationTypes, false);
    addOnlyOp[ANEURALNETWORKS_ADD] = true;

    // ...and another that handles only MUL.
    const std::string mulOnlyDriver = "test-onlyMul";
    std::vector<bool> mulOnlyOp(android::nn::kNumberOfOperationTypes, false);
    mulOnlyOp[ANEURALNETWORKS_MUL] = true;

    registerDevices({
            {addOnlyDriver, 0.9, addOnlyOp},
            {mulOnlyDriver, 0.9, mulOnlyOp},
    });

    // Together the two devices cover the whole model.
    EXPECT_TRUE(selectDeviceByName(addOnlyDriver));
    EXPECT_TRUE(selectDeviceByName(mulOnlyDriver));
    EXPECT_TRUE(isSupportedOpListExpected({true, true}));
    EXPECT_EQ(prepareForExecution(), ANEURALNETWORKS_NO_ERROR);

    float input1[2] = {1.0f, 2.0f};
    float input2[2] = {3.0f, 4.0f};
    float output[2];
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 0, nullptr, input1, sizeof(input1)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 1, nullptr, input2, sizeof(input2)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setOutput(mExecution, 0, nullptr, output, sizeof(output)),
              ANEURALNETWORKS_NO_ERROR);

    EXPECT_EQ(ANeuralNetworksExecution_startCompute(mExecution, &mEvent), ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksEvent_wait(mEvent), ANEURALNETWORKS_NO_ERROR);
    // Expected result: (input1 + input2) * kSimpleMultiplier, element-wise.
    for (int i = 0; i < 2; ++i) {
        EXPECT_EQ(output[i], kSimpleMultiplier * (input1[i] + input2[i]));
    }
}
1504 }  // namespace
1505