1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <ControlFlow.h>
18 #include <HalInterfaces.h>
19 #include <SampleDriver.h>
20 #include <Utils.h>
21 #include <ValidateHal.h>
22 #include <gtest/gtest.h>
23
24 #include <algorithm>
25 #include <filesystem>
26 #include <functional>
27 #include <iostream>
28 #include <map>
29 #include <memory>
30 #include <numeric>
31 #include <queue>
32 #include <set>
33 #include <string>
34 #include <tuple>
35 #include <type_traits>
36 #include <utility>
37 #include <vector>
38
39 #include "CompilationBuilder.h"
40 #include "ExecutionPlan.h"
41 #include "HalUtils.h"
42 #include "Manager.h"
43 #include "ModelBuilder.h"
44 #include "NeuralNetworks.h"
45 #include "NeuralNetworksOEM.h"
46 #include "TestNeuralNetworksWrapper.h"
47
48 // Uncomment the following line to generate some debugging output that
49 // may be useful when analyzing failures:
50 //
51 // #define VERBOSE VERBOSE
52
53 // These tests do whitebox testing of the graph partitioning
54 // algorithm. It is "whitebox" in the sense that we're not evaluating
55 // whether a particular partitioning is legal, or "good enough"
56 // according to some metric, but whether it exactly matches the
57 // expected behavior of the current partitioning algorithm.
58 //
59 // A key part of the current partitioning algorithm is to determine
60 // which device among the available devices should be the one to
61 // execute a particular operation from the graph. This determination
62 // is made "locally" -- i.e., it does not depend on the graph
63 // topology, only on the properties of the operation in question.
64 // IDevice::getSupportedOperations() indicates which operations in a
65 // graph can be executed on a device, and IDevice::getCapabilities()
66 // indicates how "good" that device is for executing particular kinds
67 // of operations. For each operation, the partitioning algorithm
68 // picks the "best" device that is capable of executing that
69 // operation; if no device can do so, then the algorithm picks the
70 // cpu.
71 //
72 // As part of this testing approach, we want to make it easy to
73 // specify which operations in a test graph can be executed on which
74 // devices. We accomplish this in the following way:
75 // - A unary OEM operation is available.
76 // - There is a collection of operations (each of which has two inputs
77 // and one output):
78 // - Eight kinds of operations available at driver version V1_0 or
79 // later. They are represented in the graph as ADD or MUL with a
80 // particular activation function -- two opcodes times four
81 // activation functions means eight available operation kinds.
82 // This is a low-level representation detail -- when we specify the
83 // behavior of the device or build a graph, we do so in terms of
84 // operation encodings 0..7.
85 // - Eight kinds of operations available at driver version V1_1 or
86 // later. They are represented in the graph as DIV or SUB with
87 // a particular activation function, exactly analogous to ADD
88 // and MUL above. We use operation encodings 8..15 for them.
89 // - Four kinds of operations available at driver version V1_2 or
90 // later. They are represented in the graph as MAXIMUM,
91 // MINIMUM, POW, or PRELU. These operations take no activation
92 // function, so we only get 4 operation kinds, for which we
93 // use operation encodings 16..19.
94 // - There is another collection of operations (each of which has one input
95 // and one output):
96 // - Single operation available at driver version V1_3 or
97 // later. It is represented in the graph as HARD_SWISH.
98 // These operations take no activation function, for which we
99 // use operation encodings 20..20.
100
101 // When we instantiate a device for testing purposes, we specify what subset of
102 // those operations the device is able to execute.
103 //
104 // In order to determine whether or not a partitioning matches the
105 // expected partitioning, we check the number of partitions, check
106 // which device each partition targets, and compare each partition's
107 // subgraph, model inputs, model outputs, step model inputs, and
108 // step model outputs against what is expected. In order to perform
109 // that comparison, we build a model to compare against a partition's
110 // step model and run a graph comparison algorithm on it. The graph
111 // comparison and the inputs and outputs comparisons are syntactic
112 // rather than semantic comparisons -- they don't allow for
113 // reorderings of inputs and outputs. Because of this, we need to
114 // know exactly how the partitioning algorithm orders inputs and
115 // outputs in order to construct the models and operand lists to
116 // compare against. Here are some relevant behaviors of the
117 // partitioning algorithm:
118 //
119 // - It builds a subgraph by walking operations in forward topological
120 // order, and adding each operation's input operands and output
121 // operands in index order (input followed by output) when that
122 // operation is added. (It does not add an input that has already
123 // been added.)
124 // - It finds model inputs, model outputs, and step model inputs in
125 // the order the corresponding operands were added to the subgraph
126 // (see ExecutionStep methods getModelInputs(), getModelOutputs(),
127 // getTempsAsStepModelInputs(), getOutputsAsStepModelInputs()).
128 // - It finds temps as step model outputs in numerical order of corresponding
129 // operand number in the original model (see ExecutionStep method
130 // getTempsAsStepModelOutputs()).
131 // - When it calls identifyInputsAndOutputs() on the step model, it
132 // passes inputs from getModelInputs() in order, followed by temps as
133 // step model inputs from getTempsAsStepModelInputs() in order,
134 // followed by outputs as step model inputs from
135 // getOutputsAsStepModelInputs() in order; and it passes outputs from
136 // getModelOutputs() in order followed by step model outputs from
137 // getTempsAsStepModelOutputs() in order.
138 //
139 // TODO: Maybe the logic for comparing a partition to an expected
140 // model should be changed to tolerate reorderings of inputs and
141 // outputs, so that when we build models and lists to compare
142 // against, we don't need to worry about input and output
143 // orderings. But is there a way to do this that still lets us
144 // verify that we have the correct relationships between
145 // an (original) model's inputs and outputs and each step model's
146 // inputs and outputs, as well as the correct relationship
147 // between step model inputs and outputs across partitions?
148
149 namespace {
150
151 namespace hardware = android::hardware;
152 namespace V1_0 = ::android::hardware::neuralnetworks::V1_0;
153 namespace V1_1 = ::android::hardware::neuralnetworks::V1_1;
154 namespace V1_2 = ::android::hardware::neuralnetworks::V1_2;
155 namespace V1_3 = ::android::hardware::neuralnetworks::V1_3;
156 using CompilationBuilder = ::android::nn::CompilationBuilder;
157 using Device = ::android::nn::Device;
158 using DeviceManager = ::android::nn::DeviceManager;
159 using ExecutePreference = ::android::nn::test_wrapper::ExecutePreference;
160 using ExecutePriority = ::android::nn::test_wrapper::ExecutePriority;
161 using ExecutionPlan = ::android::nn::ExecutionPlan;
162 using ExecutionStep = ::android::nn::ExecutionStep;
163 using HalCacheToken = ::android::nn::HalCacheToken;
164 using HalVersion = ::android::nn::HalVersion;
165 using HidlModel = V1_3::Model;
166 using IOType = ::android::nn::IOType;
167 using LogicalStep = ::android::nn::LogicalStep;
168 using ModelBuilder = ::android::nn::ModelBuilder;
169 using Operand = ::android::nn::Operand;
170 using Operation = ::android::nn::Operation;
171 using OptionalTimePoint = ::android::nn::OptionalTimePoint;
172 using Result = ::android::nn::test_wrapper::Result;
173 using SampleDriver = ::android::nn::sample_driver::SampleDriver;
174 using SharedDevice = ::android::nn::SharedDevice;
175 using SourceOperandIndex = ::android::nn::SourceOperandIndex;
176 using StepRole = ::android::nn::StepRole;
177 using WrapperCompilation = ::android::nn::test_wrapper::Compilation;
178 using WrapperExecution = ::android::nn::test_wrapper::Execution;
179 using WrapperModel = ::android::nn::test_wrapper::Model;
180 using WrapperOperandType = ::android::nn::test_wrapper::OperandType;
181 using WrapperSymmPerChannelQuantParams = ::android::nn::test_wrapper::SymmPerChannelQuantParams;
182 using WrapperType = ::android::nn::test_wrapper::Type;
183 using android::sp;
184
update(V1_3::Capabilities * capabilities,V1_3::OperandType type,float perf)185 void update(V1_3::Capabilities* capabilities, V1_3::OperandType type, float perf) {
186 V1_0::PerformanceInfo perfInfo = {.execTime = perf, .powerUsage = perf};
187 ::android::nn::update(&capabilities->operandPerformance, type, perfInfo);
188 }
189
lookupExecTime(const V1_3::Capabilities & capabilities,V1_3::OperandType type)190 float lookupExecTime(const V1_3::Capabilities& capabilities, V1_3::OperandType type) {
191 return ::android::nn::lookup(capabilities.operandPerformance, type).execTime;
192 }
193
min(HalVersion a,HalVersion b)194 HalVersion min(HalVersion a, HalVersion b) {
195 return int32_t(a) < int32_t(b) ? a : b;
196 }
197
// Number of fused activation function codes (NONE, RELU, RELU1, RELU6); each
// (opcode, activation) combination gets its own operation encoding.
const uint32_t kNumFuseCodes = 4;
// Sentinel returned by lookupOperation() when an operation is not one of the
// encoded kinds.
const uint32_t kBadOperation = ~0;

// V1_0 operations: encodings 0..7 (ADD then MUL, four activations each)
const uint32_t kFirstEncodingADD = 0;
const uint32_t kFirstEncodingMUL = kFirstEncodingADD + kNumFuseCodes;
const uint32_t kFirstEncodingV1_0 = kFirstEncodingADD;
const uint32_t kLastEncodingV1_0 = kFirstEncodingMUL + kNumFuseCodes - 1;

// V1_1 operations: encodings 8..15 (DIV then SUB, four activations each)
const uint32_t kFirstEncodingDIV = kLastEncodingV1_0 + 1;
const uint32_t kFirstEncodingSUB = kFirstEncodingDIV + kNumFuseCodes;
const uint32_t kFirstEncodingV1_1 = kFirstEncodingDIV;
const uint32_t kLastEncodingV1_1 = kFirstEncodingSUB + kNumFuseCodes - 1;

// V1_2 operations: encodings 16..19 (MAXIMUM, MINIMUM, POW, PRELU -- these
// take no activation function, so one encoding each)
const uint32_t kFirstEncodingMAXIMUM = kLastEncodingV1_1 + 1;
const uint32_t kFirstEncodingMINIMUM = kFirstEncodingMAXIMUM + 1;
const uint32_t kFirstEncodingPOW = kFirstEncodingMINIMUM + 1;
const uint32_t kFirstEncodingPRELU = kFirstEncodingPOW + 1;
const uint32_t kFirstEncodingV1_2 = kFirstEncodingMAXIMUM;
const uint32_t kLastEncodingV1_2 = kFirstEncodingPRELU;

// V1_3 operations: encoding 20 (HARD_SWISH, no activation function)
const uint32_t kFirstEncodingHARD_SWISH = kLastEncodingV1_2 + 1;
const uint32_t kFirstEncodingV1_3 = kFirstEncodingHARD_SWISH;
const uint32_t kLastEncodingV1_3 = kFirstEncodingHARD_SWISH;

// Maps each encodable operation type to the first encoding in its range
// (an activation-taking operation covers kNumFuseCodes consecutive encodings).
const std::map<V1_3::OperationType, uint32_t> operationToFirstEncoding = {
        {V1_3::OperationType::ADD, kFirstEncodingADD},
        {V1_3::OperationType::MUL, kFirstEncodingMUL},
        {V1_3::OperationType::DIV, kFirstEncodingDIV},
        {V1_3::OperationType::SUB, kFirstEncodingSUB},
        {V1_3::OperationType::MAXIMUM, kFirstEncodingMAXIMUM},
        {V1_3::OperationType::MINIMUM, kFirstEncodingMINIMUM},
        {V1_3::OperationType::POW, kFirstEncodingPOW},
        {V1_3::OperationType::PRELU, kFirstEncodingPRELU},
        {V1_3::OperationType::HARD_SWISH, kFirstEncodingHARD_SWISH},
};

// Sorted in reverse order (std::greater) so that we can use map::lower_bound to
// find an entry whose key is numerically less than or equal to a search value.
// mapped_type is (OperandCode, hasFuseCode).
const std::map<uint32_t, std::pair<uint32_t, bool>, std::greater<>> firstEncodingToOperation = {
        {kFirstEncodingADD, {ANEURALNETWORKS_ADD, true}},
        {kFirstEncodingMUL, {ANEURALNETWORKS_MUL, true}},
        {kFirstEncodingDIV, {ANEURALNETWORKS_DIV, true}},
        {kFirstEncodingSUB, {ANEURALNETWORKS_SUB, true}},
        {kFirstEncodingMAXIMUM, {ANEURALNETWORKS_MAXIMUM, false}},
        {kFirstEncodingMINIMUM, {ANEURALNETWORKS_MINIMUM, false}},
        {kFirstEncodingPOW, {ANEURALNETWORKS_POW, false}},
        {kFirstEncodingPRELU, {ANEURALNETWORKS_PRELU, false}},
        {kFirstEncodingHARD_SWISH, {ANEURALNETWORKS_HARD_SWISH, false}},
};
252
253 // Look up the operation with the specified index in a graph, and return the
254 // operation encoding; or, if for some reason this is not one of the encoded
255 // operations, then return kBadOperation.
lookupOperation(std::function<const V1_3::Operation & (uint32_t)> getOperation,std::function<const V1_3::Operand & (uint32_t)> getOperand,std::function<const uint8_t * (uint32_t)> getValue,uint32_t operationIndex)256 uint32_t lookupOperation(std::function<const V1_3::Operation&(uint32_t)> getOperation,
257 std::function<const V1_3::Operand&(uint32_t)> getOperand,
258 std::function<const uint8_t*(uint32_t)> getValue,
259 uint32_t operationIndex) {
260 const V1_3::Operation& operation = getOperation(operationIndex);
261 switch (operation.type) {
262 case V1_3::OperationType::ADD:
263 case V1_3::OperationType::MUL:
264 case V1_3::OperationType::DIV:
265 case V1_3::OperationType::SUB: {
266 // input2 is the fused activation function
267 const V1_3::Operand& input2 = getOperand(operation.inputs[2]);
268 if ((input2.type == V1_3::OperandType::INT32) &&
269 (input2.lifetime == V1_3::OperandLifeTime::CONSTANT_COPY)) {
270 int32_t value;
271 CHECK_EQ(sizeof(value), input2.location.length);
272 memcpy(&value, getValue(input2.location.offset), input2.location.length);
273 return value + operationToFirstEncoding.at(operation.type);
274 }
275 break;
276 }
277 default: {
278 auto it = operationToFirstEncoding.find(operation.type);
279 if (it != operationToFirstEncoding.end()) {
280 return it->second;
281 }
282 break;
283 }
284 }
285 return kBadOperation;
286 }
287
lookupOperation(const HidlModel & model,const V1_3::Subgraph & subgraph,uint32_t operationIndex)288 uint32_t lookupOperation(const HidlModel& model, const V1_3::Subgraph& subgraph,
289 uint32_t operationIndex) {
290 return lookupOperation(
291 [&subgraph](uint32_t index) -> const V1_3::Operation& {
292 return subgraph.operations[index];
293 },
294 [&subgraph](uint32_t index) -> const V1_3::Operand& {
295 return subgraph.operands[index];
296 },
297 [&model](uint32_t offset) { return &model.operandValues[offset]; }, operationIndex);
298 }
299
#ifdef VERBOSE
// Debugging utility: prints a model's HIDL form, its main input/output
// indexes, and each of its main-subgraph operations to stdout.
void dump(const char* name, const ModelBuilder* model) {
    const HidlModel hidlModel = model->makeHidlModel();
    std::cout << name << ": " << hidlModel << std::endl;
    std::cout << "inputs: " << hidlModel.main.inputIndexes << std::endl;
    std::cout << "outputs: " << hidlModel.main.outputIndexes << std::endl;
    const auto& operations = hidlModel.main.operations;
    for (size_t i = 0; i < operations.size(); i++) {
        std::cout << "operation[" << i << "]: " << operations[i] << std::endl;
    }
}
#endif
312
313 // This is an IDevice for testing purposes. It only has a few interesting
314 // properties, all of which are specified as constructor arguments: device
// capabilities; which subset of operation kinds (0..20) does the device
316 // support; does the device support the OEM operation; does the device support
317 // other operations. The subset is represented with a bitmask, in which
318 // operation kind K corresponds to the bit (1 << K). The other operations are
319 // represented by a set of OperationType.
class PartitioningDriver : public SampleDriver {
   public:
    // Degree of support for the OEM operation, used to exercise the
    // interaction between getSupportedOperations and prepareModel.
    enum OEM {
        OEMNo,          // rejected by getSupportedOperations and prepareModel
        OEMIndecisive,  // accepted by getSupportedOperations but not prepareModel
        OEMYes,         // accepted by getSupportedOperations and prepareModel
    };

    // "operationMask" selects the supported subset of encoded operation kinds
    // (bit K corresponds to operation encoding K). "operationTypes" lists
    // additional supported OperationTypes outside the encoding scheme; it must
    // not contain OEM_OPERATION, and when a mask is supplied it must not
    // overlap the encoded kinds.
    PartitioningDriver(const char* name, const char* version, V1_3::Capabilities capabilities,
                       uint32_t operationMask, OEM oem = OEMNo,
                       std::set<V1_3::OperationType> operationTypes = {})
        : SampleDriver(name),
          mVersionString(version),
          mCapabilities(capabilities),
          mOperationMask(operationMask),
          mOEM(oem),
          mOperationTypes(std::move(operationTypes)) {
        CHECK_EQ(mOperationTypes.count(V1_3::OperationType::OEM_OPERATION), size_t(0));
        if (operationMask) {
            // Encoded kinds must be expressed via the mask, not the type set.
            std::for_each(mOperationTypes.begin(), mOperationTypes.end(),
                          [](V1_3::OperationType type) {
                              CHECK_EQ(operationToFirstEncoding.count(type), size_t(0));
                          });
        }
    }
    ~PartitioningDriver() override {}

    hardware::Return<void> getVersionString(getVersionString_cb cb) override {
        cb(V1_0::ErrorStatus::NONE, mVersionString);
        return hardware::Void();
    }

    // Reports INVALID_ARGUMENT if mOEM is OEMIndecisive and the model contains
    // an OEM operation, or if any operation in the model is unsupported;
    // otherwise defers to SampleDriver::prepareModel_1_3.
    hardware::Return<V1_3::ErrorStatus> prepareModel_1_3(
            const V1_3::Model& model, V1_1::ExecutionPreference preference, V1_3::Priority priority,
            const V1_3::OptionalTimePoint& deadline,
            const hardware::hidl_vec<hardware::hidl_handle>& modelCache,
            const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token,
            const sp<V1_3::IPreparedModelCallback>& callback) override {
        if (mOEM == OEMIndecisive) {
            for (const auto& operation : model.main.operations) {
                if (operation.type == V1_3::OperationType::OEM_OPERATION) {
                    callback->notify_1_3(V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr);
                    return V1_3::ErrorStatus::INVALID_ARGUMENT;
                }
            }
        }

        // NOTE: We verify that all operations in the model are supported.
        V1_3::ErrorStatus outStatus = V1_3::ErrorStatus::INVALID_ARGUMENT;
        auto ret = getSupportedOperations_1_3(
                model, [&outStatus](V1_3::ErrorStatus inStatus,
                                    const hardware::hidl_vec<bool>& supportedOperations) {
                    if (inStatus == V1_3::ErrorStatus::NONE) {
                        if (std::all_of(supportedOperations.begin(), supportedOperations.end(),
                                        [](bool v) { return v; })) {
                            outStatus = V1_3::ErrorStatus::NONE;
                        }
                    }
                });
        if (ret.isOk() && (outStatus == V1_3::ErrorStatus::NONE)) {
            return SampleDriver::prepareModel_1_3(model, preference, priority, deadline, modelCache,
                                                  dataCache, token, callback);
        } else {
            callback->notify_1_3(V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr);
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
    }

    hardware::Return<V1_0::DeviceStatus> getStatus() override {
        return V1_0::DeviceStatus::AVAILABLE;
    }

    hardware::Return<void> getCapabilities_1_3(getCapabilities_1_3_cb cb) override {
        cb(V1_3::ErrorStatus::NONE, mCapabilities);
        return hardware::Void();
    }

    hardware::Return<void> getSupportedOperations_1_3(const V1_3::Model& model,
                                                      getSupportedOperations_1_3_cb cb) override {
        if (!android::nn::validateModel(model)) {
            cb(V1_3::ErrorStatus::INVALID_ARGUMENT, std::vector<bool>());
            return hardware::Void();
        }
        cb(V1_3::ErrorStatus::NONE, getSupportedOperationsForSubgraph(model, model.main));
        return hardware::Void();
    }

    hardware::Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb cb) override {
        cb(V1_0::ErrorStatus::NONE, /*numModelCache=*/1, /*numDataCache=*/1);
        return hardware::Void();
    }

   private:
    // Computes per-operation support for "subgraph" (which must be model.main
    // or one of model.referenced). An operation is supported if its type is in
    // mOperationTypes (and, for IF/WHILE, every referenced subgraph is fully
    // supported, checked recursively), if it is the OEM operation and mOEM
    // permits it, or if its operation encoding's bit is set in mOperationMask.
    std::vector<bool> getSupportedOperationsForSubgraph(const V1_3::Model& model,
                                                        const V1_3::Subgraph& subgraph) {
        CHECK(&subgraph == &model.main ||
              std::find_if(model.referenced.begin(), model.referenced.end(),
                           [&subgraph](const V1_3::Subgraph& refSubgraph) {
                               return &subgraph == &refSubgraph;
                           }) != model.referenced.end());
        // True iff every operation of the subgraph referenced by the operand
        // at refSubgraphOperandIndex is supported (recursively).
        auto supportsEntireSubgraph = [this, &model, &subgraph](uint32_t refSubgraphOperandIndex) {
            CHECK_LT(refSubgraphOperandIndex, subgraph.operands.size());
            const V1_3::Operand& refSubgraphOperand = subgraph.operands[refSubgraphOperandIndex];
            CHECK(refSubgraphOperand.lifetime == V1_3::OperandLifeTime::SUBGRAPH);
            CHECK_LT(refSubgraphOperand.location.offset, model.referenced.size());
            const V1_3::Subgraph& refSubgraph =
                    model.referenced[refSubgraphOperand.location.offset];
            std::vector<bool> supported = getSupportedOperationsForSubgraph(model, refSubgraph);
            return std::all_of(supported.begin(), supported.end(), [](bool x) { return x; });
        };
        const size_t count = subgraph.operations.size();
        std::vector<bool> supported(count);
        for (size_t i = 0; i < count; i++) {
            const V1_3::Operation& operation = subgraph.operations[i];
            if (mOperationTypes.count(operation.type)) {
                if (operation.type == V1_3::OperationType::IF) {
                    namespace op = android::nn::operation_if;
                    CHECK_GE(operation.inputs.size(), op::kFirstInput);
                    supported[i] =
                            supportsEntireSubgraph(operation.inputs[op::kThenModelOperand]) &&
                            supportsEntireSubgraph(operation.inputs[op::kElseModelOperand]);
                } else if (operation.type == V1_3::OperationType::WHILE) {
                    namespace op = android::nn::operation_while;
                    CHECK_GE(operation.inputs.size(), op::kFirstInput);
                    supported[i] =
                            supportsEntireSubgraph(operation.inputs[op::kCondModelOperand]) &&
                            supportsEntireSubgraph(operation.inputs[op::kBodyModelOperand]);
                } else {
                    supported[i] = true;
                }
                continue;
            }
            if (operation.type == V1_3::OperationType::OEM_OPERATION) {
                supported[i] = (mOEM != OEMNo);
                continue;
            }
            supported[i] = false;
            uint32_t operationEncoding = lookupOperation(model, subgraph, i);
            if ((operationEncoding != kBadOperation) &&
                (mOperationMask & (1 << operationEncoding))) {
                supported[i] = true;
            }
        }
        return supported;
    }

    std::string mVersionString;
    V1_3::Capabilities mCapabilities;
    uint32_t mOperationMask;
    OEM mOEM;
    std::set<V1_3::OperationType> mOperationTypes;
};
472
473 // Like PartitioningDriver, but implementing 1.2
474 class PartitioningDriverV1_2 : public V1_2::IDevice {
475 public:
PartitioningDriverV1_2(const char * name,const char * version,V1_3::Capabilities capabilities,uint32_t operationMask,PartitioningDriver::OEM oem=PartitioningDriver::OEMNo,std::set<V1_3::OperationType> operationTypes={})476 PartitioningDriverV1_2(const char* name, const char* version, V1_3::Capabilities capabilities,
477 uint32_t operationMask,
478 PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
479 std::set<V1_3::OperationType> operationTypes = {})
480 : mLatestDriver(new PartitioningDriver(name, version, capabilities, operationMask, oem,
481 operationTypes)) {}
getCapabilities_1_2(getCapabilities_1_2_cb _hidl_cb)482 hardware::Return<void> getCapabilities_1_2(getCapabilities_1_2_cb _hidl_cb) override {
483 return mLatestDriver->getCapabilities_1_2(_hidl_cb);
484 }
getSupportedOperations_1_2(const V1_2::Model & model,getSupportedOperations_1_2_cb _hidl_cb)485 hardware::Return<void> getSupportedOperations_1_2(
486 const V1_2::Model& model, getSupportedOperations_1_2_cb _hidl_cb) override {
487 return mLatestDriver->getSupportedOperations_1_2(model, _hidl_cb);
488 }
prepareModel_1_2(const V1_2::Model & model,V1_1::ExecutionPreference preference,const hardware::hidl_vec<hardware::hidl_handle> & modelCache,const hardware::hidl_vec<hardware::hidl_handle> & dataCache,const HalCacheToken & token,const sp<V1_2::IPreparedModelCallback> & actualCallback)489 hardware::Return<V1_0::ErrorStatus> prepareModel_1_2(
490 const V1_2::Model& model, V1_1::ExecutionPreference preference,
491 const hardware::hidl_vec<hardware::hidl_handle>& modelCache,
492 const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token,
493 const sp<V1_2::IPreparedModelCallback>& actualCallback) override {
494 return mLatestDriver->prepareModel_1_2(model, preference, modelCache, dataCache, token,
495 actualCallback);
496 }
getVersionString(getVersionString_cb _hidl_cb)497 hardware::Return<void> getVersionString(getVersionString_cb _hidl_cb) override {
498 return mLatestDriver->getVersionString(_hidl_cb);
499 }
getType(getType_cb _hidl_cb)500 hardware::Return<void> getType(getType_cb _hidl_cb) override {
501 return mLatestDriver->getType(_hidl_cb);
502 }
getSupportedExtensions(getSupportedExtensions_cb _hidl_cb)503 hardware::Return<void> getSupportedExtensions(getSupportedExtensions_cb _hidl_cb) {
504 return mLatestDriver->getSupportedExtensions(_hidl_cb);
505 }
getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb _hidl_cb)506 hardware::Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb _hidl_cb) {
507 return mLatestDriver->getNumberOfCacheFilesNeeded(_hidl_cb);
508 }
prepareModelFromCache(const hardware::hidl_vec<hardware::hidl_handle> & modelCache,const hardware::hidl_vec<hardware::hidl_handle> & dataCache,const HalCacheToken & token,const sp<V1_2::IPreparedModelCallback> & callback)509 hardware::Return<V1_0::ErrorStatus> prepareModelFromCache(
510 const hardware::hidl_vec<hardware::hidl_handle>& modelCache,
511 const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token,
512 const sp<V1_2::IPreparedModelCallback>& callback) {
513 return mLatestDriver->prepareModelFromCache(modelCache, dataCache, token, callback);
514 }
getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb)515 hardware::Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
516 return mLatestDriver->getCapabilities_1_1(_hidl_cb);
517 }
getSupportedOperations_1_1(const V1_1::Model & model,getSupportedOperations_1_1_cb _hidl_cb)518 hardware::Return<void> getSupportedOperations_1_1(
519 const V1_1::Model& model, getSupportedOperations_1_1_cb _hidl_cb) override {
520 return mLatestDriver->getSupportedOperations_1_1(model, _hidl_cb);
521 }
prepareModel_1_1(const V1_1::Model & model,V1_1::ExecutionPreference preference,const sp<V1_0::IPreparedModelCallback> & actualCallback)522 hardware::Return<V1_0::ErrorStatus> prepareModel_1_1(
523 const V1_1::Model& model, V1_1::ExecutionPreference preference,
524 const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
525 return mLatestDriver->prepareModel_1_1(model, preference, actualCallback);
526 }
getStatus()527 hardware::Return<V1_0::DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }
getCapabilities(getCapabilities_cb _hidl_cb)528 hardware::Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
529 return mLatestDriver->getCapabilities(_hidl_cb);
530 }
getSupportedOperations(const V1_0::Model & model,getSupportedOperations_cb _hidl_cb)531 hardware::Return<void> getSupportedOperations(const V1_0::Model& model,
532 getSupportedOperations_cb _hidl_cb) override {
533 return mLatestDriver->getSupportedOperations(model, _hidl_cb);
534 }
prepareModel(const V1_0::Model & model,const sp<V1_0::IPreparedModelCallback> & actualCallback)535 hardware::Return<V1_0::ErrorStatus> prepareModel(
536 const V1_0::Model& model,
537 const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
538 return mLatestDriver->prepareModel(model, actualCallback);
539 }
540
541 private:
542 const sp<V1_3::IDevice> mLatestDriver;
543 };
544
545 // Like PartitioningDriver, but implementing 1.1
// Adapter that exposes a PartitioningDriver through the V1_1::IDevice
// interface, forwarding every call to an internally owned latest-version
// driver. This lets tests simulate a driver that only implements HAL 1.1.
class PartitioningDriverV1_1 : public V1_1::IDevice {
   public:
    PartitioningDriverV1_1(const char* name, const char* version, V1_3::Capabilities capabilities,
                           uint32_t operationMask,
                           PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                           std::set<V1_3::OperationType> operationTypes = {})
        : mLatestDriver(new PartitioningDriver(name, version, capabilities, operationMask, oem,
                                               operationTypes)) {}
    hardware::Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
        return mLatestDriver->getCapabilities_1_1(_hidl_cb);
    }
    hardware::Return<void> getSupportedOperations_1_1(
            const V1_1::Model& model, getSupportedOperations_1_1_cb _hidl_cb) override {
        return mLatestDriver->getSupportedOperations_1_1(model, _hidl_cb);
    }
    hardware::Return<V1_0::ErrorStatus> prepareModel_1_1(
            const V1_1::Model& model, V1_1::ExecutionPreference preference,
            const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
        return mLatestDriver->prepareModel_1_1(model, preference, actualCallback);
    }
    hardware::Return<V1_0::DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }
    hardware::Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
        return mLatestDriver->getCapabilities(_hidl_cb);
    }
    hardware::Return<void> getSupportedOperations(const V1_0::Model& model,
                                                  getSupportedOperations_cb _hidl_cb) override {
        return mLatestDriver->getSupportedOperations(model, _hidl_cb);
    }
    hardware::Return<V1_0::ErrorStatus> prepareModel(
            const V1_0::Model& model,
            const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
        return mLatestDriver->prepareModel(model, actualCallback);
    }

   private:
    // The wrapped latest-version driver that actually implements the behavior.
    const sp<V1_3::IDevice> mLatestDriver;
};
583
584 // Like PartitioningDriver, but implementing 1.0
// Adapter that exposes a PartitioningDriver through the V1_0::IDevice
// interface, forwarding every call to an internally owned latest-version
// driver. This lets tests simulate a driver that only implements HAL 1.0.
class PartitioningDriverV1_0 : public V1_0::IDevice {
   public:
    PartitioningDriverV1_0(const char* name, const char* version, V1_3::Capabilities capabilities,
                           uint32_t operationMask,
                           PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                           std::set<V1_3::OperationType> operationTypes = {})
        : mLatestDriver(new PartitioningDriver(name, version, capabilities, operationMask, oem,
                                               operationTypes)) {}
    hardware::Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
        return mLatestDriver->getCapabilities(_hidl_cb);
    }
    hardware::Return<void> getSupportedOperations(const V1_0::Model& model,
                                                  getSupportedOperations_cb _hidl_cb) override {
        return mLatestDriver->getSupportedOperations(model, _hidl_cb);
    }
    hardware::Return<V1_0::ErrorStatus> prepareModel(
            const V1_0::Model& model,
            const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
        return mLatestDriver->prepareModel(model, actualCallback);
    }
    hardware::Return<V1_0::DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }

   private:
    // The wrapped latest-version driver that actually implements the behavior.
    const sp<V1_3::IDevice> mLatestDriver;
};
610
// How (and whether) test operands are dimensioned when they are created; see
// dimensions() below for the concrete shape each enumerator denotes.
611 enum class Dimensioned {
612 NO, // either a scalar, or a tensor of either unspecified rank (usually)
613 // or specified rank but with no specified dimensions (where
614 // specifically stated)
615 RANK_1, // tensor of shape { 0 } -- i.e., rank 1, unspecified dimensions
616 RANK_2, // tensor of shape { 0, 0 } -- i.e., rank 2, unspecified dimensions
617 YES_1, // tensor of shape { 1 }
618 YES_2, // tensor of shape { 2 }
619 YES_4, // tensor of shape { 4 }
620 YES = YES_1
621 };
622
dimensions(Dimensioned dimensioned)623 std::vector<uint32_t> dimensions(Dimensioned dimensioned) {
624 switch (dimensioned) {
625 default:
626 EXPECT_TRUE(false) << "Unknown value";
627 FALLTHROUGH_INTENDED;
628 case Dimensioned::NO:
629 return {};
630 case Dimensioned::RANK_1:
631 return {0};
632 case Dimensioned::RANK_2:
633 return {0, 0};
634 case Dimensioned::YES_1:
635 return {1};
636 case Dimensioned::YES_2:
637 return {2};
638 case Dimensioned::YES_4:
639 return {4};
640 }
641 }
642
643 // "dimensioned" must be a fully specified kind
numberOfElements(Dimensioned dimensioned)644 uint32_t numberOfElements(Dimensioned dimensioned) {
645 auto dims = dimensions(dimensioned);
646 uint32_t result = std::reduce(dims.begin(), dims.end(), 1u, std::multiplies<>());
647 CHECK_GT(result, 0u);
648 return result;
649 }
650
toString(Dimensioned dimensioned)651 std::string toString(Dimensioned dimensioned) {
652 switch (dimensioned) {
653 default:
654 return "<Unknown value>";
655 case Dimensioned::NO:
656 return "NO";
657 case Dimensioned::RANK_1:
658 return "RANK_1";
659 case Dimensioned::RANK_2:
660 return "RANK_2";
661 case Dimensioned::YES_1:
662 return "YES_1";
663 case Dimensioned::YES_2:
664 return "YES_2";
665 case Dimensioned::YES_4:
666 return "YES_4";
667 }
668 }
669
670 // This class adds some simple abstractions and utilities on top of
671 // WrapperModel. For example, it provides methods that work in terms of
672 // operation kind (0..7); and because we care about graph topology rather than
673 // details of operand types and values, it greatly simplifies the process of
674 // creating operands.
675 class PartitioningModel : private WrapperModel {
676 public:
677 using WrapperModel::finish;
678 using WrapperModel::getHandle;
679 using WrapperModel::identifyInputsAndOutputs;
680 using WrapperModel::isValid;
681 using WrapperModel::relaxComputationFloat32toFloat16;
682 using WrapperModel::setOperandValue;
683
684 // Create a tensor operand of the specified type, and return the
685 // corresponding operand index.
addIntOperand(Dimensioned dimensioned=Dimensioned::YES)686 uint32_t addIntOperand(Dimensioned dimensioned = Dimensioned::YES) {
687 return addOperand(WrapperType::TENSOR_INT32, dimensioned);
688 }
addIntScalarOperand(std::optional<int> v=std::nullopt)689 uint32_t addIntScalarOperand(std::optional<int> v = std::nullopt) {
690 uint32_t opnd = addOperand(WrapperType::INT32);
691 if (v.has_value()) {
692 setOperandValue(opnd, &v.value());
693 }
694 return opnd;
695 }
addFloatOperand(Dimensioned dimensioned=Dimensioned::YES)696 uint32_t addFloatOperand(Dimensioned dimensioned = Dimensioned::YES) {
697 return addOperand(WrapperType::TENSOR_FLOAT32, dimensioned);
698 }
addQuantOperand(Dimensioned dimensioned=Dimensioned::YES)699 uint32_t addQuantOperand(Dimensioned dimensioned = Dimensioned::YES) {
700 return addOperand(WrapperType::TENSOR_QUANT8_ASYMM, dimensioned);
701 }
addBooleanOperand(Dimensioned dimensioned=Dimensioned::YES)702 uint32_t addBooleanOperand(Dimensioned dimensioned = Dimensioned::YES) {
703 return addOperand(WrapperType::TENSOR_BOOL8, dimensioned);
704 }
addFloatZeroOperand(Dimensioned dimensioned=Dimensioned::YES)705 uint32_t addFloatZeroOperand(Dimensioned dimensioned = Dimensioned::YES) {
706 uint32_t opnd = addFloatOperand(dimensioned);
707 std::vector<float> values(numberOfElements(dimensioned), 0.0f);
708 uint32_t size = values.size() * sizeof(float);
709 // Make sure the values are immediately copied so that it is safe to free the buffer after
710 // the setOperandValue call
711 CHECK_LE(size, ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES);
712 setOperandValue(opnd, values.data(), size);
713 return opnd;
714 }
715
716 // Create an operand of the specified type, and return the corresponding
717 // operand index.
addOperand(WrapperType wrapperType,Dimensioned dimensioned=Dimensioned::YES)718 uint32_t addOperand(WrapperType wrapperType, Dimensioned dimensioned = Dimensioned::YES) {
719 switch (static_cast<int>(wrapperType)) {
720 case ANEURALNETWORKS_BOOL:
721 case ANEURALNETWORKS_FLOAT16:
722 case ANEURALNETWORKS_FLOAT32:
723 case ANEURALNETWORKS_INT32:
724 case ANEURALNETWORKS_UINT32:
725 case ANEURALNETWORKS_MODEL:
726 case ANEURALNETWORKS_OEM_SCALAR:
727 return addOperand(WrapperOperandType{wrapperType, {}});
728
729 case ANEURALNETWORKS_TENSOR_BOOL8:
730 case ANEURALNETWORKS_TENSOR_FLOAT16:
731 case ANEURALNETWORKS_TENSOR_FLOAT32:
732 case ANEURALNETWORKS_TENSOR_OEM_BYTE:
733 return addOperand(WrapperOperandType{wrapperType, dimensions(dimensioned)});
734
735 case ANEURALNETWORKS_TENSOR_INT32:
736 case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
737 case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED:
738 case ANEURALNETWORKS_TENSOR_QUANT8_SYMM:
739 case ANEURALNETWORKS_TENSOR_QUANT16_ASYMM:
740 case ANEURALNETWORKS_TENSOR_QUANT16_SYMM:
741 return addOperand(WrapperOperandType{wrapperType, dimensions(dimensioned), 1.0f});
742
743 case ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL:
744 return addOperand(WrapperOperandType{wrapperType, dimensions(dimensioned),
745 WrapperSymmPerChannelQuantParams({1.0f}, 0)});
746
747 default:
748 ADD_FAILURE() << "Unexpected type " << static_cast<uint32_t>(wrapperType);
749 return ~uint32_t(0);
750 }
751 }
752
753 // Create an operand of the specified operand type, and return the
754 // corresponding operand index.
addOperand(const WrapperOperandType & wrapperOperandType)755 uint32_t addOperand(const WrapperOperandType& wrapperOperandType) {
756 mWrapperOperandType.push_back(wrapperOperandType);
757 return WrapperModel::addOperand(&wrapperOperandType);
758 }
759
760 // Create an operation with any number of inputs and one output, specifying
761 // the operation type (e.g., ANEURALNETWORKS_ADD), the input operand
762 // indexes, and the output type (e.g., WrapperType::TENSOR_FLOAT32).
763 // Returns the output operand index.
addExplicitOperationXTo1(ANeuralNetworksOperationType operationType,const std::vector<uint32_t> & inputs,WrapperType outputType,Dimensioned dimensionedOutput=Dimensioned::YES)764 uint32_t addExplicitOperationXTo1(ANeuralNetworksOperationType operationType,
765 const std::vector<uint32_t>& inputs, WrapperType outputType,
766 Dimensioned dimensionedOutput = Dimensioned::YES) {
767 uint32_t output = addOperand(outputType, dimensionedOutput);
768 addOperation(operationType, inputs, {output});
769 return output;
770 }
771
772 // Create a V1_0 operation with two inputs and one output, specifying the
773 // operation kind (where 0 is the first V1_0 operation) and the input
774 // operand indexes.
775 // Returns the output operand index.
addOperation2To1V1_0(uint32_t operation,const uint32_t input0,const uint32_t input1,Dimensioned dimensionedOutput=Dimensioned::YES)776 uint32_t addOperation2To1V1_0(uint32_t operation, const uint32_t input0, const uint32_t input1,
777 Dimensioned dimensionedOutput = Dimensioned::YES) {
778 CHECK_LE(operation, kLastEncodingV1_0 - kFirstEncodingV1_0);
779 return addOperation2To1(operation + kFirstEncodingV1_0, input0, input1, dimensionedOutput);
780 }
781
782 // Create a V1_1 operation with two inputs and one output, specifying the
783 // operation kind (where 0 is the first V1_1 operation) and the input
784 // operand indexes.
785 // Returns the output operand index.
addOperation2To1V1_1(uint32_t operation,const uint32_t input0,const uint32_t input1,Dimensioned dimensionedOutput=Dimensioned::YES)786 uint32_t addOperation2To1V1_1(uint32_t operation, const uint32_t input0, const uint32_t input1,
787 Dimensioned dimensionedOutput = Dimensioned::YES) {
788 CHECK_LE(operation, kLastEncodingV1_1 - kFirstEncodingV1_1);
789 return addOperation2To1(operation + kFirstEncodingV1_1, input0, input1, dimensionedOutput);
790 }
791
792 // Create a V1_2 operation with two inputs and one output, specifying the
793 // operation kind (where 0 is the first V1_2 operation) and the input
794 // operand indexes.
795 // Returns the output operand index.
addOperation2To1V1_2(uint32_t operation,const uint32_t input0,const uint32_t input1,Dimensioned dimensionedOutput=Dimensioned::YES)796 uint32_t addOperation2To1V1_2(uint32_t operation, const uint32_t input0, const uint32_t input1,
797 Dimensioned dimensionedOutput = Dimensioned::YES) {
798 CHECK_LE(operation, kLastEncodingV1_2 - kFirstEncodingV1_2);
799 return addOperation2To1(operation + kFirstEncodingV1_2, input0, input1, dimensionedOutput);
800 }
801
802 // Create a V1_3 operation with two inputs and one output, specifying the
803 // operation kind (where 0 is the first V1_3 operation) and the input
804 // operand indexes.
805 // Returns the output operand index.
addOperation1To1V1_3(uint32_t operation,const uint32_t input0,Dimensioned dimensionedOutput=Dimensioned::YES)806 uint32_t addOperation1To1V1_3(uint32_t operation, const uint32_t input0,
807 Dimensioned dimensionedOutput = Dimensioned::YES) {
808 CHECK_LE(operation, kLastEncodingV1_3 - kFirstEncodingV1_3);
809 return addOperation1To1(operation + kFirstEncodingV1_3, input0, dimensionedOutput);
810 }
811
812 // Create an OEM operation with one input and one output,
813 // specifying the input operand index. Returns the output operand
814 // index.
addOperationOEM1To1(const uint32_t input,Dimensioned dimensionedOutput=Dimensioned::YES)815 uint32_t addOperationOEM1To1(const uint32_t input,
816 Dimensioned dimensionedOutput = Dimensioned::YES) {
817 uint32_t output = addOperandOfSameType(input, dimensionedOutput);
818 addOperation(ANEURALNETWORKS_OEM_OPERATION, {input}, {output});
819 return output;
820 }
821
822 // Create an IF operation with the given condition operand and two
823 // referenced models for the true and false cases.
addIfOperation(const uint32_t cond,const PartitioningModel & trueModel,const PartitioningModel & falseModel,const std::vector<uint32_t> & inputs,const std::vector<uint32_t> & outputs)824 void addIfOperation(const uint32_t cond, const PartitioningModel& trueModel,
825 const PartitioningModel& falseModel, const std::vector<uint32_t>& inputs,
826 const std::vector<uint32_t>& outputs) {
827 const uint32_t opndTrue = addRefModelOperand(trueModel);
828 const uint32_t opndFalse = addRefModelOperand(falseModel);
829 std::vector<uint32_t> ifInputs = {cond, opndTrue, opndFalse};
830 ifInputs.insert(ifInputs.end(), inputs.begin(), inputs.end());
831 addOperation(ANEURALNETWORKS_IF, ifInputs, outputs);
832 }
833
834 // Create a WHILE operation with the given condition and body referenced models.
addWhileOperation(const PartitioningModel & condModel,const PartitioningModel & bodyModel,const std::vector<uint32_t> & inputs,const std::vector<uint32_t> & outputs)835 void addWhileOperation(const PartitioningModel& condModel, const PartitioningModel& bodyModel,
836 const std::vector<uint32_t>& inputs,
837 const std::vector<uint32_t>& outputs) {
838 const uint32_t condOperand = addRefModelOperand(condModel);
839 const uint32_t bodyOperand = addRefModelOperand(bodyModel);
840 std::vector<uint32_t> whileInputs = {condOperand, bodyOperand};
841 whileInputs.insert(whileInputs.end(), inputs.begin(), inputs.end());
842 addOperation(ANEURALNETWORKS_WHILE, whileInputs, outputs);
843 }
844
845 // Run the partitioning algorithm to create an ExecutionPlan.
partitionTheWork(const std::vector<std::shared_ptr<Device>> & devices,ExecutePreference preference,ExecutePriority priority,const OptionalTimePoint & deadline,ExecutionPlan * plan)846 int partitionTheWork(const std::vector<std::shared_ptr<Device>>& devices,
847 ExecutePreference preference, ExecutePriority priority,
848 const OptionalTimePoint& deadline, ExecutionPlan* plan) {
849 return reinterpret_cast<ModelBuilder*>(getHandle())
850 ->partitionTheWork(devices, static_cast<uint32_t>(preference),
851 static_cast<int32_t>(priority), deadline, plan);
852 }
853
854 #ifdef VERBOSE
855 // This is a debugging utility function.
dump(const char * name) const856 void dump(const char* name) const {
857 const ModelBuilder* mb = reinterpret_cast<const ModelBuilder*>(getHandle());
858 ::dump(name, mb);
859 }
860 #endif
861
862 private:
863 // Create an operation with two inputs and one output, specifying
864 // the operation kind and the input operand indexes.
865 // Returns the output operand index.
addOperation2To1(uint32_t operation,const uint32_t input0,const uint32_t input1,Dimensioned dimensionedOutput=Dimensioned::YES)866 uint32_t addOperation2To1(uint32_t operation, const uint32_t input0, const uint32_t input1,
867 Dimensioned dimensionedOutput = Dimensioned::YES) {
868 auto it = firstEncodingToOperation.lower_bound(operation);
869 CHECK(it != firstEncodingToOperation.end());
870 ANeuralNetworksOperationType type = it->second.first;
871 if (it->second.second) {
872 int32_t fuseCode = operation - it->first;
873 uint32_t input2 = addIntOperand(fuseCode);
874 uint32_t output = addOperandOfSameType(input0, dimensionedOutput);
875 addOperation(type, {input0, input1, input2}, {output});
876 return output;
877 } else {
878 uint32_t output = addOperandOfSameType(input0, dimensionedOutput);
879 addOperation(type, {input0, input1}, {output});
880 return output;
881 }
882 }
883
884 // Create an operation with one inputs and one output, specifying
885 // the operation kind and the input operand indexes.
886 // Returns the output operand index.
addOperation1To1(uint32_t operation,const uint32_t input0,Dimensioned dimensionedOutput=Dimensioned::YES)887 uint32_t addOperation1To1(uint32_t operation, const uint32_t input0,
888 Dimensioned dimensionedOutput = Dimensioned::YES) {
889 auto it = firstEncodingToOperation.lower_bound(operation);
890 CHECK(it != firstEncodingToOperation.end());
891 ANeuralNetworksOperationType type = it->second.first;
892
893 uint32_t output = addOperandOfSameType(input0, dimensionedOutput);
894 addOperation(type, {input0}, {output});
895 return output;
896 }
897
898 // Create a scalar integer operand of the specified value, and
899 // return the corresponding operand index.
addIntOperand(int32_t value)900 uint32_t addIntOperand(int32_t value) {
901 uint32_t operand = addOperand(WrapperType::INT32);
902 setOperandValue(operand, &value, sizeof(value));
903 return operand;
904 }
905
906 // Create an operand from a model for control flow graphs.
addRefModelOperand(const PartitioningModel & model)907 uint32_t addRefModelOperand(const PartitioningModel& model) {
908 const uint32_t index = addOperand(WrapperType::MODEL);
909 WrapperModel::setOperandValueFromModel(index, &model);
910 return index;
911 }
912
913 // Create an operand of the same type as the specified operand,
914 // and return the operand index of the new operand.
915 //
916 // If a tensor, the new operand will have the same rank as the specified
917 // operand. If dimensioned == Dimensioned::NO, then all dimensions of a new
918 // tensor operand will be unspecified. If dimensioned != Dimensioned::NO,
919 // then all dimensions of a new tensor operand will have the implied value
920 // (e.g., YES_1 means each dimension will have the value "1").
addOperandOfSameType(uint32_t operand,Dimensioned dimensioned=Dimensioned::YES)921 uint32_t addOperandOfSameType(uint32_t operand, Dimensioned dimensioned = Dimensioned::YES) {
922 WrapperOperandType type = mWrapperOperandType.at(operand);
923
924 const auto d = dimensions(dimensioned);
925 EXPECT_TRUE(d.size() <= 1);
926 for (auto& dimension : type.dimensions) {
927 dimension = (dimensioned == Dimensioned::NO ? 0 : d[0]);
928 }
929
930 mWrapperOperandType.push_back(type);
931 return WrapperModel::addOperand(&type);
932 }
933
934 // operand index to operand type
935 std::vector<WrapperOperandType> mWrapperOperandType;
936 };
937
938 // This class adds some utilities on top of WrapperCompilation.
939 class PartitioningCompilation : public WrapperCompilation {
940 public:
PartitioningCompilation(const PartitioningModel * model,const std::vector<std::shared_ptr<Device>> & devices)941 PartitioningCompilation(const PartitioningModel* model,
942 const std::vector<std::shared_ptr<Device>>& devices) {
943 ModelBuilder* m = reinterpret_cast<ModelBuilder*>(model->getHandle());
944 CompilationBuilder* c = nullptr;
945 int result = m->createCompilation(&c, devices);
946 EXPECT_EQ(result, 0);
947 mCompilation = reinterpret_cast<ANeuralNetworksCompilation*>(c);
948 }
949
setPartitioning(uint32_t partitioning)950 Result setPartitioning(uint32_t partitioning) {
951 return static_cast<Result>(builder()->forTest_setPartitioning(partitioning));
952 }
953
954 // Simulate recoverable partitioning failure.
failPartitioning()955 Result failPartitioning() {
956 return static_cast<Result>(
957 builder()->forTest_failPartitioning(static_cast<int>(Result::OP_FAILED)));
958 }
959
960 using WrapperCompilation::finish;
961
getExecutionPlan() const962 const ExecutionPlan& getExecutionPlan() const { return builder()->forTest_getExecutionPlan(); }
963
964 private:
builder()965 CompilationBuilder* builder() { return reinterpret_cast<CompilationBuilder*>(getHandle()); }
966
builder() const967 const CompilationBuilder* builder() const {
968 return reinterpret_cast<const CompilationBuilder*>(getHandle());
969 }
970 };
971
// RETURN_TRUE() / RETURN_FALSE(MESSAGE) return a boolean from the enclosing
// function. In VERBOSE builds they first log the source line of the return
// site to std::cerr (RETURN_FALSE additionally streams MESSAGE into the log).
972 #ifdef VERBOSE
973 #define RETURN_TRUE() \
974 { \
975 std::cerr << "returning true from " << __LINE__ << std::endl; \
976 return true; \
977 }
978 #else
979 #define RETURN_TRUE() \
980 { return true; }
981 #endif
982 #ifdef VERBOSE
983 #define RETURN_FALSE(MESSAGE) \
984 { \
985 std::cerr << "returning false from " << __LINE__ MESSAGE << std::endl; \
986 return false; \
987 }
988 #else
989 #define RETURN_FALSE(MESSAGE) \
990 { return false; }
991 #endif
992
993 class PartitioningTest : public ::testing::Test {
994 protected:
995 using DynamicTemporariesType = decltype(ExecutionPlan().forTest_flatGetDynamicTemporaries());
996 using RemapVectorType = ExecutionStep::RemapVectorType;
997 using StepModelOutputSetType = ExecutionStep::StepModelOutputSetType;
998
999 // Used for PartitioningTest::checkExecutionPlanSteps.
1000 static constexpr const char* kIfStep = "IF";
1001 static constexpr const char* kWhileStep = "WHILE";
1002 static constexpr const char* kGotoStep = "GOTO";
1003
// Intentionally empty: these tests require no common per-test setup.
SetUp()1004 virtual void SetUp() {}
1005
1006 // From a vector of DeviceSpecification, create a vector of
1007 // Devices.
1008 struct DeviceSpecification {
DeviceSpecification__anon63efd43f0110::PartitioningTest::DeviceSpecification1009 DeviceSpecification(const std::string& name, const V1_3::Capabilities& capabilities,
1010 uint32_t operationMask,
1011 PartitioningDriver::OEM oem = PartitioningDriver::OEMNo)
1012 : mName(name),
1013 mVersionString(kVersionString),
1014 mCapabilities(capabilities),
1015 mOperationMask(operationMask),
1016 mOEM(oem) {}
DeviceSpecification__anon63efd43f0110::PartitioningTest::DeviceSpecification1017 DeviceSpecification(const std::string& name, float perf, uint32_t operationMask,
1018 PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
1019 HalVersion halVersion = HalVersion::LATEST,
1020 std::set<V1_3::OperationType> operationTypes = {})
1021 : DeviceSpecification(name, perf, perf, operationMask, oem, halVersion,
1022 operationTypes) {}
DeviceSpecification__anon63efd43f0110::PartitioningTest::DeviceSpecification1023 DeviceSpecification(const std::string& name, float perf, float perfRelaxed,
1024 uint32_t operationMask,
1025 PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
1026 HalVersion halVersion = HalVersion::LATEST,
1027 std::set<V1_3::OperationType> operationTypes = {})
1028 : DeviceSpecification(name, kVersionString, perf, perfRelaxed, operationMask, oem,
1029 halVersion, operationTypes) {}
DeviceSpecification__anon63efd43f0110::PartitioningTest::DeviceSpecification1030 DeviceSpecification(const std::string& name, const std::string& version, float perf,
1031 uint32_t operationMask,
1032 PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
1033 HalVersion halVersion = HalVersion::LATEST,
1034 std::set<V1_3::OperationType> operationTypes = {})
1035 : DeviceSpecification(name, version, perf, perf, operationMask, oem, halVersion,
1036 operationTypes) {}
DeviceSpecification__anon63efd43f0110::PartitioningTest::DeviceSpecification1037 DeviceSpecification(const std::string& name, const std::string& version, float perf,
1038 float perfRelaxed, uint32_t operationMask,
1039 PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
1040 HalVersion halVersion = HalVersion::LATEST,
1041 std::set<V1_3::OperationType> operationTypes = {})
1042 : mName(name),
1043 mVersionString(version),
1044 mHalVersion(halVersion),
1045 mOperationMask(operationMask),
1046 mOEM(oem),
1047 mOperationTypes(std::move(operationTypes)) {
1048 V1_0::PerformanceInfo perfInfo = {.execTime = perf, .powerUsage = perf};
1049 V1_0::PerformanceInfo perfRelaxedInfo = {.execTime = perfRelaxed,
1050 .powerUsage = perfRelaxed};
1051 mCapabilities = {
1052 .relaxedFloat32toFloat16PerformanceScalar = perfRelaxedInfo,
1053 .relaxedFloat32toFloat16PerformanceTensor = perfRelaxedInfo,
1054 .operandPerformance =
1055 ::android::nn::nonExtensionOperandPerformance<HalVersion::V1_3>(
1056 perfInfo),
1057 .ifPerformance = perfInfo,
1058 .whilePerformance = perfInfo};
1059 }
DeviceSpecification__anon63efd43f0110::PartitioningTest::DeviceSpecification1060 DeviceSpecification(const std::string& name, float perf, HalVersion halVersion,
1061 uint32_t operationMaskV1_0, uint32_t operationMaskV1_1 = 0,
1062 uint32_t operationMaskV1_2 = 0, uint32_t operationMaskV1_3 = 0)
1063 : DeviceSpecification(
1064 name, perf, perf,
1065 makeOperationMask(halVersion, operationMaskV1_0, operationMaskV1_1,
1066 operationMaskV1_2, operationMaskV1_3)) {
1067 mHalVersion = halVersion;
1068 }
1069
1070 std::string mName;
1071 std::string mVersionString;
1072 V1_3::Capabilities mCapabilities;
1073 HalVersion mHalVersion = HalVersion::LATEST;
1074 uint32_t mOperationMask;
1075 PartitioningDriver::OEM mOEM = PartitioningDriver::OEMNo;
1076 std::set<V1_3::OperationType> mOperationTypes;
1077
1078 static constexpr char kVersionString[] = "JUST_AN_EXAMPLE";
1079
1080 private:
1081 // This function takes three operation masks aligned at the low-order
1082 // bit -- one mask each for V1_0, V1_1, and V1_2 -- and produces a single
1083 // composite operation mask, formed by shifting each of the input
1084 // operation masks appropriately and ORing the results together.
1085 //
1086 // For convenience, any bits of an input mask that are too high order
1087 // for that mask are discarded -- this allows ~0 to be a legal input
1088 // mask.
1089 //
1090 // For the sake of example, assume that each low order mask is 4 bits
1091 // wide, and take some artistic license to write literals in binary.
1092 // Then:
1093 //
1094 // assert(makeOperationMask(HalVersion::V1_2, 0b0110, 0b1001, 0b0101) ==
1095 // 0b 0101 1001 0110);
1096 //
1097 // This is used by a DeviceSpecification constructor to build a mask of
1098 // operations to be supported by the device.
makeOperationMask__anon63efd43f0110::PartitioningTest::DeviceSpecification1099 static uint32_t makeOperationMask(HalVersion halVersion, uint32_t operationMaskV1_0,
1100 uint32_t operationMaskV1_1, uint32_t operationMaskV1_2,
1101 uint32_t operationMaskV1_3) {
1102 if (halVersion < HalVersion::V1_3) {
1103 CHECK(!operationMaskV1_3);
1104 }
1105 if (halVersion < HalVersion::V1_2) {
1106 CHECK(!operationMaskV1_2);
1107 }
1108 if (halVersion < HalVersion::V1_1) {
1109 CHECK(!operationMaskV1_1);
1110 }
1111 auto maskOfWidth = [](uint32_t width) -> uint32_t { return (1U << width) - 1; };
1112 static const uint32_t kOperationMaskV1_0 =
1113 maskOfWidth(kLastEncodingV1_0 - kFirstEncodingV1_0 + 1);
1114 static const uint32_t kOperationMaskV1_1 =
1115 maskOfWidth(kLastEncodingV1_1 - kFirstEncodingV1_1 + 1);
1116 static const uint32_t kOperationMaskV1_2 =
1117 maskOfWidth(kLastEncodingV1_2 - kFirstEncodingV1_2 + 1);
1118 static const uint32_t kOperationMaskV1_3 =
1119 maskOfWidth(kLastEncodingV1_3 - kFirstEncodingV1_3 + 1);
1120 return ((operationMaskV1_0 & kOperationMaskV1_0) << kFirstEncodingV1_0) |
1121 ((operationMaskV1_1 & kOperationMaskV1_1) << kFirstEncodingV1_1) |
1122 ((operationMaskV1_2 & kOperationMaskV1_2) << kFirstEncodingV1_2) |
1123 ((operationMaskV1_3 & kOperationMaskV1_3) << kFirstEncodingV1_3);
1124 }
1125 };
makeDevices(std::vector<DeviceSpecification> specifications)1126 static std::vector<std::shared_ptr<Device>> makeDevices(
1127 std::vector<DeviceSpecification> specifications) {
1128 std::vector<std::shared_ptr<Device>> devices;
1129 for (const auto& specification : specifications) {
1130 SharedDevice device = nullptr;
1131 switch (specification.mHalVersion) {
1132 case HalVersion::V1_3:
1133 device = android::nn::makeSharedDevice(
1134 specification.mName,
1135 new PartitioningDriver(specification.mName.c_str(),
1136 specification.mVersionString.c_str(),
1137 specification.mCapabilities,
1138 specification.mOperationMask, specification.mOEM,
1139 specification.mOperationTypes));
1140 break;
1141 case HalVersion::V1_2:
1142 device = android::nn::makeSharedDevice(
1143 specification.mName,
1144 new PartitioningDriverV1_2(
1145 specification.mName.c_str(),
1146 specification.mVersionString.c_str(),
1147 specification.mCapabilities, specification.mOperationMask,
1148 specification.mOEM, specification.mOperationTypes));
1149 break;
1150 case HalVersion::V1_1:
1151 device = android::nn::makeSharedDevice(
1152 specification.mName,
1153 new PartitioningDriverV1_1(
1154 specification.mName.c_str(),
1155 specification.mVersionString.c_str(),
1156 specification.mCapabilities, specification.mOperationMask,
1157 specification.mOEM, specification.mOperationTypes));
1158 break;
1159 case HalVersion::V1_0:
1160 device = android::nn::makeSharedDevice(
1161 specification.mName,
1162 new PartitioningDriverV1_0(
1163 specification.mName.c_str(),
1164 specification.mVersionString.c_str(),
1165 specification.mCapabilities, specification.mOperationMask,
1166 specification.mOEM, specification.mOperationTypes));
1167 break;
1168 default:
1169 ADD_FAILURE() << "Unexpected";
1170 }
1171 auto driverDevice = DeviceManager::forTest_makeDriverDevice(device);
1172 devices.push_back(std::move(driverDevice));
1173 }
1174 devices.push_back(DeviceManager::getCpuDevice());
1175 return devices;
1176 }
1177
// Formats a list of step names as "[ step1 step2 ... ]" for test diagnostics.
static std::string stepsToString(const std::vector<std::string>& steps) {
    std::string result = "[ ";
    for (const auto& step : steps) {
        result += step;
        result += ' ';
    }
    result += ']';
    return result;
}
1187
1188 // Checks the type of each logical step in an execution plan.
1189 // Each entry of "expected" is either: kIfStep for IfStep, kWhileStep for WhileStep,
1190 // kGotoStep for GotoStep, or the device name for ExecutionStep.
checkExecutionPlanSteps(const ExecutionPlan & plan,const std::vector<std::string> & expected)1191 void checkExecutionPlanSteps(const ExecutionPlan& plan,
1192 const std::vector<std::string>& expected) {
1193 ASSERT_GT(expected.size(), 0u);
1194
1195 std::vector<std::string> actual;
1196 if (expected.size() == 1) {
1197 ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
1198 actual.emplace_back(plan.forTest_simpleGetDevice()->getName());
1199 } else {
1200 ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
1201 const auto& steps = plan.forTest_compoundGetSteps();
1202 for (const auto& step : steps) {
1203 if (step->isIf()) {
1204 actual.emplace_back(kIfStep);
1205 } else if (step->isWhile()) {
1206 actual.emplace_back(kWhileStep);
1207 } else if (step->isGoto()) {
1208 actual.emplace_back(kGotoStep);
1209 } else if (step->isExecution()) {
1210 actual.emplace_back(step->executionStep()->getDevice()->getName());
1211 } else {
1212 ASSERT_FALSE(true) << "Unknown LogicalStep";
1213 }
1214 }
1215 }
1216 ASSERT_TRUE(actual == expected)
1217 << "expected: " << stepsToString(expected) << ", actual: " << stepsToString(actual);
1218 }
1219
1220 /*-- Graph comparison ----------------------------------------------------------------*/
1221
1222 // An operand with certain values for its lifetime does not have a
1223 // defining operation in the graph. For the purposes of the graph
1224 // comparison algorithm, we encode the "defining operation" index of
1225 // such an operand as follows:
1226 // - NO_VALUE kPseudoDefiningOperationNoValue
1227 // - SUBGRAPH_INPUT kPseudoDefiningOperationModelInput0 + (position in list of inputs)
1228 // - CONSTANT_COPY kPseudoDefiningOperationConstantCopy0 + (constant value)
1229 // Note: For the graphs we build in this test, we
1230 // only expect to see 4-byte constants within
1231 // a very restricted range, so we only make
1232 // room for such constants in our encoding
1233 // space.
1234 // We do not expect to see CONSTANT_REFERENCE, and so we do not handle
1235 // it.
1236 //
1237 // The encoding is intended to be relatively human readable; it is not
1238 // designed to represent some optimal balance of ranges for the items
1239 // within its scope (actual operations, inputs, constants).
1240
// Pseudo "defining operation" encodings used by buildDefinitionMap() for
// operands that have no defining operation; see the commentary above for the
// encoding scheme.
1241 enum PseudoDefiningOperationEncodings : uint32_t {
1242 kPseudoDefiningOperationModelInput0 = 0x80000000U,
1243 kPseudoDefiningOperationConstantCopy0 = 0x90000000U,
1244 kPseudoDefiningOperationNoValue = 0xeeeeeeeeU,
1245
1246 // lowest value for special encoding
1247 kPseudoDefiningOperationBase = 0x80000000U,
1248
1249 // range of encoded input or constant (so inputs occupy
1250 // [0x80000000, 0x90000000) and constants [0x90000000, 0xA0000000))
1250 kPseudoDefiningOperationRange = 0x10000000U,
1251 };
1252
1253 // Build a map from operand to defining operation.
1254 // TODO: Replace map with vector?
buildDefinitionMap(const ModelBuilder * model,std::map<uint32_t,uint32_t> * defMap)1255 void buildDefinitionMap(const ModelBuilder* model, std::map<uint32_t, uint32_t>* defMap) {
1256 // actual definitions
1257 ASSERT_LT(model->operationCount(), kPseudoDefiningOperationBase);
1258 for (uint32_t i = 0, e = model->operationCount(); i < e; i++) {
1259 const V1_3::Operation& operation = android::nn::convertToV1_3(model->getOperation(i));
1260 for (uint32_t output : operation.outputs) {
1261 (*defMap)[output] = i;
1262 }
1263 }
1264 // inputs
1265 ASSERT_LT(model->inputCount(), kPseudoDefiningOperationRange);
1266 for (uint32_t i = 0, e = model->inputCount(); i < e; i++) {
1267 (*defMap)[model->getInputOperandIndex(i)] = kPseudoDefiningOperationModelInput0 + i;
1268 }
1269 // look for NO_VALUE and CONSTANT_COPY
1270 for (uint32_t i = 0, e = model->operandCount(); i < e; i++) {
1271 const V1_3::Operand& operand = android::nn::convertToV1_3(model->getOperand(i));
1272 switch (operand.lifetime) {
1273 case V1_3::OperandLifeTime::NO_VALUE:
1274 (*defMap)[i] = kPseudoDefiningOperationNoValue;
1275 break;
1276 case V1_3::OperandLifeTime::CONSTANT_COPY: {
1277 ASSERT_EQ(operand.location.length, sizeof(uint32_t));
1278 uint32_t value;
1279 memcpy(&value, model->getPointerToOperandValue(operand.location.offset),
1280 sizeof(uint32_t));
1281 ASSERT_LT(value, kPseudoDefiningOperationNoValue);
1282 (*defMap)[i] = kPseudoDefiningOperationConstantCopy0 + value;
1283 break;
1284 }
1285 case V1_3::OperandLifeTime::TEMPORARY_VARIABLE:
1286 case V1_3::OperandLifeTime::SUBGRAPH_INPUT:
1287 case V1_3::OperandLifeTime::SUBGRAPH_OUTPUT:
1288 // already handled
1289 break;
1290 default:
1291 FAIL();
1292 break;
1293 }
1294 }
1295 // validity check
1296 ASSERT_EQ(model->operandCount(), defMap->size());
1297 }
1298
1299 #ifdef VERBOSE
dump(const char * name,const std::map<uint32_t,uint32_t> * aMap)1300 void dump(const char* name, const std::map<uint32_t, uint32_t>* aMap) {
1301 auto writeNum = [](uint32_t num) {
1302 if (num >= kPseudoDefiningOperationBase) {
1303 std::cout << "0x" << std::hex << num << std::dec;
1304 } else {
1305 std::cout << num;
1306 }
1307 };
1308
1309 std::cout << name << ": { ";
1310 bool gotOne = false;
1311 for (const auto& entry : *aMap) {
1312 if (gotOne) {
1313 std::cout << ", ";
1314 } else {
1315 gotOne = true;
1316 }
1317 std::cout << "(";
1318 writeNum(entry.first);
1319 std::cout << ", ";
1320 writeNum(entry.second);
1321 std::cout << ")";
1322 }
1323 std::cout << " }" << std::endl;
1324 }
1325 #endif
1326
compare(const Operand & operandA,const Operand & operandB)1327 bool compare(const Operand& operandA, const Operand& operandB) {
1328 if (operandA.type != operandB.type || operandA.dimensions != operandB.dimensions ||
1329 operandA.scale != operandB.scale || operandA.zeroPoint != operandB.zeroPoint) {
1330 return false;
1331 }
1332 return true;
1333 }
1334
// Compare two graphs. We ignore operand and operation indexes (i.e.,
// two nodes can be the same even if they are numbered differently)
// but we also ignore semantics (e.g., even if an operation kind is
// such that the operation is commutative, we still pay attention to
// the order of its input operands).
1340 //
1341 // The comparison algorithm works by walking modelA from outputs
1342 // towards inputs, along the edge from each operand to its
1343 // defining operation, and then along the edges to the operation's
1344 // input operands. At each step along the way, we try to match up
1345 // operands and operations from modelA with equivalent operands
1346 // and operations from modelB.
1347 //
1348 // We start by assuming that modelA's outputs and modelB's outputs
1349 // match positionally (e.g., modelA's first output operand is
1350 // equivalent to modelB's first output operand). Once we've
1351 // discovered two equivalent operands (such as those outputs), we
1352 // place them in a work queue. We repeatedly pull operands off
1353 // the queue and compare their defining operations and those
1354 // operations' input operands, to discover more pairs of
1355 // equivalent operands. If we ever find operations that do not
1356 // match (e.g., because operation kind differs), or operands that
1357 // do not match (e.g., because operand type differs); or if we
1358 // ever find a conflict (we've already decided that operand A's
1359 // equivalent operand is B0, but it looks like we need its
1360 // equivalent operand to be B1); then the graphs compare unequal.
1361 // Otherwise, we'll eventually exhaust the work queue, and
1362 // conclude that the graphs compare equal.
1363 //
1364 // As a side effect of the comparison, we produce a map
1365 // *inputsAndOutputsBToA that maps from each of the model input and output
1366 // operand numbers of modelB to the corresponding operand numbers of modelA.
1367 // If the comparison returns false, the contents of the map are undefined.
bool compare(const ModelBuilder* modelA, const ModelBuilder* modelB,
             std::map<uint32_t, uint32_t>* inputsAndOutputsBToA) {
    CHECK(inputsAndOutputsBToA != nullptr);
    EXPECT_TRUE(inputsAndOutputsBToA->empty());

#ifdef VERBOSE
    ::dump("compare(A)", modelA);
    ::dump("compare(B)", modelB);
#endif

    // Quick rejection: equivalent graphs must agree on all four counts.
    if (modelA->operandCount() != modelB->operandCount() ||
        modelA->operationCount() != modelB->operationCount() ||
        modelA->inputCount() != modelB->inputCount() ||
        modelA->outputCount() != modelB->outputCount()) {
        RETURN_FALSE();
    }

    // Maps from operand index to index of defining operation.
    std::map<uint32_t, uint32_t> defsA, defsB;
    buildDefinitionMap(modelA, &defsA);
    buildDefinitionMap(modelB, &defsB);
    // buildDefinitionMap() reports failure via gtest fatal assertions.
    if (HasFatalFailure()) return false;

    // Maps from operand index in modelA to equivalent operand index
    // in modelB; and from operation index in modelA to equivalent
    // operation index in modelB.
    std::map<uint32_t, uint32_t> equivalentOperandsAToB;
    std::map<uint32_t, uint32_t> equivalentOperationsAToB;

    // Queue of operand indexes from modelA, each of whose defining
    // operations are to be checked for equivalence with modelB.
    std::queue<uint32_t> workQueueOperandsA;

    // Seed operand equivalence map and work queue from model outputs,
    // which are assumed to match positionally.
    for (uint32_t i = 0, e = modelA->outputCount(); i < e; i++) {
        uint32_t outputA = modelA->getOutputOperandIndex(i);
        uint32_t outputB = modelB->getOutputOperandIndex(i);
        if (!compare(modelA->getOperand(outputA), modelB->getOperand(outputB))) {
#ifdef VERBOSE
            std::cout << "modelA.output[" << i << "] = operand[" << outputA
                      << "] = " << toString(modelA->getOperand(outputA)) << std::endl;
            std::cout << "modelB.output[" << i << "] = operand[" << outputB
                      << "] = " << toString(modelB->getOperand(outputB)) << std::endl;
#endif
            RETURN_FALSE();
        }
        equivalentOperandsAToB[outputA] = outputB;
        workQueueOperandsA.push(outputA);
    }

#ifdef VERBOSE
    dump("defsA", &defsA);
    dump("defsB", &defsB);
#endif

    // Process the queue.
    uint32_t pseudoDefinitionCount = 0;
    while (!workQueueOperandsA.empty()) {
#ifdef VERBOSE
        dump("equivalentOperandsAToB", &equivalentOperandsAToB);
        dump("equivalentOperationsAToB", &equivalentOperationsAToB);
#endif
        uint32_t operandIndexA = workQueueOperandsA.front();
#ifdef VERBOSE
        std::cout << "operandIndexA: " << operandIndexA << std::endl;
#endif
        workQueueOperandsA.pop();
        // The equivalent operand was recorded when operandIndexA was
        // enqueued, so at() cannot fail here.
        uint32_t operandIndexB = equivalentOperandsAToB.at(operandIndexA);

        uint32_t operationIndexA = defsA.at(operandIndexA);
        uint32_t operationIndexB = defsB.at(operandIndexB);
        auto it = equivalentOperationsAToB.find(operationIndexA);
        if (it != equivalentOperationsAToB.end()) {
            // Already matched; just verify that there is no conflict.
            if (it->second != operationIndexB) {
                RETURN_FALSE();
            }
            continue;
        }

        // We haven't identified an equivalent operation for
        // operationIndexA.

        if ((operationIndexA >= kPseudoDefiningOperationBase) !=
            (operationIndexB >= kPseudoDefiningOperationBase)) {
            RETURN_FALSE();
        }
        // Either both operands have pseudo-definitions, or neither
        // does.
        if (operationIndexA >= kPseudoDefiningOperationBase) {
            // Both operands have pseudo-definitions.
            if (operationIndexA != operationIndexB) {
                RETURN_FALSE();
            }
            equivalentOperationsAToB[operationIndexA] = operationIndexB;
            ++pseudoDefinitionCount;
            continue;
        }

        // If we get here, neither operation A nor operation B is a
        // pseudo-definition.

        const Operation& operationA = modelA->getOperation(operationIndexA);
        const Operation& operationB = modelB->getOperation(operationIndexB);
        if (operationA.type != operationB.type ||
            operationA.inputs.size() != operationB.inputs.size() ||
            operationA.outputs.size() != operationB.outputs.size()) {
            RETURN_FALSE();
        }
        equivalentOperationsAToB[operationIndexA] = operationIndexB;
        // Match the operations' inputs positionally, enqueueing each
        // newly-discovered operand equivalence for further exploration.
        for (uint32_t i = 0, e = operationA.inputs.size(); i < e; i++) {
            uint32_t inputA = operationA.inputs[i];
            uint32_t inputB = operationB.inputs[i];
            auto it = equivalentOperandsAToB.find(inputA);
            if (it != equivalentOperandsAToB.end()) {
                if (it->second != inputB) {
                    RETURN_FALSE();
                }
                continue;
            }
            // We haven't identified an equivalent operand for inputA.
            if (!compare(modelA->getOperand(inputA), modelB->getOperand(inputB))) {
#ifdef VERBOSE
                std::cout << "modelA.input[" << i << "] = operand[" << inputA
                          << "] = " << toString(modelA->getOperand(inputA)) << std::endl;
                std::cout << "modelB.input[" << i << "] = operand[" << inputB
                          << "] = " << toString(modelB->getOperand(inputB)) << std::endl;
#endif
                RETURN_FALSE();
            }
            equivalentOperandsAToB[inputA] = inputB;
            workQueueOperandsA.push(inputA);
        }
    }

    // Validity check: the walk must have touched every operand and every
    // operation (real or pseudo) exactly once.
    if (modelA->operandCount() != defsA.size() || modelA->operandCount() != defsB.size() ||
        modelA->operandCount() != equivalentOperandsAToB.size() ||
        modelA->operationCount() + pseudoDefinitionCount != equivalentOperationsAToB.size()) {
        RETURN_FALSE();
    }

    // Build *inputsAndOutputsBToA
    for (uint32_t aInputIndex : modelA->getInputOperandIndexes()) {
        (*inputsAndOutputsBToA)[equivalentOperandsAToB.at(aInputIndex)] = aInputIndex;
    }
    for (uint32_t aOutputIndex : modelA->getOutputOperandIndexes()) {
        (*inputsAndOutputsBToA)[equivalentOperandsAToB.at(aOutputIndex)] = aOutputIndex;
    }

    RETURN_TRUE();
}
1519
1520 /*-------------------------------------------------------------------------------------*/
1521
1522 // As a side effect of the comparison, we produce a map
1523 // *inputsAndOutputsModelToStep that maps from each of the model input and
1524 // output operand numbers of "model" to the corresponding operand numbers of
1525 // the step model from "step". If the comparison returns false, the contents
1526 // of the map are undefined.
compare(const ExecutionStep * step,const PartitioningModel * model,std::shared_ptr<Device> device,std::map<uint32_t,uint32_t> * inputsAndOutputsModelToStep)1527 bool compare(const ExecutionStep* step, const PartitioningModel* model,
1528 std::shared_ptr<Device> device,
1529 std::map<uint32_t, uint32_t>* inputsAndOutputsModelToStep) {
1530 return (step->getDevice() == device) &&
1531 compare(step->getStepModel(),
1532 reinterpret_cast<const ModelBuilder*>(model->getHandle()),
1533 inputsAndOutputsModelToStep);
1534 }
1535
// Check that "logicalStep" is an ExecutionStep that runs "model" on
// "device", and that each of the step's remap vectors / output sets
// matches the expected value supplied by the caller (expressed in terms
// of "model" operand numbers, translated through the map produced by the
// bool compare() overload). Reports failure via gtest fatal assertions.
void compare(const std::shared_ptr<LogicalStep> logicalStep, const PartitioningModel* model,
             std::shared_ptr<Device> device, const RemapVectorType& modelInputs,
             const RemapVectorType& modelOutputs, const RemapVectorType& tempsAsStepModelInputs,
             const StepModelOutputSetType& tempsAsStepModelOutputs,
             const RemapVectorType& outputsAsStepModelInputs,
             const std::set<uint32_t>& modelOutputsThatAreDownstreamInputs) {
    ASSERT_TRUE(logicalStep->isExecution());
    const ExecutionStep* step = logicalStep->executionStep();
    std::map<uint32_t, uint32_t> inputsAndOutputsModelToStep;
    // ASSERT_NO_FATAL_FAILURE propagates any fatal failure raised inside
    // the nested compare() call chain.
    ASSERT_NO_FATAL_FAILURE(
            ASSERT_TRUE(compare(step, model, device, &inputsAndOutputsModelToStep)));
    ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep, step->getModelInputs(),
                                    modelInputs));
    ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep, step->getModelOutputs(),
                                    modelOutputs));
    ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep,
                                    step->getTempsAsStepModelInputs(), tempsAsStepModelInputs));
    ASSERT_TRUE(compareStepModelOutputSets(inputsAndOutputsModelToStep,
                                           step->getTempsAsStepModelOutputs(),
                                           tempsAsStepModelOutputs));
    ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep,
                                    step->getOutputsAsStepModelInputs(),
                                    outputsAsStepModelInputs));
    ASSERT_TRUE(modelOutputsThatAreDownstreamInputs ==
                step->getModelOutputsThatAreDownstreamInputs());
}
1562
1563 private:
compareRemapVectors(const std::map<uint32_t,uint32_t> & inputsAndOutputsModelToStep,const RemapVectorType & step,RemapVectorType model)1564 static bool compareRemapVectors(const std::map<uint32_t, uint32_t>& inputsAndOutputsModelToStep,
1565 const RemapVectorType& step, RemapVectorType model) {
1566 std::transform(model.begin(), model.end(), model.begin(),
1567 [&inputsAndOutputsModelToStep](const RemapVectorType::value_type& val) {
1568 return std::make_pair(val.first,
1569 inputsAndOutputsModelToStep.at(val.second));
1570 });
1571 return step == model;
1572 }
1573
compareStepModelOutputSets(const std::map<uint32_t,uint32_t> & inputsAndOutputsModelToStep,const StepModelOutputSetType & step,const StepModelOutputSetType & model)1574 static bool compareStepModelOutputSets(
1575 const std::map<uint32_t, uint32_t>& inputsAndOutputsModelToStep,
1576 const StepModelOutputSetType& step, const StepModelOutputSetType& model) {
1577 StepModelOutputSetType modelTransformed;
1578 std::transform(
1579 model.begin(), model.end(), std::inserter(modelTransformed, modelTransformed.end()),
1580 [&inputsAndOutputsModelToStep](const StepModelOutputSetType::value_type& val) {
1581 return std::make_pair(val.first, inputsAndOutputsModelToStep.at(val.second));
1582 });
1583 return step == modelTransformed;
1584 }
1585 };
1586
TEST_F(PartitioningTest, SimpleModel) {
    // Two-operation chain: opnd4 = op1(op0(opnd0, opnd1), opnd3).
    PartitioningModel model;
    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addFloatOperand();
    uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1);
    uint32_t opnd3 = model.addFloatOperand();
    uint32_t opnd4 = model.addOperation2To1V1_0(1, opnd2, opnd3);
    model.identifyInputsAndOutputs({opnd0, opnd1, opnd3}, {opnd4});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // Simple partition (two devices are each capable of everything, one is the best).
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    const auto devicesA = makeDevices({{"bad", 0.9, ~0U}, {"good", 0.5, ~0U}});
    ExecutionPlan planA;
    ASSERT_EQ(model.partitionTheWork(devicesA, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planA),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(planA.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(planA.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_NE(planA.forTest_simpleGetDevice().get(), nullptr);
    // Lower perf number is better, so "good" (0.5) beats "bad" (0.9).
    ASSERT_EQ(planA.forTest_simpleGetDevice()->getName(), "good");

    // Simple partition (two devices are each capable of everything, none better than CPU).
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    const auto devicesC = makeDevices({{"bad", 1.1, ~0U}, {"bad2", 1.0, ~0U}});
    ExecutionPlan planC;
    ASSERT_EQ(model.partitionTheWork(devicesC, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planC),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(planC.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(planC.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_EQ(planC.forTest_simpleGetDevice(), DeviceManager::getCpuDevice());

    // Compound partition (two devices, each is capable of one of the
    // two operations). We could do more extensive checking here --
    // for example, verify that each step within the plan has the
    // correct (model and step model)x(inputs and outputs).
    const auto devicesB = makeDevices({{"0", 0.9, 1 << 0}, {"1", 0.5, 1 << 1}});
    ExecutionPlan planB;
    ASSERT_EQ(model.partitionTheWork(devicesB, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planB),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(planB.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(planB.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
    const auto& stepsB = planB.forTest_compoundGetSteps();
    ASSERT_EQ(stepsB.size(), size_t(2));
    {
        // Build a model to compare against the step model from stepsB[0]
        // (operation 0 on device "0").
        PartitioningModel modelB0;
        uint32_t b0Opnd0 = modelB0.addFloatOperand();
        uint32_t b0Opnd1 = modelB0.addFloatOperand();
        uint32_t b0Opnd2 = modelB0.addOperation2To1V1_0(0, b0Opnd0, b0Opnd1);
        modelB0.identifyInputsAndOutputs({b0Opnd0, b0Opnd1}, {b0Opnd2});
        modelB0.finish();
        ASSERT_TRUE(modelB0.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[0], &modelB0, devicesB[0],
                        RemapVectorType{{opnd0, b0Opnd0}, {opnd1, b0Opnd1}},  // modelInputs
                        RemapVectorType{},                                    // modelOutputs
                        RemapVectorType{},                         // tempsAsStepModelInputs
                        StepModelOutputSetType{{opnd2, b0Opnd2}},  // tempsAsStepModelOutputs
                        RemapVectorType{},                         // outputsAsStepModelInputs
                        {}));  // modelOutputsThatAreDownstreamInputs
    }
    {
        // Build a model to compare against the step model from stepsB[1]
        // (operation 1 on device "1").
        PartitioningModel modelB1;
        uint32_t b1Opnd2 = modelB1.addFloatOperand();
        uint32_t b1Opnd3 = modelB1.addFloatOperand();
        uint32_t b1Opnd4 = modelB1.addOperation2To1V1_0(1, b1Opnd2, b1Opnd3);
        // Note: In the partitioning algorithm, step model inputs follow
        // model inputs. In the original model "model", opnd2 is not
        // an input; so in the step model "modelB1", the corresponding
        // input b1Opnd2 is a step model input, and must follow the
        // model input b1Opnd3.
        modelB1.identifyInputsAndOutputs({b1Opnd3, b1Opnd2}, {b1Opnd4});
        modelB1.finish();
        ASSERT_TRUE(modelB1.isValid());

        ASSERT_NO_FATAL_FAILURE(compare(
                stepsB[1], &modelB1, devicesB[1], RemapVectorType{{opnd3, b1Opnd3}},  // modelInputs
                RemapVectorType{{opnd4, b1Opnd4}},         // modelOutputs
                RemapVectorType{{opnd2, b1Opnd2}},         // tempsAsStepModelInputs
                StepModelOutputSetType{},                  // tempsAsStepModelOutputs
                RemapVectorType{},                         // outputsAsStepModelInputs
                {}));  // modelOutputsThatAreDownstreamInputs
    }
}
1679
TEST_F(PartitioningTest, SliceModel) {
    // Model containing operations from each of the four HAL versions, so
    // that partitioning must "slice" the model by device feature level.
    PartitioningModel model;
    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addFloatOperand();
    uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1);
    uint32_t opnd3 = model.addOperation2To1V1_0(1, opnd0, opnd1);
    uint32_t opnd4 = model.addOperation2To1V1_1(0, opnd0, opnd1);
    uint32_t opnd5 = model.addOperation2To1V1_2(0, opnd2, opnd3);
    uint32_t opnd6 = model.addOperation1To1V1_3(0, opnd2);
    model.identifyInputsAndOutputs({opnd0, opnd1}, {opnd2, opnd4, opnd5, opnd6});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // Simple partition (V1_0, V1_1, V1_2, V1_3 devices are available; V1_3 has best perf).
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    const auto devicesA = makeDevices({{"V1_0", 0.8, HalVersion::V1_0, ~0U},
                                       {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
                                       {"V1_2", 0.6, HalVersion::V1_2, ~0U, ~0U, ~0U},
                                       {"V1_3", 0.5, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}});
    ExecutionPlan planA;
    ASSERT_EQ(model.partitionTheWork(devicesA, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planA),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(planA.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(planA.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_NE(planA.forTest_simpleGetDevice().get(), nullptr);
    ASSERT_EQ(planA.forTest_simpleGetDevice()->getName(), "V1_3");

    // Compound partition (V1_0, V1_1, V1_2 devices are available, in decreasing
    // order of performance; model is distributed across all three devices).
    const auto devicesB = makeDevices({{"V1_0", 0.6, HalVersion::V1_0, ~0U},
                                       {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
                                       {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U},
                                       {"V1_3", 0.9, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}});
    ExecutionPlan planB;
    ASSERT_EQ(model.partitionTheWork(devicesB, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planB),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(planB.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(planB.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
    const auto& stepsB = planB.forTest_compoundGetSteps();
    ASSERT_EQ(stepsB.size(), size_t(4));
    {
        // Build a model to compare against the step model from stepsB[0]
        // (the V1_1 operation on the "V1_1" device).
        PartitioningModel modelB0;
        uint32_t b0Opnd0 = modelB0.addFloatOperand();
        uint32_t b0Opnd1 = modelB0.addFloatOperand();
        uint32_t b0Opnd2 = modelB0.addOperation2To1V1_1(0, b0Opnd0, b0Opnd1);
        modelB0.identifyInputsAndOutputs({b0Opnd0, b0Opnd1}, {b0Opnd2});
        modelB0.finish();
        ASSERT_TRUE(modelB0.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[0], &modelB0, devicesB[1],
                        RemapVectorType{{opnd0, b0Opnd0}, {opnd1, b0Opnd1}},  // modelInputs
                        RemapVectorType{{opnd4, b0Opnd2}},                    // modelOutputs
                        RemapVectorType{},         // tempsAsStepModelInputs
                        StepModelOutputSetType{},  // tempsAsStepModelOutputs
                        RemapVectorType{},         // outputsAsStepModelInputs
                        {}));                      // modelOutputsThatAreDownstreamInputs
    }
    {
        // Build a model to compare against the step model from stepsB[1]
        // (the two V1_0 operations on the "V1_0" device).
        PartitioningModel modelB1;
        uint32_t b1Opnd0 = modelB1.addFloatOperand();
        uint32_t b1Opnd1 = modelB1.addFloatOperand();
        uint32_t b1Opnd2 = modelB1.addOperation2To1V1_0(0, b1Opnd0, b1Opnd1);
        uint32_t b1Opnd3 = modelB1.addOperation2To1V1_0(1, b1Opnd0, b1Opnd1);
        modelB1.identifyInputsAndOutputs({b1Opnd0, b1Opnd1}, {b1Opnd2, b1Opnd3});
        modelB1.finish();
        ASSERT_TRUE(modelB1.isValid());

        // Note that this is also an important test that we can detect
        // modelOutputsThatAreDownstreamInputs.
        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[1], &modelB1, devicesB[0],
                        RemapVectorType{{opnd0, b1Opnd0}, {opnd1, b1Opnd1}},  // modelInputs
                        RemapVectorType{{opnd2, b1Opnd2}},                    // modelOutputs
                        RemapVectorType{},                         // tempsAsStepModelInputs
                        StepModelOutputSetType{{opnd3, b1Opnd3}},  // tempsAsStepModelOutputs
                        RemapVectorType{},                         // outputsAsStepModelInputs
                        {0u}));  // modelOutputsThatAreDownstreamInputs
    }
    {
        // Build a model to compare against the step model from stepsB[2]
        // (the V1_3 operation on the "V1_3" device).
        PartitioningModel modelB2;
        uint32_t b2Opnd0 = modelB2.addFloatOperand();
        uint32_t b2Opnd1 = modelB2.addOperation1To1V1_3(0, b2Opnd0);
        // Note: In the partitioning algorithm, temps that are
        // step model inputs precede model outputs that are step model
        // inputs.
        modelB2.identifyInputsAndOutputs({b2Opnd0}, {b2Opnd1});
        modelB2.finish();
        ASSERT_TRUE(modelB2.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[2], &modelB2, devicesB[3], RemapVectorType{},  // modelInputs
                        RemapVectorType{{opnd6, b2Opnd1}},                    // modelOutputs
                        RemapVectorType{},                  // tempsAsStepModelInputs
                        StepModelOutputSetType{},           // tempsAsStepModelOutputs
                        RemapVectorType{{opnd2, b2Opnd0}},  // outputsAsStepModelInputs
                        {}));  // modelOutputsThatAreDownstreamInputs
    }
    {
        // Build a model to compare against the step model from stepsB[3]
        // (the V1_2 operation on the "V1_2" device).
        PartitioningModel modelB3;
        uint32_t b3Opnd0 = modelB3.addFloatOperand();
        uint32_t b3Opnd1 = modelB3.addFloatOperand();
        uint32_t b3Opnd2 = modelB3.addOperation2To1V1_2(0, b3Opnd0, b3Opnd1);
        // Note: In the partitioning algorithm, temps that are
        // step model inputs precede model outputs that are step model
        // inputs. In the original model "model", opnd3 is a temp and
        // opnd2 is a model output; so in the step model "modelB3", the
        // corresponding inputs b3Opnd1 and b3Opnd0 must appear in
        // that order.
        modelB3.identifyInputsAndOutputs({b3Opnd1, b3Opnd0}, {b3Opnd2});
        modelB3.finish();
        ASSERT_TRUE(modelB3.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[3], &modelB3, devicesB[2], RemapVectorType{},  // modelInputs
                        RemapVectorType{{opnd5, b3Opnd2}},                    // modelOutputs
                        RemapVectorType{{opnd3, b3Opnd1}},  // tempsAsStepModelInputs
                        StepModelOutputSetType{},           // tempsAsStepModelOutputs
                        RemapVectorType{{opnd2, b3Opnd0}},  // outputsAsStepModelInputs
                        {}));  // modelOutputsThatAreDownstreamInputs
    }

    // TODO: Make sure this still works when we have multiple devices
    // of same version available for slicing. An easy (?) choice would
    // be to route the two different V1_0 operations to different
    // devices.
}
1814
TEST_F(PartitioningTest, SliceModelToEmpty) {
    // A model consisting of nothing but a single V1_3-only operation.
    PartitioningModel model;
    uint32_t inputOpnd = model.addFloatOperand();
    uint32_t outputOpnd = model.addOperation1To1V1_3(0, inputOpnd);
    model.identifyInputsAndOutputs({inputOpnd}, {outputOpnd});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // Only the V1_3 device can handle any operations in the model.
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    const auto devices = makeDevices({{"V1_0", 0.6, HalVersion::V1_0, ~0U},
                                      {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
                                      {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U},
                                      {"V1_3", 0.9, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}});
    ExecutionPlan plan;
    ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &plan),
              ANEURALNETWORKS_NO_ERROR);

    // Expect a SIMPLE plan that routes the entire model to the V1_3 device.
    EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_NE(plan.forTest_simpleGetDevice().get(), nullptr);
    ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "V1_3");
}
1839
TEST_F(PartitioningTest, Cpu) {
    // Here's a model where some operations execute only on the Cpu.
    // To make things interesting, we produce three partitions --
    // device, cpu, same-device.

    static const uint32_t kCpuOp = 1;
    static const uint32_t kDevOp = 2;

    // The sole driver can handle only kDevOp (bit kDevOp of the
    // operation mask); kCpuOp operations must fall back to the CPU.
    const auto devices = makeDevices({{"1", 0.5, 1 << kDevOp}});

    PartitioningModel model;

    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addFloatOperand();

    uint32_t opnd2 = model.addOperation2To1V1_0(kDevOp, opnd0, opnd1);
    uint32_t opnd3 = model.addOperation2To1V1_0(kDevOp, opnd0, opnd2);

    uint32_t opnd4 = model.addOperation2To1V1_0(kCpuOp, opnd0, opnd3);
    uint32_t opnd5 = model.addOperation2To1V1_0(kCpuOp, opnd2, opnd4);

    uint32_t opnd6 = model.addFloatOperand();

    uint32_t opnd7 = model.addOperation2To1V1_0(kDevOp, opnd3, opnd5);
    uint32_t opnd8 = model.addOperation2To1V1_0(kDevOp, opnd6, opnd7);

    model.identifyInputsAndOutputs({opnd0, opnd1, opnd6}, {opnd4, opnd8});
    model.finish();
    ASSERT_TRUE(model.isValid());

    ExecutionPlan plan;
    ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &plan),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
    const auto& steps = plan.forTest_compoundGetSteps();
    ASSERT_EQ(steps.size(), size_t(3));
    {
        const auto& step0 = steps[0];

        // Build a model to compare against the step model from steps[0]
        // (the first two kDevOp operations, on the driver device).
        PartitioningModel model0;
        uint32_t m0Opnd0 = model0.addFloatOperand();
        uint32_t m0Opnd1 = model0.addFloatOperand();
        uint32_t m0Opnd2 = model0.addOperation2To1V1_0(kDevOp, m0Opnd0, m0Opnd1);
        uint32_t m0Opnd3 = model0.addOperation2To1V1_0(kDevOp, m0Opnd0, m0Opnd2);
        model0.identifyInputsAndOutputs({m0Opnd0, m0Opnd1}, {m0Opnd2, m0Opnd3});
        model0.finish();
        ASSERT_TRUE(model0.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(step0, &model0, devices[0],
                        RemapVectorType{{opnd0, m0Opnd0}, {opnd1, m0Opnd1}},  // modelInputs
                        RemapVectorType{},                                    // modelOutputs
                        RemapVectorType{},  // tempsAsStepModelInputs
                        StepModelOutputSetType{{opnd2, m0Opnd2},
                                               {opnd3, m0Opnd3}},  // tempsAsStepModelOutputs
                        RemapVectorType{},                         // outputsAsStepModelInputs
                        {}));  // modelOutputsThatAreDownstreamInputs
    }
    {
        const auto& step1 = steps[1];

        // Build a model to compare against the step model from steps[1]
        // (the two kCpuOp operations, on the CPU fallback device).
        PartitioningModel model1;
        uint32_t m1Opnd0 = model1.addFloatOperand();
        uint32_t m1Opnd3 = model1.addFloatOperand();
        uint32_t m1Opnd4 = model1.addOperation2To1V1_0(kCpuOp, m1Opnd0, m1Opnd3);
        uint32_t m1Opnd2 = model1.addFloatOperand();
        uint32_t m1Opnd5 = model1.addOperation2To1V1_0(kCpuOp, m1Opnd2, m1Opnd4);
        model1.identifyInputsAndOutputs({m1Opnd0, m1Opnd3, m1Opnd2}, {m1Opnd4, m1Opnd5});
        model1.finish();
        ASSERT_TRUE(model1.isValid());

        ASSERT_NO_FATAL_FAILURE(compare(
                step1, &model1, DeviceManager::getCpuDevice(),
                RemapVectorType{{opnd0, m1Opnd0}},                    // modelInputs
                RemapVectorType{{opnd4, m1Opnd4}},                    // modelOutputs
                RemapVectorType{{opnd3, m1Opnd3}, {opnd2, m1Opnd2}},  // tempsAsStepModelInputs
                StepModelOutputSetType{{opnd5, m1Opnd5}},             // tempsAsStepModelOutputs
                RemapVectorType{},                                    // outputsAsStepModelInputs
                {}));  // modelOutputsThatAreDownstreamInputs
    }
    {
        const auto& step2 = steps[2];

        // Build a model to compare against the step model from steps[2]
        // (the final two kDevOp operations, back on the driver device).
        PartitioningModel model2;
        uint32_t m2Opnd3 = model2.addFloatOperand();
        uint32_t m2Opnd5 = model2.addFloatOperand();
        uint32_t m2Opnd7 = model2.addOperation2To1V1_0(kDevOp, m2Opnd3, m2Opnd5);
        uint32_t m2Opnd6 = model2.addFloatOperand();
        uint32_t m2Opnd8 = model2.addOperation2To1V1_0(kDevOp, m2Opnd6, m2Opnd7);
        model2.identifyInputsAndOutputs({m2Opnd6, m2Opnd3, m2Opnd5}, {m2Opnd8});
        model2.finish();
        ASSERT_TRUE(model2.isValid());

        ASSERT_NO_FATAL_FAILURE(compare(
                step2, &model2, devices[0], RemapVectorType{{opnd6, m2Opnd6}},  // modelInputs
                RemapVectorType{{opnd8, m2Opnd8}},                              // modelOutputs
                RemapVectorType{{opnd3, m2Opnd3}, {opnd5, m2Opnd5}},  // tempsAsStepModelInputs
                StepModelOutputSetType{},                             // tempsAsStepModelOutputs
                RemapVectorType{},                                    // outputsAsStepModelInputs
                {}));  // modelOutputsThatAreDownstreamInputs
    }
}
1947
TEST_F(PartitioningTest, SetPartitioning) {
    // Exercise the three partitioning modes (no / with fallback / without
    // fallback) against a simulated partitioning failure.
    PartitioningModel model;
    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addFloatOperand();
    // NOTE(review): Dimensioned::NO presumably gives opnd2 unspecified
    // dimensions -- confirm against PartitioningModel's definition.
    uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1, Dimensioned::NO);
    uint32_t opnd3 = model.addFloatOperand();
    uint32_t opnd4 = model.addOperation2To1V1_0(1, opnd2, opnd3);
    model.identifyInputsAndOutputs({opnd0, opnd1, opnd3}, {opnd4});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // One device that can and should execute operation 0.
    const auto devices = makeDevices({{"hw", 0.5, (1 << 0)}});

    // Test kPartitioningNo. We should not even attempt partitioning,
    // so there should be a SIMPLE plan on CPU.
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    PartitioningCompilation cPNo(&model, devices);
    ASSERT_EQ(cPNo.setPartitioning(DeviceManager::kPartitioningNo), Result::NO_ERROR);
    ASSERT_EQ(cPNo.failPartitioning(), Result::NO_ERROR);
    ASSERT_EQ(cPNo.finish(), Result::NO_ERROR);
    ASSERT_EQ(cPNo.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_EQ(cPNo.getExecutionPlan().forTest_simpleGetDevice(), DeviceManager::getCpuDevice());

    // Test kPartitioningWithFallback. We should attempt partitioning, simulate
    // a recoverable failure, then fallback to CPU with a SIMPLE plan, and
    // finally return success. No need to compare the original model to the
    // model from the plan -- we didn't actually do any partitioning.
    PartitioningCompilation cPWithFallback(&model, devices);
    ASSERT_EQ(cPWithFallback.setPartitioning(DeviceManager::kPartitioningWithFallback),
              Result::NO_ERROR);
    ASSERT_EQ(cPWithFallback.failPartitioning(), Result::NO_ERROR);
    ASSERT_EQ(cPWithFallback.finish(), Result::NO_ERROR);
    ASSERT_EQ(cPWithFallback.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_EQ(cPWithFallback.getExecutionPlan().forTest_simpleGetDevice(),
              DeviceManager::getCpuDevice());

    // Test kPartitioningWithoutFallback. We should attempt partitioning,
    // simulate a recoverable failure, and fail.
    PartitioningCompilation cPWithoutFallback(&model, devices);
    ASSERT_EQ(cPWithoutFallback.setPartitioning(DeviceManager::kPartitioningWithoutFallback),
              Result::NO_ERROR);
    ASSERT_EQ(cPWithoutFallback.failPartitioning(), Result::NO_ERROR);
    ASSERT_EQ(cPWithoutFallback.finish(), Result::OP_FAILED);
    ASSERT_EQ(cPWithoutFallback.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::ERROR);
}
1995
// Regression test for http://b/69166603:
// "partitioned compilation and execution yields wrong results when model output is step model
// input"
TEST_F(PartitioningTest, ModelOutputAsStepModelInput) {
    PartitioningModel model;
    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addFloatOperand();
    // Operation 0 produces opnd2, which is both a model output and (twice)
    // the input of operation 1 -- the scenario from the bug above.
    uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1);
    uint32_t opnd3 = model.addOperation2To1V1_0(1, opnd2, opnd2);
    model.identifyInputsAndOutputs({opnd0, opnd1}, {opnd2, opnd3});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // Compound partition (two devices, each is capable of one of the
    // two operations). We could do more extensive checking here --
    // for example, verify that each step within the plan has the
    // correct (model and step model)x(inputs and outputs).
    const auto devices = makeDevices({{"0", 0.5, 1 << 0}, {"1", 0.5, 1 << 1}});
    ExecutionPlan plan;
    ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &plan),
              ANEURALNETWORKS_NO_ERROR);
    // opnd2 crosses the partition boundary as a model output, not as a
    // temporary, so no dynamic temporaries are expected.
    EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
    const auto& steps = plan.forTest_compoundGetSteps();
    ASSERT_EQ(steps.size(), size_t(2));
    {
        // Build a model to compare against the step model from steps[0].
        PartitioningModel model0;
        uint32_t m0Opnd0 = model0.addFloatOperand();
        uint32_t m0Opnd1 = model0.addFloatOperand();
        uint32_t m0Opnd2 = model0.addOperation2To1V1_0(0, m0Opnd0, m0Opnd1);
        model0.identifyInputsAndOutputs({m0Opnd0, m0Opnd1}, {m0Opnd2});
        model0.finish();
        ASSERT_TRUE(model0.isValid());
        // Model output index 0 (opnd2) is consumed by the downstream step --
        // hence the {0u} expectation for modelOutputsThatAreDownstreamInputs.
        ASSERT_NO_FATAL_FAILURE(
                compare(steps[0], &model0, devices[0],
                        RemapVectorType{{opnd0, m0Opnd0}, {opnd1, m0Opnd1}},  // modelInputs
                        RemapVectorType{{opnd2, m0Opnd2}},                    // modelOutputs
                        RemapVectorType{},         // tempsAsStepModelInputs
                        StepModelOutputSetType{},  // tempsAsStepModelOutputs
                        RemapVectorType{},         // outputsAsStepModelInputs
                        {0u}));                    // modelOutputsThatAreDownstreamInputs
    }
    {
        // Build a model to compare against the step model from steps[1].
        PartitioningModel model1;
        uint32_t m1Opnd2 = model1.addFloatOperand();
        uint32_t m1Opnd3 = model1.addOperation2To1V1_0(1, m1Opnd2, m1Opnd2);
        model1.identifyInputsAndOutputs({m1Opnd2}, {m1Opnd3});
        model1.finish();
        ASSERT_TRUE(model1.isValid());

        // opnd2, a model output produced by steps[0], arrives here as an
        // outputsAsStepModelInputs entry rather than as a temporary.
        ASSERT_NO_FATAL_FAILURE(
                compare(steps[1], &model1, devices[1], RemapVectorType{},  // modelInputs
                        RemapVectorType{{opnd3, m1Opnd3}},                 // modelOutputs
                        RemapVectorType{},                      // tempsAsStepModelInputs
                        StepModelOutputSetType{},               // tempsAsStepModelOutputs
                        RemapVectorType{{opnd2, m1Opnd2}},      // outputsAsStepModelInputs
                        {}));  // modelOutputsThatAreDownstreamInputs
    }
}
2058
// Verify device selection and error handling for models containing OEM
// operations, across drivers with differing OEM support.
TEST_F(PartitioningTest, OemOperations) {
    // Trivial model consisting solely of an OEM operation.
    PartitioningModel model;
    uint32_t input = model.addFloatOperand();
    uint32_t output = model.addOperationOEM1To1(input);
    model.identifyInputsAndOutputs({input}, {output});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // The best driver that can run an OEM operation must be chosen, even if
    // it is not better than the CPU. No need to compare the original model
    // to the model from the plan -- we didn't actually do any partitioning.
    const auto threeDrivers = makeDevices({{"badOEM", 1.5, ~0U, PartitioningDriver::OEMYes},
                                           {"noOEM", 0.5, ~0U, PartitioningDriver::OEMNo},
                                           {"goodOEM", 1.2, ~0U, PartitioningDriver::OEMYes}});
    PartitioningCompilation bestOemCompilation(&model, threeDrivers);
    ASSERT_EQ(bestOemCompilation.finish(), Result::NO_ERROR);
    const auto& bestOemPlan = bestOemCompilation.getExecutionPlan();
    ASSERT_EQ(bestOemPlan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_NE(bestOemPlan.forTest_simpleGetDevice().get(), nullptr);
    ASSERT_EQ(bestOemPlan.forTest_simpleGetDevice()->getName(), "goodOEM");

    // With no driver able to run an OEM operation, compilation must fail.
    const auto oemIncapableDriver = makeDevices({{"noOEM", 0.5, ~0U, PartitioningDriver::OEMNo}});
    PartitioningCompilation noOemCompilation(&model, oemIncapableDriver);
    ASSERT_EQ(noOemCompilation.finish(), Result::BAD_DATA);

    // A driver that can SUPPORT but not PREPARE an OEM operation must also
    // cause the compilation to fail.
    const auto indecisiveDriver =
            makeDevices({{"indecisiveOEM", 0.5, ~0U, PartitioningDriver::OEMIndecisive}});
    PartitioningCompilation indecisiveCompilation(&model, indecisiveDriver);
    ASSERT_NE(indecisiveCompilation.finish(), Result::NO_ERROR);

    // With no drivers at all (only CPU fallback), compilation must fail.
    PartitioningCompilation driverlessCompilation(&model, makeDevices({}) /* no drivers */);
    ASSERT_EQ(driverlessCompilation.finish(), Result::BAD_DATA);
}
2097
// Verify that relaxing FP32 computation to FP16 steers device selection:
// a relaxed model should land on the device with the better relaxed
// performance, a non-relaxed model on the one with better FP32 performance.
TEST_F(PartitioningTest, RelaxedFP) {
    const auto devices = makeDevices({// Best choice for non-relaxed model.
                                      {"f32", 0.8, 0.9 /* relaxed */, ~0U},
                                      // Best choice for relaxed model.
                                      {"f16", 0.9, 0.8 /* relaxed */, ~0U}});

    const auto checkDeviceChoice = [&devices](bool doRelax, const char* expectDevice) {
        SCOPED_TRACE(expectDevice);
        // Trivial single-operation model, optionally relaxed to FP16.
        PartitioningModel model;
        uint32_t lhs = model.addFloatOperand();
        uint32_t rhs = model.addFloatOperand();
        uint32_t result = model.addOperation2To1V1_0(0, lhs, rhs);
        model.identifyInputsAndOutputs({lhs, rhs}, {result});
        model.relaxComputationFloat32toFloat16(doRelax);
        model.finish();
        ASSERT_TRUE(model.isValid());
        // The whole model must land on the expected device as a SIMPLE plan.
        // No need to compare the original model to the model from the plan
        // -- we didn't actually do any partitioning.
        ExecutionPlan plan;
        ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                         ExecutePriority::DEFAULT, {}, &plan),
                  ANEURALNETWORKS_NO_ERROR);
        EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty());
        ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
        ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), expectDevice);
    };

    ASSERT_NO_FATAL_FAILURE(checkDeviceChoice(false, "f32"));
    ASSERT_NO_FATAL_FAILURE(checkDeviceChoice(true, "f16"));
}
2130
// Verify that per-operand-type performance entries in a driver's
// capabilities steer device selection, for every fundamental and every OEM
// operand type.
TEST_F(PartitioningTest, Perf) {
    // The various type names used here are confusing.
    //
    // OperandType (from HAL file), WrapperType (from NeuralNetworksWrapper.h),
    // and OperandCode (from NeuralNetworks.h) are different enums representing
    // the same type kind -- e.g., OperandType::FLOAT32, WrapperType::FLOAT32,
    // ANEURALNETWORKS_FLOAT32. Corresponding enumerators have the same value.
    //
    // WrapperOperandType is the NeuralNetworksWrapper.h representation of a
    // full operand type (WrapperType plus dimensions plus other attributes).

    auto TestType = [](V1_3::OperandType operandType) {
        if (operandType == V1_3::OperandType::SUBGRAPH) {
            // SUBGRAPH capabilities are handled differently.
            return;
        }
        SCOPED_TRACE(toString(operandType));
        // Trivial model consisting solely of OEM operation. We
        // pick OEM operation because this allows us to use
        // inputs and outputs of any number and type.
        PartitioningModel model;
        uint32_t opndIn = model.addOperand(static_cast<WrapperType>(operandType));
        uint32_t opndOut = model.addOperationOEM1To1(opndIn);
        model.identifyInputsAndOutputs({opndIn}, {opndOut});
        model.finish();
        ASSERT_TRUE(model.isValid());

        // Baseline capabilities: uniform performance of 0.5 for every type.
        const V1_3::Capabilities baseCapabilities = ::android::nn::makeCapabilities(0.5);

        {
            // better than base (this test expects the device with the lower
            // performance number for operandType to win)
            V1_3::Capabilities goodCapabilities = baseCapabilities;
            update(&goodCapabilities, operandType, 0.25);

            const auto devices =
                    makeDevices({{"base", baseCapabilities, ~0U, PartitioningDriver::OEMYes},
                                 {"good", goodCapabilities, ~0U, PartitioningDriver::OEMYes}});

            // Verify that model will be executed on "good".
            // No need to compare the original model to the model from the plan -- we
            // didn't actually do any partitioning.
            ExecutionPlan plan;
            ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                             ExecutePriority::DEFAULT, {}, &plan),
                      ANEURALNETWORKS_NO_ERROR);
            EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty());
            ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
            ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "good");
        }

        {
            // worse than base
            V1_3::Capabilities badCapabilities = baseCapabilities;
            update(&badCapabilities, operandType, 0.75);
            const auto devices =
                    makeDevices({{"base", baseCapabilities, ~0U, PartitioningDriver::OEMYes},
                                 {"bad", badCapabilities, ~0U, PartitioningDriver::OEMYes}});

            // Verify that model will be executed on "base".
            // No need to compare the original model to the model from the plan -- we
            // didn't actually do any partitioning.
            ExecutionPlan plan;
            ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                             ExecutePriority::DEFAULT, {}, &plan),
                      ANEURALNETWORKS_NO_ERROR);
            EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty());
            ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
            ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "base");
        }
    };

    // Sweep the full fundamental operand-type range, then the OEM range.
    for (uint32_t type = static_cast<uint32_t>(V1_3::OperandTypeRange::FUNDAMENTAL_MIN);
         type <= static_cast<uint32_t>(V1_3::OperandTypeRange::FUNDAMENTAL_MAX); ++type) {
        TestType(static_cast<V1_3::OperandType>(type));
    }
    for (uint32_t type = static_cast<uint32_t>(V1_3::OperandTypeRange::OEM_MIN);
         type <= static_cast<uint32_t>(V1_3::OperandTypeRange::OEM_MAX); ++type) {
        TestType(static_cast<V1_3::OperandType>(type));
    }
}
2211
// A partition whose step model would have no model inputs must cause
// whole-model CPU fallback.
TEST_F(PartitioningTest, ZeroInputStepModel) {
    // Operation 0 consumes only a constant (zero) operand, so the partition
    // containing it would have no model input at all.
    PartitioningModel model;
    const uint32_t zeroOperand = model.addFloatZeroOperand();
    const uint32_t intermediate = model.addOperation1To1V1_3(0, zeroOperand);
    const uint32_t realInput = model.addFloatOperand();
    const uint32_t result = model.addOperation2To1V1_0(1, intermediate, realInput);
    model.identifyInputsAndOutputs({realInput}, {result});
    ASSERT_EQ(model.finish(), Result::NO_ERROR);

    // This would split into 2 partitions: deviceA handles op0, deviceB
    // handles op1. Because the deviceA partition lacks a model input, the
    // plan must instead consist of a single CPU step.
    const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
    PartitioningCompilation compilation(&model, devices);
    ASSERT_EQ(compilation.finish(), Result::NO_ERROR);
    const auto& cpuDeviceName = DeviceManager::getCpuDevice()->getName();
    checkExecutionPlanSteps(compilation.getExecutionPlan(), {cpuDeviceName});
}
2230
// A partition whose step model would have no model outputs must cause
// whole-model CPU fallback.
TEST_F(PartitioningTest, ZeroOutputStepModel) {
    // Operation 1's result is not a model output, so the partition
    // containing it would produce no model output at all.
    PartitioningModel model;
    const uint32_t mainInput = model.addFloatOperand();
    const uint32_t produced = model.addOperation1To1V1_3(0, mainInput);
    const uint32_t sideInput = model.addFloatOperand();
    model.addOperation2To1V1_0(1, produced, sideInput);
    model.identifyInputsAndOutputs({mainInput, sideInput}, {produced});
    ASSERT_EQ(model.finish(), Result::NO_ERROR);

    // This would split into 2 partitions: deviceA handles op0, deviceB
    // handles op1. Because the deviceB partition lacks a model output, the
    // plan must instead consist of a single CPU step.
    const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
    PartitioningCompilation compilation(&model, devices);
    ASSERT_EQ(compilation.finish(), Result::NO_ERROR);
    const auto& cpuDeviceName = DeviceManager::getCpuDevice()->getName();
    checkExecutionPlanSteps(compilation.getExecutionPlan(), {cpuDeviceName});
}
2249
2250 // Test dynamic temporaries and related parts of the partitioning implementation.
2251 //
2252 // opnd0 = model input // tensor to pad
2253 // opnd1 = model input // padding
// opnd2 = PAD(opnd0, opnd1)               // model output
// opnd3 = PAD(opnd0, opnd1)
2256 // opnd4 = ADD(opnd2, opnd3, FUSED_NONE) // model output
class DynamicTemporariesTest : public PartitioningTest {
   protected:
    // Call these functions in sequence in order to perform the test.
    // Call to declareOutputDimensions() can be omitted (see the default values below).
    // Call to declareHalVersions() can be omitted (defaults to HalVersion::LATEST).
    void declareOutputDimensions(bool opnd2ModelAndPartitionOutputSpecified,
                                 bool opnd3PartitionOutputSpecified,
                                 bool opnd4ModelOutputSpecified);
    void declareHalVersions(HalVersion padDeviceVersion, HalVersion addDeviceVersion);
    void makeModelAndValidate();
    void compileModelAndComparePlan(bool noFallback = true);
    void executeCompilationAndCompareOutput(bool opnd2ModelOutputBigEnough,
                                            bool opnd4ModelOutputBigEnough);

    // set by declareOutputDimensions(); "specified" means the operand is
    // created with fully specified dimensions (an unspecified partition
    // output is what makes a dynamic temporary possible)
    bool mOpnd2ModelAndPartitionOutputSpecified = false;
    bool mOpnd3PartitionOutputSpecified = false;
    bool mOpnd4ModelOutputSpecified = false;

    // set by declareHalVersions()
    HalVersion mPadDeviceVersion = HalVersion::LATEST;
    HalVersion mAddDeviceVersion = HalVersion::LATEST;
    HalVersion mMinDeviceVersion = HalVersion::LATEST;  // minimum of the other two device versions

    // created by makeModelAndValidate()
    std::optional<PartitioningModel> mModel;
    std::vector<uint32_t> mOpnds;  // {opnd0, opnd1, opnd2, opnd3, opnd4}

    // created by compileModelAndComparePlan();
    std::optional<PartitioningCompilation> mCompilation;

    // Outputs of entirely unknown rank are only expressible from V1_2 on.
    static bool supportsOutputOfUnknownRank(HalVersion version) {
        return version >= HalVersion::V1_2;
    }

    // Maps the "specified" flag to a Dimensioned value appropriate for the
    // given HAL version: fully specified, unknown rank, or (pre-V1_2)
    // rank-1 of unknown size.
    static Dimensioned dimensionedOutput(HalVersion version, bool specified) {
        return specified ? Dimensioned::YES_4
                         : supportsOutputOfUnknownRank(version) ? Dimensioned::NO
                                                                : Dimensioned::RANK_1;
    }
};
2298
declareOutputDimensions(bool opnd2ModelAndPartitionOutputSpecified,bool opnd3PartitionOutputSpecified,bool opnd4ModelOutputSpecified)2299 void DynamicTemporariesTest::declareOutputDimensions(bool opnd2ModelAndPartitionOutputSpecified,
2300 bool opnd3PartitionOutputSpecified,
2301 bool opnd4ModelOutputSpecified) {
2302 ASSERT_FALSE(mModel.has_value());
2303 mOpnd2ModelAndPartitionOutputSpecified = opnd2ModelAndPartitionOutputSpecified;
2304 mOpnd3PartitionOutputSpecified = opnd3PartitionOutputSpecified;
2305 mOpnd4ModelOutputSpecified = opnd4ModelOutputSpecified;
2306 }
2307
declareHalVersions(HalVersion padDeviceVersion,HalVersion addDeviceVersion)2308 void DynamicTemporariesTest::declareHalVersions(HalVersion padDeviceVersion,
2309 HalVersion addDeviceVersion) {
2310 ASSERT_FALSE(mModel.has_value());
2311 mPadDeviceVersion = padDeviceVersion;
2312 mAddDeviceVersion = addDeviceVersion;
2313 mMinDeviceVersion = min(padDeviceVersion, addDeviceVersion);
2314 }
2315
// Builds the PAD/PAD/ADD model described in the class comment and records
// the five operand indices in mOpnds. The output dimensionality of each
// operation follows the declareOutputDimensions()/declareHalVersions()
// configuration.
void DynamicTemporariesTest::makeModelAndValidate() {
    ASSERT_FALSE(mModel.has_value());
    mModel = PartitioningModel();

    // Activation scalar for the ADD operation (no fused activation).
    uint32_t opndActivation = mModel->addIntScalarOperand(ANEURALNETWORKS_FUSED_NONE);

    uint32_t opnd0 = mModel->addFloatOperand(Dimensioned::YES_2);  // tensor to pad
    uint32_t opnd1 = mModel->addIntOperand(Dimensioned::RANK_2);   // paddings
    // opnd2: model output (and partition output). opnd3: partition output
    // only -- the candidate dynamic temporary.
    uint32_t opnd2 = mModel->addExplicitOperationXTo1(
            ANEURALNETWORKS_PAD, {opnd0, opnd1}, WrapperType::TENSOR_FLOAT32,
            dimensionedOutput(mMinDeviceVersion, mOpnd2ModelAndPartitionOutputSpecified));
    uint32_t opnd3 = mModel->addExplicitOperationXTo1(
            ANEURALNETWORKS_PAD, {opnd0, opnd1}, WrapperType::TENSOR_FLOAT32,
            dimensionedOutput(mMinDeviceVersion, mOpnd3PartitionOutputSpecified));
    // opnd4: second model output, the sum of the two PAD results.
    uint32_t opnd4 = mModel->addExplicitOperationXTo1(
            ANEURALNETWORKS_ADD, {opnd2, opnd3, opndActivation}, WrapperType::TENSOR_FLOAT32,
            dimensionedOutput(mMinDeviceVersion, mOpnd4ModelOutputSpecified));
    mModel->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2, opnd4});
    mModel->finish();
    ASSERT_TRUE(mModel->isValid());

    mOpnds = {opnd0, opnd1, opnd2, opnd3, opnd4};
}
2339
// Compiles the model on two single-operation devices ("pad" and "add").
// With noFallback, expects a two-step COMPOUND plan and verifies both step
// models in detail; otherwise expects compilation to fail without fallback
// and then succeed as a whole-model CPU fallback when fallback is enabled.
void DynamicTemporariesTest::compileModelAndComparePlan(bool noFallback) {
    ASSERT_TRUE(mModel.has_value());
    ASSERT_TRUE(!mCompilation.has_value());

    // "pad" can only run PAD and "add" can only run ADD, each at the HAL
    // version chosen via declareHalVersions().
    auto devices = makeDevices({{"pad",
                                 0.9,
                                 0U,
                                 PartitioningDriver::OEMNo,
                                 mPadDeviceVersion,
                                 {V1_3::OperationType::PAD}},
                                {"add",
                                 0.9,
                                 0U,
                                 PartitioningDriver::OEMNo,
                                 mAddDeviceVersion,
                                 {V1_3::OperationType::ADD}}});

    mCompilation = PartitioningCompilation(&mModel.value(), devices);
    ASSERT_EQ(mCompilation->setPartitioning(DeviceManager::kPartitioningWithoutFallback),
              Result::NO_ERROR);
    if (noFallback) {
        ASSERT_EQ(mCompilation->finish(), Result::NO_ERROR);
        const ExecutionPlan& planA = mCompilation->getExecutionPlan();
        // opnd3 crosses the partition boundary without being a model output,
        // so it is a dynamic temporary exactly when its dimensions are
        // unspecified.
        EXPECT_TRUE(planA.forTest_flatGetDynamicTemporaries() ==
                    (mOpnd3PartitionOutputSpecified ? DynamicTemporariesType{}
                                                    : DynamicTemporariesType{mOpnds[3]}));
        ASSERT_EQ(planA.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
        const auto& stepsA = planA.forTest_compoundGetSteps();
        ASSERT_EQ(stepsA.size(), size_t(2));
        {
            // Build a model to compare against the step model from stepsA[0].
            // NOTE: a0Opnd2 corresponds to mOpnds[3] (the temporary) and
            // a0Opnd3 to mOpnds[2] (the model output) -- the names are
            // deliberately "swapped" to match the step model's operand order.
            PartitioningModel modelA0;
            uint32_t a0Opnd0 = modelA0.addFloatOperand(Dimensioned::YES_2);
            uint32_t a0Opnd1 = modelA0.addIntOperand(Dimensioned::RANK_2);
            uint32_t a0Opnd2 = modelA0.addExplicitOperationXTo1(
                    ANEURALNETWORKS_PAD, {a0Opnd0, a0Opnd1}, WrapperType::TENSOR_FLOAT32,
                    dimensionedOutput(mMinDeviceVersion, mOpnd3PartitionOutputSpecified));
            uint32_t a0Opnd3 = modelA0.addExplicitOperationXTo1(
                    ANEURALNETWORKS_PAD, {a0Opnd0, a0Opnd1}, WrapperType::TENSOR_FLOAT32,
                    dimensionedOutput(mMinDeviceVersion, mOpnd2ModelAndPartitionOutputSpecified));
            modelA0.identifyInputsAndOutputs({a0Opnd0, a0Opnd1}, {a0Opnd3, a0Opnd2});
            modelA0.finish();
            ASSERT_TRUE(modelA0.isValid());

            // Model output index 0 (mOpnds[2]) is consumed downstream by the
            // ADD step -- hence the {0u} expectation below.
            ASSERT_NO_FATAL_FAILURE(compare(
                    stepsA[0], &modelA0, devices[0],
                    RemapVectorType{{mOpnds[0], a0Opnd0}, {mOpnds[1], a0Opnd1}},  // modelInputs
                    RemapVectorType{{mOpnds[2], a0Opnd3}},                        // modelOutputs
                    RemapVectorType{},                             // tempsAsStepModelInputs
                    StepModelOutputSetType{{mOpnds[3], a0Opnd2}},  // tempsAsStepModelOutputs
                    RemapVectorType{},  // outputsAsStepModelInputs
                    {0u}));             // modelOutputsThatAreDownstreamInputs
        }
        {
            // Build a model to compare against the step model from stepsA[1].
            PartitioningModel modelA1;
            uint32_t a1Opnd2 = modelA1.addFloatOperand(
                    dimensionedOutput(mMinDeviceVersion, mOpnd2ModelAndPartitionOutputSpecified));
            uint32_t a1Opnd3 = modelA1.addFloatOperand(
                    dimensionedOutput(mMinDeviceVersion, mOpnd3PartitionOutputSpecified));
            uint32_t a1Opnd4 = modelA1.addOperation2To1V1_0(
                    0, a1Opnd2, a1Opnd3,
                    dimensionedOutput(mMinDeviceVersion, mOpnd4ModelOutputSpecified));
            modelA1.identifyInputsAndOutputs({a1Opnd3, a1Opnd2}, {a1Opnd4});
            modelA1.finish();
            ASSERT_TRUE(modelA1.isValid());

            // mOpnds[3] arrives as a temporary; mOpnds[2] arrives as an
            // upstream model output consumed as a step model input.
            ASSERT_NO_FATAL_FAILURE(
                    compare(stepsA[1], &modelA1, devices[1], RemapVectorType{},  // modelInputs
                            RemapVectorType{{mOpnds[4], a1Opnd4}},               // modelOutputs
                            RemapVectorType{{mOpnds[3], a1Opnd3}},  // tempsAsStepModelInputs
                            StepModelOutputSetType{},               // tempsAsStepModelOutputs
                            RemapVectorType{{mOpnds[2], a1Opnd2}},  // outputsAsStepModelInputs
                            {}));  // modelOutputsThatAreDownstreamInputs
        }
    } else {
        // Without fallback the partitioning attempt must fail outright.
        ASSERT_EQ(mCompilation->finish(), Result::OP_FAILED);
        // Try again, expecting fallback.
        mCompilation = PartitioningCompilation(&mModel.value(), devices);
        ASSERT_EQ(mCompilation->setPartitioning(DeviceManager::kPartitioningWithFallback),
                  Result::NO_ERROR);
        ASSERT_EQ(mCompilation->finish(), Result::NO_ERROR);
        // Fallback yields a whole-model SIMPLE plan on the CPU device.
        ASSERT_EQ(mCompilation->getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
        ASSERT_EQ(mCompilation->getExecutionPlan().forTest_simpleGetDevice(),
                  DeviceManager::getCpuDevice());
    }
}
2427
// Executes the compiled model with pad input {3.0, 5.0} and paddings
// {{1, 1}}, sizing each model output buffer with 4 elements ("big enough")
// or 3 ("insufficient"), and verifies both the Result and, on success, the
// output values.
void DynamicTemporariesTest::executeCompilationAndCompareOutput(bool opnd2ModelOutputBigEnough,
                                                                bool opnd4ModelOutputBigEnough) {
    // A fully specified model output must also be big enough by construction.
    ASSERT_TRUE(opnd2ModelOutputBigEnough || !mOpnd2ModelAndPartitionOutputSpecified);
    ASSERT_TRUE(opnd4ModelOutputBigEnough || !mOpnd4ModelOutputSpecified);

    ASSERT_TRUE(mCompilation.has_value());
    WrapperExecution e(&mCompilation.value());

    // Input 0: the 2-element tensor to pad.
    WrapperOperandType padTensorValueType(WrapperType::TENSOR_FLOAT32, {2});
    const float padTensorValue[] = {3.0f, 5.0f};
    e.setInput(0, &padTensorValue, &padTensorValueType.operandType);

    // Input 1: pad one element on each side of dimension 0, so the padded
    // result has 4 elements.
    WrapperOperandType paddingsType(WrapperType::TENSOR_INT32, {1, 2});
    const int paddings[1][2] = {{1, 1}};
    e.setInput(1, &paddings, &paddingsType.operandType);

    // Binds a model output buffer: 4 elements when bigEnough, else 3. The
    // declared output type mirrors how the model declared the operand
    // (specified dims, unknown rank, or rank-1 of unknown size, per version).
    auto setOutput = [&e](uint32_t index, float* buffer, bool bigEnough, bool specified,
                          HalVersion version) {
        const uint32_t elts = bigEnough ? 4 : 3;
        // Sentinel fill so stale data cannot masquerade as a correct result.
        std::fill(buffer, buffer + elts, -1.0f);
        using DimsType = std::vector<uint32_t>;
        WrapperOperandType outputType(
                WrapperType::TENSOR_FLOAT32,
                specified ? DimsType{elts}
                          : supportsOutputOfUnknownRank(version) ? DimsType{} : DimsType{0});
        e.setOutput(index, buffer, elts * sizeof(float), &outputType.operandType);
    };
    float opnd2ModelOutput[4], opnd4ModelOutput[4];
    setOutput(0, opnd2ModelOutput, opnd2ModelOutputBigEnough,
              mOpnd2ModelAndPartitionOutputSpecified, mPadDeviceVersion);
    setOutput(1, opnd4ModelOutput, opnd4ModelOutputBigEnough, mOpnd4ModelOutputSpecified,
              mAddDeviceVersion);

    // Any too-small output buffer must surface as OUTPUT_INSUFFICIENT_SIZE.
    const Result expectResult = opnd2ModelOutputBigEnough && opnd4ModelOutputBigEnough
                                        ? Result::NO_ERROR
                                        : Result::OUTPUT_INSUFFICIENT_SIZE;
    ASSERT_EQ(e.compute(), expectResult);
    if (expectResult == Result::NO_ERROR) {
        // opnd2 = PAD result = {0, 3, 5, 0}; opnd4 = opnd2 + opnd3 = twice that.
        float expected[4] = {0.0f, padTensorValue[0], padTensorValue[1], 0.0f};
        ASSERT_TRUE(std::equal(std::begin(opnd2ModelOutput), std::end(opnd2ModelOutput),
                               std::begin(expected)));
        for (auto& elt : expected) {
            elt *= 2;
        }
        ASSERT_TRUE(std::equal(std::begin(opnd4ModelOutput), std::end(opnd4ModelOutput),
                               std::begin(expected)));
    }
}
2476
TEST_F(DynamicTemporariesTest, ModelOutputsSufficientSize) {
    // The purpose of this test is to confirm that the partitioner and the
    // runtime can handle a model output of unspecified dimensions but
    // sufficient size that is written by one partition and read by another.
    //
    // opnd3 (the inter-partition temporary) is specified here, so no dynamic
    // temporary is involved; only the model outputs are dynamically sized.

    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/false,
                                                    /*opnd3PartitionOutputSpecified=*/true,
                                                    /*opnd4ModelOutputSpecified=*/false));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true));
}
2489
// TODO(b/174851714): Fix the partitioner and re-enable this test.
TEST_F(DynamicTemporariesTest, DISABLED_ModelOutputsSufficientSize_V1_1) {
    // The purpose of this test is to confirm that the partitioner and the
    // runtime can handle a model output of unspecified dimensions but
    // sufficient size that is written by one partition and read by another.
    // Regression test for http://b/174851714.
    //
    // Same configuration as ModelOutputsSufficientSize, but with both
    // devices at HAL V1_1 (which cannot express outputs of unknown rank).

    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/false,
                                                    /*opnd3PartitionOutputSpecified=*/true,
                                                    /*opnd4ModelOutputSpecified=*/false));
    ASSERT_NO_FATAL_FAILURE(declareHalVersions(/*padDeviceVersion=*/HalVersion::V1_1,
                                               /*addDeviceVersion=*/HalVersion::V1_1));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true));
}
2506
TEST_F(DynamicTemporariesTest, DynamicTemporariesUnspecifiedOutputs) {
    // The purpose of this test is to confirm that the partitioner can produce
    // dynamic temporaries and that the runtime can handle them properly. Note
    // that all model outputs are of unspecified dimensions but sufficient size.
    //
    // Relies on the default declareOutputDimensions() configuration (all
    // three flags false), so opnd3 becomes a dynamic temporary.

    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true));
}
2516
TEST_F(DynamicTemporariesTest, DynamicTemporariesSpecifiedOutputs) {
    // The purpose of this test is to confirm that the partitioner can produce
    // dynamic temporaries and that the runtime can handle them properly. Note
    // that all model outputs are of specified dimensions.
    //
    // Only opnd3 is left unspecified, so it is the lone dynamic temporary.

    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/true,
                                                    /*opnd3PartitionOutputSpecified=*/false,
                                                    /*opnd4ModelOutputSpecified=*/true));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true));
}
2529
TEST_F(DynamicTemporariesTest, DynamicTemporariesSpecifiedOutputs_V1_2) {
    // The purpose of this test is to confirm that the partitioner can produce
    // dynamic temporaries and that the runtime can handle them properly. Note
    // that all model outputs are of specified dimensions.
    // Regression test for http://b/174851714.
    //
    // Same as DynamicTemporariesSpecifiedOutputs, but with both devices at
    // HAL V1_2 (the earliest version supporting outputs of unknown rank).

    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/true,
                                                    /*opnd3PartitionOutputSpecified=*/false,
                                                    /*opnd4ModelOutputSpecified=*/true));
    ASSERT_NO_FATAL_FAILURE(declareHalVersions(/*padDeviceVersion=*/HalVersion::V1_2,
                                               /*addDeviceVersion=*/HalVersion::V1_2));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true));
}
2545
TEST_F(DynamicTemporariesTest, DynamicTemporariesSpecifiedOutputs_V1_1) {
    // The purpose of this test is to confirm that the partitioner cannot produce
    // dynamic temporaries for V1_1 but instead does whole-model CPU fallback. Note
    // that all model outputs are of specified dimensions.
    // Regression test for http://b/174851714.

    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/true,
                                                    /*opnd3PartitionOutputSpecified=*/false,
                                                    /*opnd4ModelOutputSpecified=*/true));
    ASSERT_NO_FATAL_FAILURE(declareHalVersions(/*padDeviceVersion=*/HalVersion::V1_1,
                                               /*addDeviceVersion=*/HalVersion::V1_1));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    // noFallback=false: expect OP_FAILED without fallback, then a SIMPLE
    // CPU plan once fallback is permitted.
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan(false));
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true));
}
2561
TEST_F(DynamicTemporariesTest, ModelOutputsInsufficientSizeWithDynamicTemporary) {
    // The purpose of this test is to confirm that the runtime can detect a
    // model output of insufficient size in the presence of a dynamic temporary.
    //
    // Default configuration: everything unspecified, so opnd3 is a dynamic
    // temporary; both model output buffers are deliberately too small.

    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(false, false));
}
2570
TEST_F(DynamicTemporariesTest, ModelOutputsInsufficientSizeWithoutDynamicTemporary) {
    // The purpose of this test is to confirm that the runtime can detect a
    // model output of insufficient size in the absence of a dynamic temporary.
    //
    // opnd3 is specified, so there is no dynamic temporary; both model
    // output buffers are deliberately too small.

    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/false,
                                                    /*opnd3PartitionOutputSpecified=*/true,
                                                    /*opnd4ModelOutputSpecified=*/false));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(false, false));
}
2582
TEST_F(DynamicTemporariesTest, ModelOutput2InsufficientSizeWithoutDynamicTemporary) {
    // The purpose of this test is to confirm that the runtime can detect a
    // model output of insufficient size in the absence of a dynamic temporary.
    //
    // Only output 0 (opnd2) is undersized; output 1 (opnd4) is big enough.

    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/false,
                                                    /*opnd3PartitionOutputSpecified=*/true,
                                                    /*opnd4ModelOutputSpecified=*/false));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(false, true));
}
2594
TEST_F(DynamicTemporariesTest, ModelOutput4InsufficientSizeWithoutDynamicTemporary) {
    // The purpose of this test is to confirm that the runtime can detect a
    // model output of insufficient size in the absence of a dynamic temporary.

    // Fully specify the partition-boundary operand (opnd3) so that no dynamic
    // temporary is needed at execution time.
    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/false,
                                                    /*opnd3PartitionOutputSpecified=*/true,
                                                    /*opnd4ModelOutputSpecified=*/false));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    // (true, false): only the output corresponding to opnd4 is undersized.
    // NOTE(review): flag semantics inferred from test names -- confirm against
    // executeCompilationAndCompareOutput()'s definition.
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, false));
}
2606
2607 // Test token rehashing during the compilation step.
class CacheTest : public PartitioningTest {
   protected:
    // Creates a unique scratch directory under /data/local/tmp to hold any
    // compilation cache files written during the test.
    virtual void SetUp() override {
        PartitioningTest::SetUp();
        char cacheDirTemp[] = "/data/local/tmp/TestCompilationCachingXXXXXX";
        char* cacheDir = mkdtemp(cacheDirTemp);
        ASSERT_NE(cacheDir, nullptr);
        mCacheDir = cacheDir;
    }

    // Removes the scratch directory -- but deliberately leaves it in place when
    // the test failed, so its contents can be inspected afterwards.
    virtual void TearDown() override {
        if (!::testing::Test::HasFailure()) {
            std::filesystem::remove_all(mCacheDir);
        }
        PartitioningTest::TearDown();
    }

    // Expects every pair of tokens in "tokens" to be distinct.
    void expectUniqueTokens(const std::vector<std::vector<uint8_t>>& tokens) {
        for (uint32_t i = 0; i < tokens.size(); i++) {
            SCOPED_TRACE(i);
            for (uint32_t j = i + 1; j < tokens.size(); j++) {
                SCOPED_TRACE(j);
                EXPECT_NE(tokens[i], tokens[j]);
            }
        }
    }

    // Launch a single run of the partitioner against the provided model and device list with
    // cache token provided as tokenIn. Find the partition for the device with deviceName.
    // Record the transformed token into tokenOut. Two or more partitions may be on the same device.
    // "devicePartitionIndex" specifies the index of the ExecutionStep corresponding to the
    // partition of interest, within the sequence of ExecutionSteps on the target device.
    // If tokenIn is empty, no caching information will be provided to the partitioner.
    void getTransformedCacheTokenSingle(const PartitioningModel& model,
                                        const std::vector<std::shared_ptr<Device>>& devices,
                                        const char* deviceName, const std::vector<uint8_t>& tokenIn,
                                        ExecutePreference preference, ExecutePriority priority,
                                        uint32_t devicePartitionIndex,
                                        std::vector<uint8_t>* tokenOut) {
        // Compile the model and get the execution plan.
        PartitioningCompilation compilation(&model, devices);
        if (!tokenIn.empty()) {
            compilation.setCaching(mCacheDir.c_str(), tokenIn);
        }
        compilation.setPreference(preference);
        compilation.setPriority(priority);
        ASSERT_EQ(compilation.finish(), Result::NO_ERROR);
        const ExecutionPlan& plan = compilation.getExecutionPlan();

        // Find the cache info for the device.
        const uint8_t* token = nullptr;
        if (plan.forTest_getKind() == ExecutionPlan::Kind::SIMPLE) {
            // A simple plan has exactly one partition, which must be on deviceName.
            ASSERT_EQ(devicePartitionIndex, 0u);
            ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), deviceName);
            token = plan.forTest_simpleGetCacheToken();
        } else if (plan.forTest_getKind() == ExecutionPlan::Kind::COMPOUND) {
            // Walk the steps, counting only execution steps on deviceName,
            // until the requested partition index is reached.
            const auto& steps = plan.forTest_compoundGetSteps();
            uint32_t executionStepCount = 0;
            for (const auto& step : steps) {
                if (step->isExecution() &&
                    step->executionStep()->getDevice()->getName() == deviceName) {
                    if (devicePartitionIndex == executionStepCount) {
                        token = step->executionStep()->forTest_getCacheToken();
                        break;
                    }
                    executionStepCount++;
                }
            }
        } else {
            FAIL();
        }

        // Retrieve the transformed token from the cache info.
        if (token == nullptr) {
            // No caching for this partition: report an empty token.
            tokenOut->clear();
        } else {
            tokenOut->resize(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN);
            std::copy(token, token + ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, tokenOut->begin());
        }
    }

    // A wrapper of getTransformedCacheTokenSingle, which runs getTransformedCacheTokenSingle
    // multiple times and checks if the transformation provides consistent result.
    // Two or more partitions may be on the same device. "devicePartitionIndex" specifies the index
    // of the ExecutionStep corresponding to the partition of interest, within the sequence of
    // ExecutionSteps on the target device.
    void getTransformedCacheToken(const PartitioningModel& model,
                                  const std::vector<std::shared_ptr<Device>>& devices,
                                  const char* deviceName, const std::vector<uint8_t>& tokenIn,
                                  ExecutePreference preference, ExecutePriority priority,
                                  std::vector<uint8_t>* tokenOut,
                                  uint32_t devicePartitionIndex = 0) {
        getTransformedCacheTokenSingle(model, devices, deviceName, tokenIn, preference, priority,
                                       devicePartitionIndex, tokenOut);

        // Test if the runtime maps to the same cache token every time for the same compilation
        // setup.
        for (uint32_t i = 0; i < 10; i++) {
            std::vector<uint8_t> token;
            SCOPED_TRACE(i);
            getTransformedCacheTokenSingle(model, devices, deviceName, tokenIn, preference,
                                           priority, devicePartitionIndex, &token);
            EXPECT_EQ(*tokenOut, token);
        }
    }

    // Builds the two-operation model used by most caching tests:
    // opnd4 = op1(op0(opnd0, opnd1), opnd3).
    void createModelForCachingTests(PartitioningModel* model) {
        uint32_t opnd0 = model->addFloatOperand();
        uint32_t opnd1 = model->addFloatOperand();
        uint32_t opnd2 = model->addOperation2To1V1_0(0, opnd0, opnd1);
        uint32_t opnd3 = model->addFloatOperand();
        uint32_t opnd4 = model->addOperation2To1V1_0(1, opnd2, opnd3);
        model->identifyInputsAndOutputs({opnd0, opnd1, opnd3}, {opnd4});
        model->finish();
        ASSERT_TRUE(model->isValid());
    }

    // The first model returned in "models" is the main model.
    void createControlFlowModelForCachingTests(
            std::vector<std::unique_ptr<PartitioningModel>>* models) {
        CHECK(models != nullptr);

        auto trueModel = std::make_unique<PartitioningModel>();
        {
            const uint32_t opnd0 = trueModel->addFloatOperand();
            const uint32_t opnd1 = trueModel->addFloatOperand();
            const uint32_t opnd2 = trueModel->addOperation2To1V1_0(0, opnd0, opnd1);
            trueModel->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
            trueModel->finish();
            ASSERT_TRUE(trueModel->isValid());
        }

        auto falseModel = std::make_unique<PartitioningModel>();
        {
            const uint32_t opnd0 = falseModel->addFloatOperand();
            const uint32_t opnd1 = falseModel->addFloatOperand();
            const uint32_t opnd2 = falseModel->addOperation2To1V1_0(0, opnd0, opnd1);
            falseModel->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
            falseModel->finish();
            ASSERT_TRUE(falseModel->isValid());
        }

        // Main model: IF(opnd0) selects trueModel or falseModel, applied to
        // {opnd1, opnd2} and producing {opnd3}.
        auto mainModel = std::make_unique<PartitioningModel>();
        {
            const uint32_t opnd0 = mainModel->addBooleanOperand();
            const uint32_t opnd1 = mainModel->addFloatOperand();
            const uint32_t opnd2 = mainModel->addFloatOperand();
            const uint32_t opnd3 = mainModel->addFloatOperand();
            mainModel->addIfOperation(opnd0, *trueModel, *falseModel, {opnd1, opnd2}, {opnd3});
            mainModel->identifyInputsAndOutputs({opnd0, opnd1, opnd2}, {opnd3});
            mainModel->finish();
            ASSERT_TRUE(mainModel->isValid());
        }

        models->clear();
        models->push_back(std::move(mainModel));
        models->push_back(std::move(trueModel));
        models->push_back(std::move(falseModel));
    }

    // Scratch directory holding compilation cache files for the current test.
    std::string mCacheDir;
};
2770
2771 // Test the case when no token is provided by the application and the execution plan has a
2772 // simple body.
TEST_F(CacheTest, CacheTokenNoneSimpleBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // A single device capable of running the entire model, so the plan has a
    // simple body.
    const auto singleDevice = makeDevices({{"deviceA", 0.5, ~0U}});

    // An empty input token means the application requested no caching; the
    // transformed token must come back empty as well.
    const std::vector<uint8_t> emptyToken;
    std::vector<uint8_t> transformed;
    getTransformedCacheToken(model, singleDevice, "deviceA", emptyToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &transformed);
    EXPECT_TRUE(transformed.empty());
}
2788
2789 // Test if the runtime maps to different cache tokens for devices with different names in
2790 // execution plan with a simple body.
TEST_F(CacheTest, CacheTokenDifferentDeviceNamesSimpleBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // Each device can run the whole model by itself, yielding a simple-body plan.
    const auto configWithA = makeDevices({{"deviceA", 0.5, ~0U}});
    const auto configWithB = makeDevices({{"deviceB", 0.5, ~0U}});

    // Same application-provided token, different device names: the transformed
    // tokens must differ.
    const std::vector<uint8_t> appToken(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> tokenForA, tokenForB;
    getTransformedCacheToken(model, configWithA, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenForA);
    getTransformedCacheToken(model, configWithB, "deviceB", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenForB);
    expectUniqueTokens({tokenForA, tokenForB});
}
2809
2810 // Test if the runtime maps to different cache tokens for devices with different version strings in
2811 // execution plan with a simple body.
TEST_F(CacheTest, CacheTokenDifferentDeviceVersionStringsSimpleBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // The same device name registered with two different version strings; each
    // can run the whole model, yielding a simple-body plan.
    const auto versioned_1_0 = makeDevices({{"deviceA", "1.0", 0.5, ~0U}});
    const auto versioned_1_1 = makeDevices({{"deviceA", "1.1", 0.5, ~0U}});

    // Same input token; only the version string differs, so the transformed
    // tokens must differ.
    const std::vector<uint8_t> appToken(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> tokenFor_1_0, tokenFor_1_1;
    getTransformedCacheToken(model, versioned_1_0, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenFor_1_0);
    getTransformedCacheToken(model, versioned_1_1, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenFor_1_1);
    expectUniqueTokens({tokenFor_1_0, tokenFor_1_1});
}
2830
2831 // Test if the runtime maps to different cache tokens for compilations with different preferences
2832 // in execution plan with a simple body.
TEST_F(CacheTest, CacheTokenDifferentPreferencesSimpleBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // A single device runs the whole model (simple-body plan).
    const auto deviceA = makeDevices({{"deviceA", 0.5, ~0U}});

    // Identical setups except for the execution preference must produce
    // pairwise-distinct transformed tokens.
    const std::vector<uint8_t> appToken(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> tokenFast, tokenLowPower, tokenSustained;
    getTransformedCacheToken(model, deviceA, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenFast);
    getTransformedCacheToken(model, deviceA, "deviceA", appToken,
                             ExecutePreference::PREFER_LOW_POWER, ExecutePriority::DEFAULT,
                             &tokenLowPower);
    getTransformedCacheToken(model, deviceA, "deviceA", appToken,
                             ExecutePreference::PREFER_SUSTAINED_SPEED, ExecutePriority::DEFAULT,
                             &tokenSustained);
    expectUniqueTokens({tokenFast, tokenLowPower, tokenSustained});
}
2853
2854 // Test if the runtime maps to different cache tokens for compilations with different priorities
2855 // in execution plan with a simple body.
TEST_F(CacheTest, CacheTokenDifferentPrioritiesSimpleBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // A single device runs the whole model (simple-body plan).
    const auto deviceA = makeDevices({{"deviceA", 0.5, ~0U}});

    // Identical setups except for the execution priority must produce
    // pairwise-distinct transformed tokens.
    const std::vector<uint8_t> appToken(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> tokenLow, tokenMedium, tokenHigh;
    getTransformedCacheToken(model, deviceA, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::LOW,
                             &tokenLow);
    getTransformedCacheToken(model, deviceA, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::MEDIUM,
                             &tokenMedium);
    getTransformedCacheToken(model, deviceA, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::HIGH,
                             &tokenHigh);
    expectUniqueTokens({tokenLow, tokenMedium, tokenHigh});
}
2876
2877 // Test if the runtime maps to different cache tokens for compilations with different tokens
2878 // provided by application in execution plan with a simple body.
TEST_F(CacheTest, CacheTokenDifferentTokensSimpleBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // A single device runs the whole model (simple-body plan).
    const auto deviceA = makeDevices({{"deviceA", 0.5, ~0U}});

    // Two different application-provided tokens (all zeros vs. all ones) must
    // transform into distinct runtime tokens.
    const std::vector<uint8_t> appTokenZeros(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    const std::vector<uint8_t> appTokenOnes(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 1);
    std::vector<uint8_t> outZeros, outOnes;
    getTransformedCacheToken(model, deviceA, "deviceA", appTokenZeros,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &outZeros);
    getTransformedCacheToken(model, deviceA, "deviceA", appTokenOnes,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &outOnes);
    expectUniqueTokens({outZeros, outOnes});
}
2897
2898 // Test the case when no token is provided by the application and the execution plan has a
2899 // compound body.
TEST_F(CacheTest, CacheTokenNoneCompoundBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // deviceA ends up with the first operation and deviceB with the second,
    // giving the plan a compound body.
    const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});

    // With no application-provided token, neither partition gets a cache token.
    const std::vector<uint8_t> emptyToken;
    std::vector<uint8_t> tokenForA, tokenForB;
    getTransformedCacheToken(model, devices, "deviceA", emptyToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenForA);
    EXPECT_TRUE(tokenForA.empty());
    getTransformedCacheToken(model, devices, "deviceB", emptyToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenForB);
    EXPECT_TRUE(tokenForB.empty());
}
2917
2918 // Test if the runtime maps to different cache tokens for devices with different names in
2919 // execution plan with a compound body.
TEST_F(CacheTest, CacheTokenDifferentDeviceNamesCompoundBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // In each configuration the named device ends up with the first operation
    // only, and deviceC with the second (compound-body plan).
    const auto configWithA = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceC", 0.5, 1 << 1}});
    const auto configWithB = makeDevices({{"deviceB", 0.8, ~0U}, {"deviceC", 0.5, 1 << 1}});

    // Same input token, different device names: transformed tokens must differ.
    const std::vector<uint8_t> appToken(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> tokenForA, tokenForB;
    getTransformedCacheToken(model, configWithA, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenForA);
    getTransformedCacheToken(model, configWithB, "deviceB", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenForB);
    expectUniqueTokens({tokenForA, tokenForB});
}
2939
// Test if the runtime maps to different cache tokens for devices with different version strings
// in execution plan with a compound body.
TEST_F(CacheTest, CacheTokenDifferentDeviceVersionStringsCompoundBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // DeviceA (version "1.0") executes the first operation only.
    const auto devices1 = makeDevices({{"deviceA", "1.0", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
    // DeviceA (version "1.1") executes the first operation only.
    const auto devices2 = makeDevices({{"deviceA", "1.1", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});

    std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> deviceA_1_0_Token, deviceA_1_1_Token;
    getTransformedCacheToken(model, devices1, "deviceA", tokenIn,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &deviceA_1_0_Token);
    getTransformedCacheToken(model, devices2, "deviceA", tokenIn,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &deviceA_1_1_Token);
    // Same device name, different version strings: the transformed tokens must
    // be distinct.
    expectUniqueTokens({deviceA_1_0_Token, deviceA_1_1_Token});
}
2961
2962 // Test if the runtime maps to different cache tokens for compilations with different preferences
2963 // in execution plan with a compound body.
TEST_F(CacheTest, CacheTokenDifferentPreferencesCompoundBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // deviceA ends up with the first operation only (compound-body plan).
    const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});

    // Identical setups except for the execution preference must produce
    // pairwise-distinct transformed tokens.
    const std::vector<uint8_t> appToken(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> tokenFast, tokenLowPower, tokenSustained;
    getTransformedCacheToken(model, devices, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &tokenFast);
    getTransformedCacheToken(model, devices, "deviceA", appToken,
                             ExecutePreference::PREFER_LOW_POWER, ExecutePriority::DEFAULT,
                             &tokenLowPower);
    getTransformedCacheToken(model, devices, "deviceA", appToken,
                             ExecutePreference::PREFER_SUSTAINED_SPEED, ExecutePriority::DEFAULT,
                             &tokenSustained);
    expectUniqueTokens({tokenFast, tokenLowPower, tokenSustained});
}
2984
2985 // Test if the runtime maps to different cache tokens for compilations with different priorities
2986 // in execution plan with a compound body.
TEST_F(CacheTest, CacheTokenDifferentPrioritiesCompoundBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // deviceA ends up with the first operation only (compound-body plan).
    const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});

    // Identical setups except for the execution priority must produce
    // pairwise-distinct transformed tokens.
    const std::vector<uint8_t> appToken(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> tokenLow, tokenMedium, tokenHigh;
    getTransformedCacheToken(model, devices, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::LOW,
                             &tokenLow);
    getTransformedCacheToken(model, devices, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::MEDIUM,
                             &tokenMedium);
    getTransformedCacheToken(model, devices, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::HIGH,
                             &tokenHigh);
    expectUniqueTokens({tokenLow, tokenMedium, tokenHigh});
}
3007
3008 // Test if the runtime maps to different cache tokens for compilations with different tokens
3009 // provided by application in execution plan with a compound body.
TEST_F(CacheTest, CacheTokenDifferentTokensCompoundBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // deviceA ends up with the first operation only (compound-body plan).
    const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});

    // Two different application-provided tokens (all zeros vs. all ones) must
    // transform into distinct runtime tokens.
    const std::vector<uint8_t> appTokenZeros(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    const std::vector<uint8_t> appTokenOnes(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 1);
    std::vector<uint8_t> outZeros, outOnes;
    getTransformedCacheToken(model, devices, "deviceA", appTokenZeros,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &outZeros);
    getTransformedCacheToken(model, devices, "deviceA", appTokenOnes,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &outOnes);
    expectUniqueTokens({outZeros, outOnes});
}
3028
3029 // Test if the runtime maps to different cache tokens for compilations with different partitioning
3030 // outcome in execution plan with a compound body.
TEST_F(CacheTest, CacheTokenDifferentPartitionsCompoundBody) {
    PartitioningModel model;
    createModelForCachingTests(&model);

    // Three configurations that induce three different partitionings:
    // deviceA runs the whole model / only the first op / only the second op.
    const auto wholeModelOnA = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 0U}});
    const auto firstOpOnA = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
    const auto secondOpOnA = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 0}});

    // Same input token; different partitionings must yield distinct tokens.
    const std::vector<uint8_t> appToken(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> wholeToken, firstToken, secondToken;
    getTransformedCacheToken(model, wholeModelOnA, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &wholeToken);
    getTransformedCacheToken(model, firstOpOnA, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &firstToken);
    getTransformedCacheToken(model, secondOpOnA, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &secondToken);
    expectUniqueTokens({wholeToken, firstToken, secondToken});
}
3055
3056 // Test if the runtime maps different referenced models to different cache tokens.
TEST_F(CacheTest, CacheTokenDifferentReferenceModelPartitions) {
    std::vector<std::unique_ptr<PartitioningModel>> models;
    createControlFlowModelForCachingTests(&models);
    const auto& mainModel = *models[0];

    // deviceA supports the referenced models but not IF itself, so the plan
    // ends up with two separate partitions on deviceA.
    const auto devices = makeDevices({{"deviceA", 0.8, ~0U}});

    // The two partitions on the same device must receive distinct tokens.
    const std::vector<uint8_t> appToken(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
    std::vector<uint8_t> firstPartitionToken, secondPartitionToken;
    getTransformedCacheToken(mainModel, devices, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &firstPartitionToken, /*devicePartitionIndex=*/0);
    getTransformedCacheToken(mainModel, devices, "deviceA", appToken,
                             ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
                             &secondPartitionToken, /*devicePartitionIndex=*/1);
    expectUniqueTokens({firstPartitionToken, secondPartitionToken});
}
3076
3077 // Very basic tests of some of the PerformanceInfo functionality.
3078 // Placed in this file because partitioning is the consumer of this functionality.
3079 class PerfTest : public ::testing::Test {};
3080
TEST_F(PerfTest, Lookup) {
    // Derive an arbitrary (but reproducible) performance value from an OperandType.
    // We'll use this to ensure that we can save and then recover a type's performance.
    auto typePerf = [](V1_3::OperandType type) { return float(static_cast<uint32_t>(type)); };

    // Start from capabilities initialized with a sentinel performance value.
    V1_3::Capabilities capabilities = ::android::nn::makeCapabilities(-1.0f);

    // Store a distinct performance value for every fundamental operand type...
    for (uint32_t type = static_cast<uint32_t>(V1_3::OperandTypeRange::FUNDAMENTAL_MIN);
         type <= static_cast<uint32_t>(V1_3::OperandTypeRange::FUNDAMENTAL_MAX); ++type) {
        V1_3::OperandType operandType = static_cast<V1_3::OperandType>(type);
        update(&capabilities, operandType, typePerf(operandType));
    }
    // ...and for every OEM operand type.
    for (uint32_t type = static_cast<uint32_t>(V1_3::OperandTypeRange::OEM_MIN);
         type <= static_cast<uint32_t>(V1_3::OperandTypeRange::OEM_MAX); ++type) {
        V1_3::OperandType operandType = static_cast<V1_3::OperandType>(type);
        update(&capabilities, operandType, typePerf(operandType));
    }

    // Make sure lookup retrieves the values stored by update

    for (uint32_t type = static_cast<uint32_t>(V1_3::OperandTypeRange::FUNDAMENTAL_MIN);
         type <= static_cast<uint32_t>(V1_3::OperandTypeRange::FUNDAMENTAL_MAX); ++type) {
        V1_3::OperandType operandType = static_cast<V1_3::OperandType>(type);
        if (operandType == V1_3::OperandType::SUBGRAPH) {
            // SUBGRAPH capabilities are handled differently.
            continue;
        }
        SCOPED_TRACE(toString(operandType));
        EXPECT_EQ(lookupExecTime(capabilities, operandType), typePerf(operandType));
    }
    for (uint32_t type = static_cast<uint32_t>(V1_3::OperandTypeRange::OEM_MIN);
         type <= static_cast<uint32_t>(V1_3::OperandTypeRange::OEM_MAX); ++type) {
        V1_3::OperandType operandType = static_cast<V1_3::OperandType>(type);
        SCOPED_TRACE(toString(operandType));
        EXPECT_EQ(lookupExecTime(capabilities, operandType), typePerf(operandType));
    }

    // Check the behavior of a missing type

    // A type one beyond BASE_MAX was never stored; lookup is expected to fall
    // back to FLT_MAX.
    V1_3::OperandType operandType = static_cast<V1_3::OperandType>(
            static_cast<uint32_t>(V1_3::OperandTypeRange::BASE_MAX) + 1);
    EXPECT_EQ(lookupExecTime(capabilities, operandType), FLT_MAX);
}
3124
3125 class ControlFlowPartitioningTest : public PartitioningTest {
3126 protected:
3127 // opnd0 --> +-----+
3128 // | op0 | --> opnd2
3129 // opnd1 --> +-----+
createBranchOrBodyModel(Dimensioned dimensioned)3130 std::unique_ptr<PartitioningModel> createBranchOrBodyModel(Dimensioned dimensioned) {
3131 auto model = std::make_unique<PartitioningModel>();
3132 const uint32_t opnd0 = model->addFloatOperand(dimensioned);
3133 const uint32_t opnd1 = model->addFloatOperand(dimensioned);
3134 const uint32_t opnd2 = model->addOperation2To1V1_0(0, opnd0, opnd1, dimensioned);
3135 model->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
3136 model->finish();
3137 EXPECT_TRUE(model->isValid());
3138 return model;
3139 }
3140
3141 // opnd0 --> +-------+
3142 // | EQUAL | --> opnd2
3143 // opnd1 --> +-------+
createCondModel(Dimensioned dimensioned)3144 std::unique_ptr<PartitioningModel> createCondModel(Dimensioned dimensioned) {
3145 auto model = std::make_unique<PartitioningModel>();
3146 const uint32_t opnd0 = model->addFloatOperand(dimensioned);
3147 const uint32_t opnd1 = model->addFloatOperand(dimensioned);
3148 const uint32_t opnd2 = model->addExplicitOperationXTo1(
3149 ANEURALNETWORKS_EQUAL, {opnd0, opnd1}, WrapperType::TENSOR_BOOL8);
3150 model->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
3151 model->finish();
3152 EXPECT_TRUE(model->isValid());
3153 return model;
3154 }
3155
3156 // opnd0 --> +----+
3157 // opnd1 --> | IF | --> opnd3
3158 // opnd2 --> +----+
createIfModel(Dimensioned dimensionedMain=Dimensioned::YES,Dimensioned dimensionedThen=Dimensioned::YES,Dimensioned dimensionedElse=Dimensioned::YES)3159 std::vector<std::unique_ptr<PartitioningModel>> createIfModel(
3160 Dimensioned dimensionedMain = Dimensioned::YES,
3161 Dimensioned dimensionedThen = Dimensioned::YES,
3162 Dimensioned dimensionedElse = Dimensioned::YES) {
3163 auto thenModel = createBranchOrBodyModel(dimensionedThen);
3164 auto elseModel = createBranchOrBodyModel(dimensionedElse);
3165
3166 auto mainModel = std::make_unique<PartitioningModel>();
3167 const uint32_t opnd0 = mainModel->addBooleanOperand();
3168 const uint32_t opnd1 = mainModel->addFloatOperand(dimensionedMain);
3169 const uint32_t opnd2 = mainModel->addFloatOperand(dimensionedMain);
3170 const uint32_t opnd3 = mainModel->addFloatOperand(dimensionedMain);
3171 mainModel->addIfOperation(opnd0, *thenModel, *elseModel, {opnd1, opnd2}, {opnd3});
3172 mainModel->identifyInputsAndOutputs({opnd0, opnd1, opnd2}, {opnd3});
3173 mainModel->finish();
3174 EXPECT_TRUE(mainModel->isValid());
3175
3176 std::vector<std::unique_ptr<PartitioningModel>> models;
3177 models.push_back(std::move(mainModel));
3178 models.push_back(std::move(thenModel));
3179 models.push_back(std::move(elseModel));
3180 return std::move(models);
3181 }
3182
3183 // opnd0 --> +-------+
3184 // | WHILE | --> opnd2
3185 // opnd1 --> +-------+
createWhileModel(Dimensioned dimensionedMain=Dimensioned::YES,Dimensioned dimensionedCond=Dimensioned::YES,Dimensioned dimensionedBody=Dimensioned::YES)3186 std::vector<std::unique_ptr<PartitioningModel>> createWhileModel(
3187 Dimensioned dimensionedMain = Dimensioned::YES,
3188 Dimensioned dimensionedCond = Dimensioned::YES,
3189 Dimensioned dimensionedBody = Dimensioned::YES) {
3190 auto condModel = createCondModel(dimensionedCond);
3191 auto bodyModel = createBranchOrBodyModel(dimensionedBody);
3192
3193 auto mainModel = std::make_unique<PartitioningModel>();
3194 const uint32_t opnd0 = mainModel->addFloatOperand(dimensionedMain);
3195 const uint32_t opnd1 = mainModel->addFloatOperand(dimensionedMain);
3196 const uint32_t opnd2 = mainModel->addFloatOperand(dimensionedMain);
3197 mainModel->addWhileOperation(*condModel, *bodyModel, {opnd0, opnd1}, {opnd2});
3198 mainModel->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
3199 mainModel->finish();
3200 EXPECT_TRUE(mainModel->isValid());
3201
3202 std::vector<std::unique_ptr<PartitioningModel>> models;
3203 models.push_back(std::move(mainModel));
3204 models.push_back(std::move(condModel));
3205 models.push_back(std::move(bodyModel));
3206 return std::move(models);
3207 }
3208
3209 void testIfUnknownSize(Dimensioned dimensionedMain, Dimensioned dimensionedThen,
3210 Dimensioned dimensionedElse);
3211 void testWhileUnknownSize(Dimensioned dimensionedMain, Dimensioned dimensionedThen,
3212 Dimensioned dimensionedElse);
3213 };
3214
TEST_F(ControlFlowPartitioningTest, IF_Interpreted) {
    const auto ifModels = createIfModel();

    // The device supports the referenced models but does not support IF, so
    // the IF itself must be interpreted by the runtime.
    const auto v10Devices = makeDevices({{"V1_0", 0.9, HalVersion::V1_0, ~0U}});

    ExecutionPlan plan;
    ASSERT_EQ(ifModels[0]->partitionTheWork(v10Devices, ExecutePreference::PREFER_LOW_POWER,
                                            ExecutePriority::DEFAULT, {}, &plan),
              ANEURALNETWORKS_NO_ERROR);
    // Interpreted IF followed by the two branch partitions on the device.
    checkExecutionPlanSteps(plan, {kIfStep, "V1_0", kGotoStep, "V1_0"});
}
3227
TEST_F(ControlFlowPartitioningTest, WHILE_Interpreted) {
    const auto whileModels = createWhileModel();

    // The device supports the body model but neither WHILE nor the condition
    // model (the latter because of EQUAL), so the WHILE is interpreted and the
    // condition falls back to the CPU device.
    const auto v10Devices = makeDevices({{"V1_0", 0.9, HalVersion::V1_0, ~0U}});

    ExecutionPlan plan;
    ASSERT_EQ(whileModels[0]->partitionTheWork(v10Devices, ExecutePreference::PREFER_LOW_POWER,
                                               ExecutePriority::DEFAULT, {}, &plan),
              ANEURALNETWORKS_NO_ERROR);
    const auto& cpuDeviceName = DeviceManager::getCpuDevice()->getName();
    checkExecutionPlanSteps(plan, {kWhileStep, cpuDeviceName, kGotoStep, "V1_0", kGotoStep});
}
3242
TEST_F(ControlFlowPartitioningTest, IF_SimplePlan) {
    // A single device that supports every operation, including IF, so the
    // whole model collapses into one step executed on that device.
    const auto models = createIfModel();
    const auto devices = makeDevices({{"ALL",
                                       0.9,
                                       ~0U,
                                       PartitioningDriver::OEMNo,
                                       HalVersion::LATEST,
                                       {V1_3::OperationType::IF}}});

    ExecutionPlan singleStepPlan;
    const int status = models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                                   ExecutePriority::DEFAULT, {}, &singleStepPlan);
    ASSERT_EQ(status, ANEURALNETWORKS_NO_ERROR);
    checkExecutionPlanSteps(singleStepPlan, {"ALL"});
}
3260
TEST_F(ControlFlowPartitioningTest, WHILE_SimplePlan) {
    // A single device that supports every operation, including WHILE and
    // EQUAL, so the whole model collapses into one step on that device.
    const auto models = createWhileModel();
    const auto devices = makeDevices({{"ALL",
                                       0.9,
                                       ~0U,
                                       PartitioningDriver::OEMNo,
                                       HalVersion::LATEST,
                                       {V1_3::OperationType::WHILE, V1_3::OperationType::EQUAL}}});

    ExecutionPlan singleStepPlan;
    const int status = models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                                   ExecutePriority::DEFAULT, {}, &singleStepPlan);
    ASSERT_EQ(status, ANEURALNETWORKS_NO_ERROR);
    checkExecutionPlanSteps(singleStepPlan, {"ALL"});
}
3278
testIfUnknownSize(Dimensioned dimensionedMain,Dimensioned dimensionedThen,Dimensioned dimensionedElse)3279 void ControlFlowPartitioningTest::testIfUnknownSize(Dimensioned dimensionedMain,
3280 Dimensioned dimensionedThen,
3281 Dimensioned dimensionedElse) {
3282 if (dimensionedMain != Dimensioned::NO && dimensionedThen != Dimensioned::NO &&
3283 dimensionedElse != Dimensioned::NO) {
3284 // No unknown size.
3285 return;
3286 }
3287
3288 const auto models = createIfModel(dimensionedMain, dimensionedThen, dimensionedElse);
3289
3290 // The device supports all operations but the partitioner ignores its IF
3291 // support due to http://b/159076604#comment5.
3292 const auto devices = makeDevices({{"ALL",
3293 0.9,
3294 ~0U,
3295 PartitioningDriver::OEMNo,
3296 HalVersion::LATEST,
3297 {V1_3::OperationType::IF}}});
3298
3299 ExecutionPlan plan;
3300 ASSERT_EQ(models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
3301 ExecutePriority::DEFAULT, {}, &plan),
3302 ANEURALNETWORKS_NO_ERROR);
3303 // The control flow interpreter does not support unknown size (b/132458982).
3304 checkExecutionPlanSteps(plan, {DeviceManager::getCpuDevice()->getName()});
3305 }
3306
TEST_F(ControlFlowPartitioningTest, IF_UnknownSize) {
    // Exercise every combination of fully-specified vs. unknown dimensions
    // for the main, then, and else models.
    const std::vector<Dimensioned> dimensionedChoices = {Dimensioned::NO, Dimensioned::YES};
    for (Dimensioned dimensionedMain : dimensionedChoices) {
        SCOPED_TRACE(testing::Message() << "dimensionedMain: " << toString(dimensionedMain));
        for (Dimensioned dimensionedThen : dimensionedChoices) {
            SCOPED_TRACE(testing::Message() << "dimensionedThen: " << toString(dimensionedThen));
            for (Dimensioned dimensionedElse : dimensionedChoices) {
                SCOPED_TRACE(testing::Message()
                             << "dimensionedElse: " << toString(dimensionedElse));
                testIfUnknownSize(dimensionedMain, dimensionedThen, dimensionedElse);
            }
        }
    }
}
3321
testWhileUnknownSize(Dimensioned dimensionedMain,Dimensioned dimensionedCond,Dimensioned dimensionedBody)3322 void ControlFlowPartitioningTest::testWhileUnknownSize(Dimensioned dimensionedMain,
3323 Dimensioned dimensionedCond,
3324 Dimensioned dimensionedBody) {
3325 if (dimensionedMain != Dimensioned::NO && dimensionedCond != Dimensioned::NO &&
3326 dimensionedBody != Dimensioned::NO) {
3327 // No unknown size.
3328 return;
3329 }
3330
3331 const auto models = createWhileModel(dimensionedMain, dimensionedCond, dimensionedBody);
3332
3333 // The device supports all operations but the partitioner ignores its WHILE
3334 // support due to http://b/159076604#comment5.
3335 const auto devices = makeDevices({{"ALL",
3336 0.9,
3337 ~0U,
3338 PartitioningDriver::OEMNo,
3339 HalVersion::LATEST,
3340 {V1_3::OperationType::WHILE, V1_3::OperationType::EQUAL}}});
3341
3342 ExecutionPlan plan;
3343 ASSERT_EQ(models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
3344 ExecutePriority::DEFAULT, {}, &plan),
3345 ANEURALNETWORKS_NO_ERROR);
3346 // The control flow interpreter does not support unknown size (b/132458982).
3347 checkExecutionPlanSteps(plan, {DeviceManager::getCpuDevice()->getName()});
3348 }
3349
TEST_F(ControlFlowPartitioningTest, WHILE_UnknownSize) {
    // Exercise every combination of fully-specified vs. unknown dimensions
    // for the main, condition, and body models.
    const std::vector<Dimensioned> dimensionedChoices = {Dimensioned::NO, Dimensioned::YES};
    for (Dimensioned dimensionedMain : dimensionedChoices) {
        SCOPED_TRACE(testing::Message() << "dimensionedMain: " << toString(dimensionedMain));
        for (Dimensioned dimensionedCond : dimensionedChoices) {
            SCOPED_TRACE(testing::Message() << "dimensionedCond: " << toString(dimensionedCond));
            for (Dimensioned dimensionedBody : dimensionedChoices) {
                SCOPED_TRACE(testing::Message()
                             << "dimensionedBody: " << toString(dimensionedBody));
                testWhileUnknownSize(dimensionedMain, dimensionedCond, dimensionedBody);
            }
        }
    }
}
3364
3365 // Test the memory step role analysis of the partitioning implementation.
3366 class MemoryStepRoleTest : public PartitioningTest {
3367 protected:
3368 // A tuple of {device_name, input/output}
3369 using TestStepRole = std::tuple<std::string, IOType>;
3370
SetUp()3371 void SetUp() override {
3372 PartitioningTest::SetUp();
3373 mModel = std::make_unique<PartitioningModel>();
3374 }
3375
toString(SourceOperandIndex index)3376 static std::string toString(SourceOperandIndex index) {
3377 return "{" + std::to_string(index.first) + ", " + std::to_string(index.second) + "}";
3378 }
3379
toString(const std::set<TestStepRole> & roles)3380 static std::string toString(const std::set<TestStepRole>& roles) {
3381 std::stringstream ss;
3382 ss << "[ ";
3383 for (const auto& [deviceName, type] : roles) {
3384 ss << "{" << deviceName << ", " << (type == IOType::INPUT ? "INPUT" : "OUTPUT") << "} ";
3385 }
3386 ss << "]";
3387 return ss.str();
3388 }
3389
finishAndPartitionModelForDevices(const std::vector<std::shared_ptr<Device>> & devices)3390 void finishAndPartitionModelForDevices(const std::vector<std::shared_ptr<Device>>& devices) {
3391 mModel->finish();
3392 ASSERT_TRUE(mModel->isValid());
3393 ASSERT_EQ(mModel->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
3394 ExecutePriority::DEFAULT, {}, &mPlan),
3395 ANEURALNETWORKS_NO_ERROR);
3396 }
3397
checkStepRolesOfInput(uint32_t index,const std::set<TestStepRole> & expected) const3398 void checkStepRolesOfInput(uint32_t index, const std::set<TestStepRole>& expected) const {
3399 SCOPED_TRACE("Input: " + std::to_string(index));
3400 std::set<TestStepRole> actual;
3401 mPlan.forEachStepRoleOfInput(
3402 index, [&actual](const auto* preparedModel, IOType type, uint32_t) {
3403 actual.emplace(preparedModel->getDevice()->getName(), type);
3404 });
3405 EXPECT_TRUE(expected == actual)
3406 << "expected: " << toString(expected) << ", actual: " << toString(actual);
3407 }
3408
checkStepRolesOfOutput(uint32_t index,const std::set<TestStepRole> & expected) const3409 void checkStepRolesOfOutput(uint32_t index, const std::set<TestStepRole>& expected) const {
3410 SCOPED_TRACE("Output: " + std::to_string(index));
3411 std::set<TestStepRole> actual;
3412 mPlan.forEachStepRoleOfOutput(
3413 index, [&actual](const auto* preparedModel, IOType type, uint32_t) {
3414 actual.emplace(preparedModel->getDevice()->getName(), type);
3415 });
3416 EXPECT_TRUE(expected == actual)
3417 << "expected: " << toString(expected) << ", actual: " << toString(actual);
3418 }
3419
checkStepRolesOfSourceOperand(SourceOperandIndex index,const std::set<TestStepRole> & expected) const3420 void checkStepRolesOfSourceOperand(SourceOperandIndex index,
3421 const std::set<TestStepRole>& expected) const {
3422 SCOPED_TRACE("SourceOperandIndex: " + toString(index));
3423 std::set<TestStepRole> actual;
3424 mPlan.forTest_compoundForEachStepRoleOfSourceOperand(
3425 index, [&actual](const auto* preparedModel, IOType type, uint32_t) {
3426 actual.emplace(preparedModel->getDevice()->getName(), type);
3427 });
3428 EXPECT_TRUE(expected == actual)
3429 << "expected: " << toString(expected) << ", actual: " << toString(actual);
3430 }
3431
3432 std::unique_ptr<PartitioningModel> mModel;
3433 ExecutionPlan mPlan;
3434 };
3435
3436 // Test a graph with 3 operations, each operation in a separate partition:
3437 // opnd2 = OP0(opnd0, opnd1)
3438 // opnd4 = OP1(opnd1, opnd3)
3439 // opnd5 = OP2(opnd2, opnd4)
TEST_F(MemoryStepRoleTest,NoControlFlow)3440 TEST_F(MemoryStepRoleTest, NoControlFlow) {
3441 const uint32_t opnd0 = mModel->addFloatOperand();
3442 const uint32_t opnd1 = mModel->addFloatOperand();
3443 const uint32_t opnd2 = mModel->addOperation2To1V1_0(0, opnd0, opnd1);
3444 const uint32_t opnd3 = mModel->addFloatOperand();
3445 const uint32_t opnd4 = mModel->addOperation2To1V1_0(1, opnd1, opnd3);
3446 const uint32_t opnd5 = mModel->addOperation2To1V1_0(2, opnd2, opnd4);
3447 mModel->identifyInputsAndOutputs({opnd0, opnd1, opnd3}, {opnd2, opnd5});
3448
3449 // This will result in 3 partitions:
3450 // deviceA handles op0, deviceB handles op1, deviceC handles op2.
3451 const auto devices = makeDevices(
3452 {{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}, {"deviceC", 0.5, 1 << 2}});
3453 finishAndPartitionModelForDevices(devices);
3454 checkExecutionPlanSteps(mPlan, {"deviceB", "deviceA", "deviceC"});
3455
3456 // Check the step roles of the main model inputs and outputs:
3457 //
3458 // input0 and input2 are each exclusive for a single partition.
3459 checkStepRolesOfInput(0, {{"deviceA", IOType::INPUT}});
3460 checkStepRolesOfInput(2, {{"deviceB", IOType::INPUT}});
3461 // input1 is shared by two operations in different partitions.
3462 checkStepRolesOfInput(1, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}});
3463 // output0 is a model output that is a downstream input.
3464 checkStepRolesOfOutput(0, {{"deviceA", IOType::OUTPUT}, {"deviceC", IOType::INPUT}});
3465 // output1 is only used in a single partition.
3466 checkStepRolesOfOutput(1, {{"deviceC", IOType::OUTPUT}});
3467
3468 // Check the step roles of the partition boundary temporaries that we will allocate memory on
3469 // behalf of (see ExecutionPlan::makeController for the allocation logic):
3470 //
3471 // opnd4 is a partition boundary temporary.
3472 checkStepRolesOfSourceOperand({0, opnd4},
3473 {{"deviceB", IOType::OUTPUT}, {"deviceC", IOType::INPUT}});
3474 }
3475
3476 // Test a graph with an interpreted IF operation.
TEST_F(MemoryStepRoleTest,InterpretedIf)3477 TEST_F(MemoryStepRoleTest, InterpretedIf) {
3478 auto thenModel = std::make_unique<PartitioningModel>();
3479 const uint32_t thenOpnd0 = thenModel->addFloatOperand();
3480 const uint32_t thenOpnd1 = thenModel->addFloatOperand();
3481 const uint32_t thenOpnd2 = thenModel->addOperation2To1V1_0(0, thenOpnd0, thenOpnd1);
3482 thenModel->identifyInputsAndOutputs({thenOpnd0, thenOpnd1}, {thenOpnd2});
3483 thenModel->finish();
3484 EXPECT_TRUE(thenModel->isValid());
3485
3486 auto elseModel = std::make_unique<PartitioningModel>();
3487 const uint32_t elseOpnd0 = elseModel->addFloatOperand();
3488 const uint32_t elseOpnd1 = elseModel->addFloatOperand();
3489 const uint32_t elseOpnd2 = elseModel->addOperation2To1V1_0(1, elseOpnd0, elseOpnd1);
3490 elseModel->identifyInputsAndOutputs({elseOpnd0, elseOpnd1}, {elseOpnd2});
3491 elseModel->finish();
3492 EXPECT_TRUE(elseModel->isValid());
3493
3494 const uint32_t mainOpnd0 = mModel->addBooleanOperand();
3495 const uint32_t mainOpnd1 = mModel->addFloatOperand();
3496 const uint32_t mainOpnd2 = mModel->addFloatOperand();
3497 const uint32_t mainOpnd3 = mModel->addFloatOperand();
3498 mModel->addIfOperation(mainOpnd0, *thenModel, *elseModel, {mainOpnd1, mainOpnd2}, {mainOpnd3});
3499 mModel->identifyInputsAndOutputs({mainOpnd0, mainOpnd1, mainOpnd2}, {mainOpnd3});
3500
3501 // deviceA handles op0, deviceB handles op1.
3502 const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
3503 finishAndPartitionModelForDevices(devices);
3504 checkExecutionPlanSteps(mPlan, {kIfStep, "deviceA", kGotoStep, "deviceB"});
3505
3506 // Check the step roles of the main model inputs and outputs:
3507 //
3508 // input0 is a condition operand of the interpreted IF that will only be read by the runtime.
3509 checkStepRolesOfInput(0, {});
3510 // input1 and input2 are outer inputs of the interpreted IF. The memories may be directly used
3511 // by the input operands of the then and else model.
3512 checkStepRolesOfInput(1, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}});
3513 checkStepRolesOfInput(2, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}});
3514 // output0 is the outer output of the interpreted IF. The memory may be directly
3515 // used by the output operands of the then and else model.
3516 checkStepRolesOfOutput(0, {{"deviceA", IOType::OUTPUT}, {"deviceB", IOType::OUTPUT}});
3517
3518 // There is no partition boundary temporary in this model that we will allocate memory on
3519 // behalf of (see ExecutionPlan::makeController for the allocation logic).
3520 }
3521
// Test a graph with an interpreted WHILE operation.
TEST_F(MemoryStepRoleTest, InterpretedWhile) {
    // Condition model:
    // condOpnd3 = OP0(condOpnd0, condOpnd1)
    // condOpnd4 = EQUAL(condOpnd2, condOpnd3)
    auto condModel = std::make_unique<PartitioningModel>();
    const uint32_t condOpnd0 = condModel->addFloatOperand();
    const uint32_t condOpnd1 = condModel->addFloatOperand();
    const uint32_t condOpnd2 = condModel->addFloatOperand();
    const uint32_t condOpnd3 = condModel->addOperation2To1V1_0(0, condOpnd0, condOpnd1);
    const uint32_t condOpnd4 = condModel->addExplicitOperationXTo1(
            ANEURALNETWORKS_EQUAL, {condOpnd2, condOpnd3}, WrapperType::TENSOR_BOOL8);
    condModel->identifyInputsAndOutputs({condOpnd0, condOpnd1, condOpnd2}, {condOpnd4});
    condModel->finish();
    EXPECT_TRUE(condModel->isValid());

    // Body model:
    // bodyOpnd3 = OP1(bodyOpnd0, bodyOpnd1)
    // bodyOpnd4 = OP1(bodyOpnd0, bodyOpnd2)
    auto bodyModel = std::make_unique<PartitioningModel>();
    const uint32_t bodyOpnd0 = bodyModel->addFloatOperand();
    const uint32_t bodyOpnd1 = bodyModel->addFloatOperand();
    const uint32_t bodyOpnd2 = bodyModel->addFloatOperand();
    const uint32_t bodyOpnd3 = bodyModel->addOperation2To1V1_0(1, bodyOpnd0, bodyOpnd1);
    const uint32_t bodyOpnd4 = bodyModel->addOperation2To1V1_0(1, bodyOpnd0, bodyOpnd2);
    bodyModel->identifyInputsAndOutputs({bodyOpnd0, bodyOpnd1, bodyOpnd2}, {bodyOpnd3, bodyOpnd4});
    bodyModel->finish();
    EXPECT_TRUE(bodyModel->isValid());

    // Main model: WHILE(cond, body) over three float inputs, one float output.
    const uint32_t mainOpnd0 = mModel->addFloatOperand();
    const uint32_t mainOpnd1 = mModel->addFloatOperand();
    const uint32_t mainOpnd2 = mModel->addFloatOperand();
    const uint32_t mainOpnd3 = mModel->addFloatOperand();
    mModel->addWhileOperation(*condModel, *bodyModel, {mainOpnd0, mainOpnd1, mainOpnd2},
                              {mainOpnd3});
    mModel->identifyInputsAndOutputs({mainOpnd0, mainOpnd1, mainOpnd2}, {mainOpnd3});

    // deviceA handles the cond model, deviceB handles the body model.
    // (deviceA is the only device declaring EQUAL support; deviceB's operation
    // mask 1 << 1 presumably covers only operation kind 1 — same mask scheme
    // as the NoControlFlow test above.)
    const auto devices = makeDevices({{"deviceA",
                                       0.8,
                                       ~0U,
                                       PartitioningDriver::OEMNo,
                                       HalVersion::LATEST,
                                       {V1_3::OperationType::EQUAL}},
                                      {"deviceB", 0.5, 1 << 1}});
    finishAndPartitionModelForDevices(devices);
    checkExecutionPlanSteps(mPlan, {kWhileStep, "deviceA", kGotoStep, "deviceB", kGotoStep});

    // The subgraph indexes of the condition and body models of the WHILE
    // operation (the main model is subgraph 0).
    const uint32_t condModelIndex = 1;
    const uint32_t bodyModelIndex = 2;

    // Check the step roles of the main model inputs and outputs:
    //
    // input0 (input-output), input1 (state-only), and input2 (input-only) are outer inputs of the
    // interpreted WHILE. The memories may be directly used by the input operands of the condition
    // and body models.
    checkStepRolesOfInput(0, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}});
    checkStepRolesOfInput(1, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}});
    checkStepRolesOfInput(2, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}});
    // output0 is an outer output of the interpreted WHILE that will only be written by the runtime.
    checkStepRolesOfOutput(0, {});

    // Check the step roles of the partition boundary temporaries that we will allocate memory on
    // behalf of (see ExecutionPlan::makeController for the allocation logic):
    //
    // condOpnd4 is the (boolean) output of the interpreted WHILE condition model.
    checkStepRolesOfSourceOperand({condModelIndex, condOpnd4}, {{"deviceA", IOType::OUTPUT}});
    // bodyOpnd3 (input-output) and bodyOpnd4 (state-only) are outputs of the interpreted WHILE body
    // model. The memories may be directly used by the input operands of the condition and body
    // models.
    checkStepRolesOfSourceOperand(
            {bodyModelIndex, bodyOpnd3},
            {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}, {"deviceB", IOType::OUTPUT}});
    checkStepRolesOfSourceOperand(
            {bodyModelIndex, bodyOpnd4},
            {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}, {"deviceB", IOType::OUTPUT}});
}
3600
// Test a graph with nested interpreted control flow operations: a WHILE operation with IF operation
// in the body model.
TEST_F(MemoryStepRoleTest, NestedInterpretedControlFlow) {
    // Condition model: condOpnd3 = EQUAL(condOpnd0, condOpnd1).
    // condOpnd2 (a boolean input) is unused here; it exists so the condition
    // model's inputs line up with the WHILE's outer inputs.
    auto condModel = std::make_unique<PartitioningModel>();
    const uint32_t condOpnd0 = condModel->addFloatOperand();
    const uint32_t condOpnd1 = condModel->addFloatOperand();
    const uint32_t condOpnd2 = condModel->addBooleanOperand();
    const uint32_t condOpnd3 = condModel->addExplicitOperationXTo1(
            ANEURALNETWORKS_EQUAL, {condOpnd0, condOpnd1}, WrapperType::TENSOR_BOOL8);
    condModel->identifyInputsAndOutputs({condOpnd0, condOpnd1, condOpnd2}, {condOpnd3});
    condModel->finish();
    EXPECT_TRUE(condModel->isValid());

    // Then model: thenOpnd2 = OP0(thenOpnd0, thenOpnd1).
    auto thenModel = std::make_unique<PartitioningModel>();
    const uint32_t thenOpnd0 = thenModel->addFloatOperand();
    const uint32_t thenOpnd1 = thenModel->addFloatOperand();
    const uint32_t thenOpnd2 = thenModel->addOperation2To1V1_0(0, thenOpnd0, thenOpnd1);
    thenModel->identifyInputsAndOutputs({thenOpnd0, thenOpnd1}, {thenOpnd2});
    thenModel->finish();
    EXPECT_TRUE(thenModel->isValid());

    // Else model: elseOpnd2 = OP1(elseOpnd0, elseOpnd1).
    auto elseModel = std::make_unique<PartitioningModel>();
    const uint32_t elseOpnd0 = elseModel->addFloatOperand();
    const uint32_t elseOpnd1 = elseModel->addFloatOperand();
    const uint32_t elseOpnd2 = elseModel->addOperation2To1V1_0(1, elseOpnd0, elseOpnd1);
    elseModel->identifyInputsAndOutputs({elseOpnd0, elseOpnd1}, {elseOpnd2});
    elseModel->finish();
    EXPECT_TRUE(elseModel->isValid());

    // Body model: bodyOpnd3 = IF(bodyOpnd2, then, else)(bodyOpnd0, bodyOpnd1).
    auto bodyModel = std::make_unique<PartitioningModel>();
    const uint32_t bodyOpnd0 = bodyModel->addFloatOperand();
    const uint32_t bodyOpnd1 = bodyModel->addFloatOperand();
    const uint32_t bodyOpnd2 = bodyModel->addBooleanOperand();
    const uint32_t bodyOpnd3 = bodyModel->addFloatOperand();
    bodyModel->addIfOperation(bodyOpnd2, *thenModel, *elseModel, {bodyOpnd0, bodyOpnd1},
                              {bodyOpnd3});
    bodyModel->identifyInputsAndOutputs({bodyOpnd0, bodyOpnd1, bodyOpnd2}, {bodyOpnd3});
    bodyModel->finish();
    EXPECT_TRUE(bodyModel->isValid());

    // Main model: WHILE(cond, body) over two floats and one boolean.
    const uint32_t mainOpnd0 = mModel->addFloatOperand();
    const uint32_t mainOpnd1 = mModel->addFloatOperand();
    const uint32_t mainOpnd2 = mModel->addBooleanOperand();
    const uint32_t mainOpnd3 = mModel->addFloatOperand();
    mModel->addWhileOperation(*condModel, *bodyModel, {mainOpnd0, mainOpnd1, mainOpnd2},
                              {mainOpnd3});
    mModel->identifyInputsAndOutputs({mainOpnd0, mainOpnd1, mainOpnd2}, {mainOpnd3});

    // deviceA handles the cond model, deviceB handles the then model,
    // deviceC handles the else model.
    const auto devices = makeDevices({{"deviceA",
                                       0.8,
                                       ~0U,
                                       PartitioningDriver::OEMNo,
                                       HalVersion::LATEST,
                                       {V1_3::OperationType::EQUAL}},
                                      {"deviceB", 0.5, 1 << 0},
                                      {"deviceC", 0.5, 1 << 1}});
    finishAndPartitionModelForDevices(devices);
    checkExecutionPlanSteps(mPlan, {kWhileStep, "deviceA", kGotoStep, kIfStep, "deviceB", kGotoStep,
                                    "deviceC", kGotoStep});

    // The subgraph indexes of the condition and body models of the WHILE
    // operation (the main model is subgraph 0).
    const uint32_t condModelIndex = 1;
    const uint32_t bodyModelIndex = 2;

    // Check the step roles of the main model inputs and outputs:
    //
    // input0 and input1 are outer inputs of the interpreted WHILE. The memories may be directly
    // used by the input operands of the condition and body models, and then be directly used by the
    // input operands of the then and else model of the interpreted IF in the body model.
    checkStepRolesOfInput(
            0,
            {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}, {"deviceC", IOType::INPUT}});
    checkStepRolesOfInput(
            1,
            {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}, {"deviceC", IOType::INPUT}});
    // input2 is also an outer input of the interpreted WHILE. The memory has no step role in the
    // condition model. In the body model, the memory will be used by the condition operand of the
    // interpreted IF that will only be read by the runtime.
    checkStepRolesOfInput(2, {});
    // output0 is an outer output of the interpreted WHILE that will only be written by the runtime.
    checkStepRolesOfOutput(0, {});

    // Check the step roles of the partition boundary temporaries that we will allocate memory on
    // behalf of (see ExecutionPlan::makeController for the allocation logic):
    //
    // condOpnd3 is the output of the interpreted WHILE condition model.
    checkStepRolesOfSourceOperand({condModelIndex, condOpnd3}, {{"deviceA", IOType::OUTPUT}});
    // bodyOpnd3 is output of the interpreted WHILE body model. The memories may be directly used by
    // the input operands of the condition and body models, and then be directly used by the
    // input operands of the then and else model of the interpreted IF in the body model.
    checkStepRolesOfSourceOperand({bodyModelIndex, bodyOpnd3}, {{"deviceA", IOType::INPUT},
                                                                {"deviceB", IOType::INPUT},
                                                                {"deviceB", IOType::OUTPUT},
                                                                {"deviceC", IOType::INPUT},
                                                                {"deviceC", IOType::OUTPUT}});
}
3699
3700 } // namespace
3701