1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <ControlFlow.h>
18 #include <HalInterfaces.h>
19 #include <SampleDriver.h>
20 #include <Utils.h>
21 #include <ValidateHal.h>
22 #include <gtest/gtest.h>
23 
24 #include <algorithm>
25 #include <filesystem>
26 #include <functional>
27 #include <iostream>
28 #include <map>
29 #include <memory>
30 #include <numeric>
31 #include <queue>
32 #include <set>
33 #include <string>
34 #include <tuple>
35 #include <type_traits>
36 #include <utility>
37 #include <vector>
38 
39 #include "CompilationBuilder.h"
40 #include "ExecutionPlan.h"
41 #include "HalUtils.h"
42 #include "Manager.h"
43 #include "ModelBuilder.h"
44 #include "NeuralNetworks.h"
45 #include "NeuralNetworksOEM.h"
46 #include "TestNeuralNetworksWrapper.h"
47 
48 // Uncomment the following line to generate some debugging output that
49 // may be useful when analyzing failures:
50 //
51 // #define VERBOSE VERBOSE
52 
53 // These tests do whitebox testing of the graph partitioning
54 // algorithm.  It is "whitebox" in the sense that we're not evaluating
55 // whether a particular partitioning is legal, or "good enough"
56 // according to some metric, but whether it exactly matches the
57 // expected behavior of the current partitioning algorithm.
58 //
59 // A key part of the current partitioning algorithm is to determine
60 // which device among the available devices should be the one to
61 // execute a particular operation from the graph.  This determination
62 // is made "locally" -- i.e., it does not depend on the graph
63 // topology, only on the properties of the operation in question.
64 // IDevice::getSupportedOperations() indicates which operations in a
65 // graph can be executed on a device, and IDevice::getCapabilities()
66 // indicates how "good" that device is for executing particular kinds
67 // of operations.  For each operation, the partitioning algorithm
68 // picks the "best" device that is capable of executing that
69 // operation; if no device can do so, then the algorithm picks the
70 // cpu.
71 //
72 // As part of this testing approach, we want to make it easy to
73 // specify which operations in a test graph can be executed on which
74 // devices.  We accomplish this in the following way:
75 // - A unary OEM operation is available.
76 // - There is a collection of operations (each of which has two inputs
77 //   and one output):
78 //   - Eight kinds of operations available at driver version V1_0 or
79 //     later.  They are represented in the graph as ADD or MUL with a
80 //     particular activation function -- two opcodes times four
81 //     activation functions means eight available operation kinds.
82 //     This is a low-level representation detail -- when we specify the
83 //     behavior of the device or build a graph, we do so in terms of
84 //     operation encodings 0..7.
85 //   - Eight kinds of operations available at driver version V1_1 or
86 //     later.  They are represented in the graph as DIV or SUB with
87 //     a particular activation function, exactly analogous to ADD
88 //     and MUL above.  We use operation encodings 8..15 for them.
89 //   - Four kinds of operations available at driver version V1_2 or
90 //     later.  They are represented in the graph as MAXIMUM,
91 //     MINIMUM, POW, or PRELU.  These operations take no activation
92 //     function, so we only get 4 operation kinds, for which we
93 //     use operation encodings 16..19.
94 // - There is another collection of operations (each of which has one input
95 //   and one output):
96 //   - Single operation available at driver version V1_3 or
97 //     later.  It is represented in the graph as HARD_SWISH.
//     This operation takes no activation function, and we use
//     operation encoding 20 for it.
100 
101 // When we instantiate a device for testing purposes, we specify what subset of
102 // those operations the device is able to execute.
103 //
104 // In order to determine whether or not a partitioning matches the
105 // expected partitioning, we check the number of partitions, check
106 // which device each partition targets, and compare each partition's
107 // subgraph, model inputs, model outputs, step model inputs, and
108 // step model outputs against what is expected.  In order to perform
109 // that comparison, we build a model to compare against a partition's
110 // step model and run a graph comparison algorithm on it.  The graph
111 // comparison and the inputs and outputs comparisons are syntactic
112 // rather than semantic comparisons -- they don't allow for
113 // reorderings of inputs and outputs.  Because of this, we need to
114 // know exactly how the partitioning algorithm orders inputs and
115 // outputs in order to construct the models and operand lists to
116 // compare against.  Here are some relevant behaviors of the
117 // partitioning algorithm:
118 //
119 // - It builds a subgraph by walking operations in forward topological
120 //   order, and adding each operation's input operands and output
121 //   operands in index order (input followed by output) when that
122 //   operation is added.  (It does not add an input that has already
123 //   been added.)
124 // - It finds model inputs, model outputs, and step model inputs in
125 //   the order the corresponding operands were added to the subgraph
126 //   (see ExecutionStep methods getModelInputs(), getModelOutputs(),
127 //   getTempsAsStepModelInputs(), getOutputsAsStepModelInputs()).
128 // - It finds temps as step model outputs in numerical order of corresponding
129 //   operand number in the original model (see ExecutionStep method
130 //   getTempsAsStepModelOutputs()).
131 // - When it calls identifyInputsAndOutputs() on the step model, it
132 //   passes inputs from getModelInputs() in order, followed by temps as
133 //   step model inputs from getTempsAsStepModelInputs() in order,
134 //   followed by outputs as step model inputs from
135 //   getOutputsAsStepModelInputs() in order; and it passes outputs from
136 //   getModelOutputs() in order followed by step model outputs from
137 //   getTempsAsStepModelOutputs() in order.
138 //
139 // TODO: Maybe the logic for comparing a partition to an expected
140 //       model should be changed to tolerate reorderings of inputs and
141 //       outputs, so that when we build models and lists to compare
142 //       against, we don't need to worry about input and output
143 //       orderings.  But is there a way to do this that still lets us
144 //       verify that we have the correct relationships between
145 //       an (original) model's inputs and outputs and each step model's
146 //       inputs and outputs, as well as the correct relationship
147 //       between step model inputs and outputs across partitions?
148 
149 namespace {
150 
151 namespace hardware = android::hardware;
152 namespace V1_0 = ::android::hardware::neuralnetworks::V1_0;
153 namespace V1_1 = ::android::hardware::neuralnetworks::V1_1;
154 namespace V1_2 = ::android::hardware::neuralnetworks::V1_2;
155 namespace V1_3 = ::android::hardware::neuralnetworks::V1_3;
156 using CompilationBuilder = ::android::nn::CompilationBuilder;
157 using Device = ::android::nn::Device;
158 using DeviceManager = ::android::nn::DeviceManager;
159 using ExecutePreference = ::android::nn::test_wrapper::ExecutePreference;
160 using ExecutePriority = ::android::nn::test_wrapper::ExecutePriority;
161 using ExecutionPlan = ::android::nn::ExecutionPlan;
162 using ExecutionStep = ::android::nn::ExecutionStep;
163 using HalCacheToken = ::android::nn::HalCacheToken;
164 using HalVersion = ::android::nn::HalVersion;
165 using HidlModel = V1_3::Model;
166 using IOType = ::android::nn::IOType;
167 using LogicalStep = ::android::nn::LogicalStep;
168 using ModelBuilder = ::android::nn::ModelBuilder;
169 using Operand = ::android::nn::Operand;
170 using Operation = ::android::nn::Operation;
171 using OptionalTimePoint = ::android::nn::OptionalTimePoint;
172 using Result = ::android::nn::test_wrapper::Result;
173 using SampleDriver = ::android::nn::sample_driver::SampleDriver;
174 using SharedDevice = ::android::nn::SharedDevice;
175 using SourceOperandIndex = ::android::nn::SourceOperandIndex;
176 using StepRole = ::android::nn::StepRole;
177 using WrapperCompilation = ::android::nn::test_wrapper::Compilation;
178 using WrapperExecution = ::android::nn::test_wrapper::Execution;
179 using WrapperModel = ::android::nn::test_wrapper::Model;
180 using WrapperOperandType = ::android::nn::test_wrapper::OperandType;
181 using WrapperSymmPerChannelQuantParams = ::android::nn::test_wrapper::SymmPerChannelQuantParams;
182 using WrapperType = ::android::nn::test_wrapper::Type;
183 using android::sp;
184 
update(V1_3::Capabilities * capabilities,V1_3::OperandType type,float perf)185 void update(V1_3::Capabilities* capabilities, V1_3::OperandType type, float perf) {
186     V1_0::PerformanceInfo perfInfo = {.execTime = perf, .powerUsage = perf};
187     ::android::nn::update(&capabilities->operandPerformance, type, perfInfo);
188 }
189 
lookupExecTime(const V1_3::Capabilities & capabilities,V1_3::OperandType type)190 float lookupExecTime(const V1_3::Capabilities& capabilities, V1_3::OperandType type) {
191     return ::android::nn::lookup(capabilities.operandPerformance, type).execTime;
192 }
193 
min(HalVersion a,HalVersion b)194 HalVersion min(HalVersion a, HalVersion b) {
195     return int32_t(a) < int32_t(b) ? a : b;
196 }
197 
// Number of fused activation function codes (NONE, RELU, RELU1, RELU6) that
// an ADD/MUL/DIV/SUB operation can carry; each (opcode, fuse code) pair gets
// its own encoding.
const uint32_t kNumFuseCodes = 4;
// Sentinel (UINT32_MAX) returned by lookupOperation() when an operation is
// not one of the encoded kinds.
const uint32_t kBadOperation = ~0;

// V1_0 operations
const uint32_t kFirstEncodingADD = 0;
const uint32_t kFirstEncodingMUL = kFirstEncodingADD + kNumFuseCodes;
const uint32_t kFirstEncodingV1_0 = kFirstEncodingADD;
const uint32_t kLastEncodingV1_0 = kFirstEncodingMUL + kNumFuseCodes - 1;

// V1_1 operations
const uint32_t kFirstEncodingDIV = kLastEncodingV1_0 + 1;
const uint32_t kFirstEncodingSUB = kFirstEncodingDIV + kNumFuseCodes;
const uint32_t kFirstEncodingV1_1 = kFirstEncodingDIV;
const uint32_t kLastEncodingV1_1 = kFirstEncodingSUB + kNumFuseCodes - 1;

// V1_2 operations (no activation function, so one encoding per opcode)
const uint32_t kFirstEncodingMAXIMUM = kLastEncodingV1_1 + 1;
const uint32_t kFirstEncodingMINIMUM = kFirstEncodingMAXIMUM + 1;
const uint32_t kFirstEncodingPOW = kFirstEncodingMINIMUM + 1;
const uint32_t kFirstEncodingPRELU = kFirstEncodingPOW + 1;
const uint32_t kFirstEncodingV1_2 = kFirstEncodingMAXIMUM;
const uint32_t kLastEncodingV1_2 = kFirstEncodingPRELU;

// V1_3 operations (single unary operation)
const uint32_t kFirstEncodingHARD_SWISH = kLastEncodingV1_2 + 1;
const uint32_t kFirstEncodingV1_3 = kFirstEncodingHARD_SWISH;
const uint32_t kLastEncodingV1_3 = kFirstEncodingHARD_SWISH;

// Maps each encodable operation type to the first (lowest) encoding assigned
// to it; for ADD/MUL/DIV/SUB the fuse code is added to this base.
const std::map<V1_3::OperationType, uint32_t> operationToFirstEncoding = {
        {V1_3::OperationType::ADD, kFirstEncodingADD},
        {V1_3::OperationType::MUL, kFirstEncodingMUL},
        {V1_3::OperationType::DIV, kFirstEncodingDIV},
        {V1_3::OperationType::SUB, kFirstEncodingSUB},
        {V1_3::OperationType::MAXIMUM, kFirstEncodingMAXIMUM},
        {V1_3::OperationType::MINIMUM, kFirstEncodingMINIMUM},
        {V1_3::OperationType::POW, kFirstEncodingPOW},
        {V1_3::OperationType::PRELU, kFirstEncodingPRELU},
        {V1_3::OperationType::HARD_SWISH, kFirstEncodingHARD_SWISH},
};

// Sorted in reverse order (std::greater) so that we can use map::lower_bound to
// find an entry whose key is numerically less than or equal to a search value.
// mapped_type is (OperandCode, hasFuseCode).
const std::map<uint32_t, std::pair<uint32_t, bool>, std::greater<>> firstEncodingToOperation = {
        {kFirstEncodingADD, {ANEURALNETWORKS_ADD, true}},
        {kFirstEncodingMUL, {ANEURALNETWORKS_MUL, true}},
        {kFirstEncodingDIV, {ANEURALNETWORKS_DIV, true}},
        {kFirstEncodingSUB, {ANEURALNETWORKS_SUB, true}},
        {kFirstEncodingMAXIMUM, {ANEURALNETWORKS_MAXIMUM, false}},
        {kFirstEncodingMINIMUM, {ANEURALNETWORKS_MINIMUM, false}},
        {kFirstEncodingPOW, {ANEURALNETWORKS_POW, false}},
        {kFirstEncodingPRELU, {ANEURALNETWORKS_PRELU, false}},
        {kFirstEncodingHARD_SWISH, {ANEURALNETWORKS_HARD_SWISH, false}},
};
252 
253 // Look up the operation with the specified index in a graph, and return the
254 // operation encoding; or, if for some reason this is not one of the encoded
255 // operations, then return kBadOperation.
lookupOperation(std::function<const V1_3::Operation & (uint32_t)> getOperation,std::function<const V1_3::Operand & (uint32_t)> getOperand,std::function<const uint8_t * (uint32_t)> getValue,uint32_t operationIndex)256 uint32_t lookupOperation(std::function<const V1_3::Operation&(uint32_t)> getOperation,
257                          std::function<const V1_3::Operand&(uint32_t)> getOperand,
258                          std::function<const uint8_t*(uint32_t)> getValue,
259                          uint32_t operationIndex) {
260     const V1_3::Operation& operation = getOperation(operationIndex);
261     switch (operation.type) {
262         case V1_3::OperationType::ADD:
263         case V1_3::OperationType::MUL:
264         case V1_3::OperationType::DIV:
265         case V1_3::OperationType::SUB: {
266             // input2 is the fused activation function
267             const V1_3::Operand& input2 = getOperand(operation.inputs[2]);
268             if ((input2.type == V1_3::OperandType::INT32) &&
269                 (input2.lifetime == V1_3::OperandLifeTime::CONSTANT_COPY)) {
270                 int32_t value;
271                 CHECK_EQ(sizeof(value), input2.location.length);
272                 memcpy(&value, getValue(input2.location.offset), input2.location.length);
273                 return value + operationToFirstEncoding.at(operation.type);
274             }
275             break;
276         }
277         default: {
278             auto it = operationToFirstEncoding.find(operation.type);
279             if (it != operationToFirstEncoding.end()) {
280                 return it->second;
281             }
282             break;
283         }
284     }
285     return kBadOperation;
286 }
287 
lookupOperation(const HidlModel & model,const V1_3::Subgraph & subgraph,uint32_t operationIndex)288 uint32_t lookupOperation(const HidlModel& model, const V1_3::Subgraph& subgraph,
289                          uint32_t operationIndex) {
290     return lookupOperation(
291             [&subgraph](uint32_t index) -> const V1_3::Operation& {
292                 return subgraph.operations[index];
293             },
294             [&subgraph](uint32_t index) -> const V1_3::Operand& {
295                 return subgraph.operands[index];
296             },
297             [&model](uint32_t offset) { return &model.operandValues[offset]; }, operationIndex);
298 }
299 
#ifdef VERBOSE
// This is a debugging utility function: prints the model's HIDL
// representation, its main-subgraph input/output indexes, and each operation,
// to help analyze test failures.
void dump(const char* name, const ModelBuilder* model) {
    const HidlModel hidlModel = model->makeHidlModel();
    std::cout << name << ": " << hidlModel << std::endl;
    std::cout << "inputs: " << hidlModel.main.inputIndexes << std::endl;
    std::cout << "outputs: " << hidlModel.main.outputIndexes << std::endl;
    for (size_t i = 0, e = hidlModel.main.operations.size(); i < e; i++) {
        std::cout << "operation[" << i << "]: " << hidlModel.main.operations[i] << std::endl;
    }
}
#endif
312 
// This is an IDevice for testing purposes.  It only has a few interesting
// properties, all of which are specified as constructor arguments: device
// capabilities; which subset of operation kinds (0..20) does the device
// support; does the device support the OEM operation; does the device support
// other operations.  The subset is represented with a bitmask, in which
// operation kind K corresponds to the bit (1 << K).  The other operations are
// represented by a set of OperationType.
class PartitioningDriver : public SampleDriver {
   public:
    // How the driver treats the OEM operation at each of the two stages.
    enum OEM {
        OEMNo,          // rejected by getSupportedOperations and prepareModel
        OEMIndecisive,  // accepted by getSupportedOperations but not prepareModel
        OEMYes,         // accepted by getSupportedOperations and prepareModel
    };

    // 'operationMask' selects which encoded operation kinds are supported
    // (bit K corresponds to encoding K); 'operationTypes' lists additional
    // supported operation types outside the encoding scheme.
    PartitioningDriver(const char* name, const char* version, V1_3::Capabilities capabilities,
                       uint32_t operationMask, OEM oem = OEMNo,
                       std::set<V1_3::OperationType> operationTypes = {})
        : SampleDriver(name),
          mVersionString(version),
          mCapabilities(capabilities),
          mOperationMask(operationMask),
          mOEM(oem),
          mOperationTypes(std::move(operationTypes)) {
        // OEM support is controlled solely by 'oem', never via operationTypes.
        CHECK_EQ(mOperationTypes.count(V1_3::OperationType::OEM_OPERATION), size_t(0));
        if (operationMask) {
            // Encoded operations must be selected via the mask, not via
            // operationTypes, so the two mechanisms cannot overlap.
            std::for_each(mOperationTypes.begin(), mOperationTypes.end(),
                          [](V1_3::OperationType type) {
                              CHECK_EQ(operationToFirstEncoding.count(type), size_t(0));
                          });
        }
    }
    ~PartitioningDriver() override {}

    hardware::Return<void> getVersionString(getVersionString_cb cb) override {
        cb(V1_0::ErrorStatus::NONE, mVersionString);
        return hardware::Void();
    }

    // Prepares the model only if every operation in it is reported supported
    // by getSupportedOperations_1_3; in particular, an OEMIndecisive driver
    // fails preparation of any model whose main subgraph contains the OEM
    // operation, even though it claims support for it.
    hardware::Return<V1_3::ErrorStatus> prepareModel_1_3(
            const V1_3::Model& model, V1_1::ExecutionPreference preference, V1_3::Priority priority,
            const V1_3::OptionalTimePoint& deadline,
            const hardware::hidl_vec<hardware::hidl_handle>& modelCache,
            const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token,
            const sp<V1_3::IPreparedModelCallback>& callback) override {
        if (mOEM == OEMIndecisive) {
            for (const auto& operation : model.main.operations) {
                if (operation.type == V1_3::OperationType::OEM_OPERATION) {
                    callback->notify_1_3(V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr);
                    return V1_3::ErrorStatus::INVALID_ARGUMENT;
                }
            }
        }

        // NOTE: We verify that all operations in the model are supported.
        V1_3::ErrorStatus outStatus = V1_3::ErrorStatus::INVALID_ARGUMENT;
        auto ret = getSupportedOperations_1_3(
                model, [&outStatus](V1_3::ErrorStatus inStatus,
                                    const hardware::hidl_vec<bool>& supportedOperations) {
                    if (inStatus == V1_3::ErrorStatus::NONE) {
                        if (std::all_of(supportedOperations.begin(), supportedOperations.end(),
                                        [](bool v) { return v; })) {
                            outStatus = V1_3::ErrorStatus::NONE;
                        }
                    }
                });
        if (ret.isOk() && (outStatus == V1_3::ErrorStatus::NONE)) {
            return SampleDriver::prepareModel_1_3(model, preference, priority, deadline, modelCache,
                                                  dataCache, token, callback);
        } else {
            callback->notify_1_3(V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr);
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
    }

    hardware::Return<V1_0::DeviceStatus> getStatus() override {
        return V1_0::DeviceStatus::AVAILABLE;
    }

    hardware::Return<void> getCapabilities_1_3(getCapabilities_1_3_cb cb) override {
        cb(V1_3::ErrorStatus::NONE, mCapabilities);
        return hardware::Void();
    }

    hardware::Return<void> getSupportedOperations_1_3(const V1_3::Model& model,
                                                      getSupportedOperations_1_3_cb cb) override {
        if (!android::nn::validateModel(model)) {
            cb(V1_3::ErrorStatus::INVALID_ARGUMENT, std::vector<bool>());
            return hardware::Void();
        }
        cb(V1_3::ErrorStatus::NONE, getSupportedOperationsForSubgraph(model, model.main));
        return hardware::Void();
    }

    hardware::Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb cb) override {
        cb(V1_0::ErrorStatus::NONE, /*numModelCache=*/1, /*numDataCache=*/1);
        return hardware::Void();
    }

   private:
    // Computes per-operation support for 'subgraph'.  An IF or WHILE listed
    // in mOperationTypes is supported only if every operation of each
    // subgraph it references is supported as well (checked recursively).
    std::vector<bool> getSupportedOperationsForSubgraph(const V1_3::Model& model,
                                                        const V1_3::Subgraph& subgraph) {
        // 'subgraph' must be the model's main subgraph or one of its
        // referenced subgraphs.
        CHECK(&subgraph == &model.main ||
              std::find_if(model.referenced.begin(), model.referenced.end(),
                           [&subgraph](const V1_3::Subgraph& refSubgraph) {
                               return &subgraph == &refSubgraph;
                           }) != model.referenced.end());
        // Given the index (in 'subgraph') of a SUBGRAPH operand, returns
        // whether every operation of the referenced subgraph is supported.
        auto supportsEntireSubgraph = [this, &model, &subgraph](uint32_t refSubgraphOperandIndex) {
            CHECK_LT(refSubgraphOperandIndex, subgraph.operands.size());
            const V1_3::Operand& refSubgraphOperand = subgraph.operands[refSubgraphOperandIndex];
            CHECK(refSubgraphOperand.lifetime == V1_3::OperandLifeTime::SUBGRAPH);
            CHECK_LT(refSubgraphOperand.location.offset, model.referenced.size());
            const V1_3::Subgraph& refSubgraph =
                    model.referenced[refSubgraphOperand.location.offset];
            std::vector<bool> supported = getSupportedOperationsForSubgraph(model, refSubgraph);
            return std::all_of(supported.begin(), supported.end(), [](bool x) { return x; });
        };
        const size_t count = subgraph.operations.size();
        std::vector<bool> supported(count);
        for (size_t i = 0; i < count; i++) {
            const V1_3::Operation& operation = subgraph.operations[i];
            if (mOperationTypes.count(operation.type)) {
                if (operation.type == V1_3::OperationType::IF) {
                    namespace op = android::nn::operation_if;
                    CHECK_GE(operation.inputs.size(), op::kFirstInput);
                    supported[i] =
                            supportsEntireSubgraph(operation.inputs[op::kThenModelOperand]) &&
                            supportsEntireSubgraph(operation.inputs[op::kElseModelOperand]);
                } else if (operation.type == V1_3::OperationType::WHILE) {
                    namespace op = android::nn::operation_while;
                    CHECK_GE(operation.inputs.size(), op::kFirstInput);
                    supported[i] =
                            supportsEntireSubgraph(operation.inputs[op::kCondModelOperand]) &&
                            supportsEntireSubgraph(operation.inputs[op::kBodyModelOperand]);
                } else {
                    supported[i] = true;
                }
                continue;
            }
            if (operation.type == V1_3::OperationType::OEM_OPERATION) {
                supported[i] = (mOEM != OEMNo);
                continue;
            }
            // Otherwise, support is determined by the encoding bitmask.
            supported[i] = false;
            uint32_t operationEncoding = lookupOperation(model, subgraph, i);
            if ((operationEncoding != kBadOperation) &&
                (mOperationMask & (1 << operationEncoding))) {
                supported[i] = true;
            }
        }
        return supported;
    }

    std::string mVersionString;
    V1_3::Capabilities mCapabilities;
    uint32_t mOperationMask;
    OEM mOEM;
    std::set<V1_3::OperationType> mOperationTypes;
};
472 
473 // Like PartitioningDriver, but implementing 1.2
474 class PartitioningDriverV1_2 : public V1_2::IDevice {
475    public:
PartitioningDriverV1_2(const char * name,const char * version,V1_3::Capabilities capabilities,uint32_t operationMask,PartitioningDriver::OEM oem=PartitioningDriver::OEMNo,std::set<V1_3::OperationType> operationTypes={})476     PartitioningDriverV1_2(const char* name, const char* version, V1_3::Capabilities capabilities,
477                            uint32_t operationMask,
478                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
479                            std::set<V1_3::OperationType> operationTypes = {})
480         : mLatestDriver(new PartitioningDriver(name, version, capabilities, operationMask, oem,
481                                                operationTypes)) {}
getCapabilities_1_2(getCapabilities_1_2_cb _hidl_cb)482     hardware::Return<void> getCapabilities_1_2(getCapabilities_1_2_cb _hidl_cb) override {
483         return mLatestDriver->getCapabilities_1_2(_hidl_cb);
484     }
getSupportedOperations_1_2(const V1_2::Model & model,getSupportedOperations_1_2_cb _hidl_cb)485     hardware::Return<void> getSupportedOperations_1_2(
486             const V1_2::Model& model, getSupportedOperations_1_2_cb _hidl_cb) override {
487         return mLatestDriver->getSupportedOperations_1_2(model, _hidl_cb);
488     }
prepareModel_1_2(const V1_2::Model & model,V1_1::ExecutionPreference preference,const hardware::hidl_vec<hardware::hidl_handle> & modelCache,const hardware::hidl_vec<hardware::hidl_handle> & dataCache,const HalCacheToken & token,const sp<V1_2::IPreparedModelCallback> & actualCallback)489     hardware::Return<V1_0::ErrorStatus> prepareModel_1_2(
490             const V1_2::Model& model, V1_1::ExecutionPreference preference,
491             const hardware::hidl_vec<hardware::hidl_handle>& modelCache,
492             const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token,
493             const sp<V1_2::IPreparedModelCallback>& actualCallback) override {
494         return mLatestDriver->prepareModel_1_2(model, preference, modelCache, dataCache, token,
495                                                actualCallback);
496     }
getVersionString(getVersionString_cb _hidl_cb)497     hardware::Return<void> getVersionString(getVersionString_cb _hidl_cb) override {
498         return mLatestDriver->getVersionString(_hidl_cb);
499     }
getType(getType_cb _hidl_cb)500     hardware::Return<void> getType(getType_cb _hidl_cb) override {
501         return mLatestDriver->getType(_hidl_cb);
502     }
getSupportedExtensions(getSupportedExtensions_cb _hidl_cb)503     hardware::Return<void> getSupportedExtensions(getSupportedExtensions_cb _hidl_cb) {
504         return mLatestDriver->getSupportedExtensions(_hidl_cb);
505     }
getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb _hidl_cb)506     hardware::Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb _hidl_cb) {
507         return mLatestDriver->getNumberOfCacheFilesNeeded(_hidl_cb);
508     }
prepareModelFromCache(const hardware::hidl_vec<hardware::hidl_handle> & modelCache,const hardware::hidl_vec<hardware::hidl_handle> & dataCache,const HalCacheToken & token,const sp<V1_2::IPreparedModelCallback> & callback)509     hardware::Return<V1_0::ErrorStatus> prepareModelFromCache(
510             const hardware::hidl_vec<hardware::hidl_handle>& modelCache,
511             const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token,
512             const sp<V1_2::IPreparedModelCallback>& callback) {
513         return mLatestDriver->prepareModelFromCache(modelCache, dataCache, token, callback);
514     }
getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb)515     hardware::Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
516         return mLatestDriver->getCapabilities_1_1(_hidl_cb);
517     }
getSupportedOperations_1_1(const V1_1::Model & model,getSupportedOperations_1_1_cb _hidl_cb)518     hardware::Return<void> getSupportedOperations_1_1(
519             const V1_1::Model& model, getSupportedOperations_1_1_cb _hidl_cb) override {
520         return mLatestDriver->getSupportedOperations_1_1(model, _hidl_cb);
521     }
prepareModel_1_1(const V1_1::Model & model,V1_1::ExecutionPreference preference,const sp<V1_0::IPreparedModelCallback> & actualCallback)522     hardware::Return<V1_0::ErrorStatus> prepareModel_1_1(
523             const V1_1::Model& model, V1_1::ExecutionPreference preference,
524             const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
525         return mLatestDriver->prepareModel_1_1(model, preference, actualCallback);
526     }
getStatus()527     hardware::Return<V1_0::DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }
getCapabilities(getCapabilities_cb _hidl_cb)528     hardware::Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
529         return mLatestDriver->getCapabilities(_hidl_cb);
530     }
getSupportedOperations(const V1_0::Model & model,getSupportedOperations_cb _hidl_cb)531     hardware::Return<void> getSupportedOperations(const V1_0::Model& model,
532                                                   getSupportedOperations_cb _hidl_cb) override {
533         return mLatestDriver->getSupportedOperations(model, _hidl_cb);
534     }
prepareModel(const V1_0::Model & model,const sp<V1_0::IPreparedModelCallback> & actualCallback)535     hardware::Return<V1_0::ErrorStatus> prepareModel(
536             const V1_0::Model& model,
537             const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
538         return mLatestDriver->prepareModel(model, actualCallback);
539     }
540 
541    private:
542     const sp<V1_3::IDevice> mLatestDriver;
543 };
544 
545 // Like PartitioningDriver, but implementing 1.1
546 class PartitioningDriverV1_1 : public V1_1::IDevice {
547    public:
PartitioningDriverV1_1(const char * name,const char * version,V1_3::Capabilities capabilities,uint32_t operationMask,PartitioningDriver::OEM oem=PartitioningDriver::OEMNo,std::set<V1_3::OperationType> operationTypes={})548     PartitioningDriverV1_1(const char* name, const char* version, V1_3::Capabilities capabilities,
549                            uint32_t operationMask,
550                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
551                            std::set<V1_3::OperationType> operationTypes = {})
552         : mLatestDriver(new PartitioningDriver(name, version, capabilities, operationMask, oem,
553                                                operationTypes)) {}
getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb)554     hardware::Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
555         return mLatestDriver->getCapabilities_1_1(_hidl_cb);
556     }
getSupportedOperations_1_1(const V1_1::Model & model,getSupportedOperations_1_1_cb _hidl_cb)557     hardware::Return<void> getSupportedOperations_1_1(
558             const V1_1::Model& model, getSupportedOperations_1_1_cb _hidl_cb) override {
559         return mLatestDriver->getSupportedOperations_1_1(model, _hidl_cb);
560     }
prepareModel_1_1(const V1_1::Model & model,V1_1::ExecutionPreference preference,const sp<V1_0::IPreparedModelCallback> & actualCallback)561     hardware::Return<V1_0::ErrorStatus> prepareModel_1_1(
562             const V1_1::Model& model, V1_1::ExecutionPreference preference,
563             const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
564         return mLatestDriver->prepareModel_1_1(model, preference, actualCallback);
565     }
getStatus()566     hardware::Return<V1_0::DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }
getCapabilities(getCapabilities_cb _hidl_cb)567     hardware::Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
568         return mLatestDriver->getCapabilities(_hidl_cb);
569     }
getSupportedOperations(const V1_0::Model & model,getSupportedOperations_cb _hidl_cb)570     hardware::Return<void> getSupportedOperations(const V1_0::Model& model,
571                                                   getSupportedOperations_cb _hidl_cb) override {
572         return mLatestDriver->getSupportedOperations(model, _hidl_cb);
573     }
prepareModel(const V1_0::Model & model,const sp<V1_0::IPreparedModelCallback> & actualCallback)574     hardware::Return<V1_0::ErrorStatus> prepareModel(
575             const V1_0::Model& model,
576             const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
577         return mLatestDriver->prepareModel(model, actualCallback);
578     }
579 
580    private:
581     const sp<V1_3::IDevice> mLatestDriver;
582 };
583 
584 // Like PartitioningDriver, but implementing 1.0
585 class PartitioningDriverV1_0 : public V1_0::IDevice {
586    public:
PartitioningDriverV1_0(const char * name,const char * version,V1_3::Capabilities capabilities,uint32_t operationMask,PartitioningDriver::OEM oem=PartitioningDriver::OEMNo,std::set<V1_3::OperationType> operationTypes={})587     PartitioningDriverV1_0(const char* name, const char* version, V1_3::Capabilities capabilities,
588                            uint32_t operationMask,
589                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
590                            std::set<V1_3::OperationType> operationTypes = {})
591         : mLatestDriver(new PartitioningDriver(name, version, capabilities, operationMask, oem,
592                                                operationTypes)) {}
getCapabilities(getCapabilities_cb _hidl_cb)593     hardware::Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
594         return mLatestDriver->getCapabilities(_hidl_cb);
595     }
getSupportedOperations(const V1_0::Model & model,getSupportedOperations_cb _hidl_cb)596     hardware::Return<void> getSupportedOperations(const V1_0::Model& model,
597                                                   getSupportedOperations_cb _hidl_cb) override {
598         return mLatestDriver->getSupportedOperations(model, _hidl_cb);
599     }
prepareModel(const V1_0::Model & model,const sp<V1_0::IPreparedModelCallback> & actualCallback)600     hardware::Return<V1_0::ErrorStatus> prepareModel(
601             const V1_0::Model& model,
602             const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
603         return mLatestDriver->prepareModel(model, actualCallback);
604     }
getStatus()605     hardware::Return<V1_0::DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }
606 
607    private:
608     const sp<V1_3::IDevice> mLatestDriver;
609 };
610 
// Shape specification for test operands.  Interpreted by dimensions() below;
// numberOfElements() additionally requires a fully specified kind (YES_*).
enum class Dimensioned {
    NO,      // either a scalar, or a tensor of either unspecified rank (usually)
             // or specified rank but with no specified dimensions (where
             // specifically stated)
    RANK_1,  // tensor of shape { 0 }    -- i.e., rank 1, unspecified dimensions
    RANK_2,  // tensor of shape { 0, 0 } -- i.e., rank 2, unspecified dimensions
    YES_1,   // tensor of shape { 1 }
    YES_2,   // tensor of shape { 2 }
    YES_4,   // tensor of shape { 4 }
    YES = YES_1  // default "fully specified" kind used throughout the tests
};
622 
dimensions(Dimensioned dimensioned)623 std::vector<uint32_t> dimensions(Dimensioned dimensioned) {
624     switch (dimensioned) {
625         default:
626             EXPECT_TRUE(false) << "Unknown value";
627             FALLTHROUGH_INTENDED;
628         case Dimensioned::NO:
629             return {};
630         case Dimensioned::RANK_1:
631             return {0};
632         case Dimensioned::RANK_2:
633             return {0, 0};
634         case Dimensioned::YES_1:
635             return {1};
636         case Dimensioned::YES_2:
637             return {2};
638         case Dimensioned::YES_4:
639             return {4};
640     }
641 }
642 
643 // "dimensioned" must be a fully specified kind
numberOfElements(Dimensioned dimensioned)644 uint32_t numberOfElements(Dimensioned dimensioned) {
645     auto dims = dimensions(dimensioned);
646     uint32_t result = std::reduce(dims.begin(), dims.end(), 1u, std::multiplies<>());
647     CHECK_GT(result, 0u);
648     return result;
649 }
650 
toString(Dimensioned dimensioned)651 std::string toString(Dimensioned dimensioned) {
652     switch (dimensioned) {
653         default:
654             return "<Unknown value>";
655         case Dimensioned::NO:
656             return "NO";
657         case Dimensioned::RANK_1:
658             return "RANK_1";
659         case Dimensioned::RANK_2:
660             return "RANK_2";
661         case Dimensioned::YES_1:
662             return "YES_1";
663         case Dimensioned::YES_2:
664             return "YES_2";
665         case Dimensioned::YES_4:
666             return "YES_4";
667     }
668 }
669 
670 // This class adds some simple abstractions and utilities on top of
671 // WrapperModel.  For example, it provides methods that work in terms of
672 // operation kind (0..7); and because we care about graph topology rather than
673 // details of operand types and values, it greatly simplifies the process of
674 // creating operands.
// This class adds some simple abstractions and utilities on top of
// WrapperModel.  For example, it provides methods that work in terms of
// operation kind (0..7); and because we care about graph topology rather than
// details of operand types and values, it greatly simplifies the process of
// creating operands.
//
// Inheritance is private: only the WrapperModel members re-exported below are
// part of this class's interface.
class PartitioningModel : private WrapperModel {
   public:
    using WrapperModel::finish;
    using WrapperModel::getHandle;
    using WrapperModel::identifyInputsAndOutputs;
    using WrapperModel::isValid;
    using WrapperModel::relaxComputationFloat32toFloat16;
    using WrapperModel::setOperandValue;

    // Create a tensor operand of the specified type, and return the
    // corresponding operand index.
    uint32_t addIntOperand(Dimensioned dimensioned = Dimensioned::YES) {
        return addOperand(WrapperType::TENSOR_INT32, dimensioned);
    }
    // Create an INT32 scalar operand, optionally set to a constant value, and
    // return the corresponding operand index.
    uint32_t addIntScalarOperand(std::optional<int> v = std::nullopt) {
        uint32_t opnd = addOperand(WrapperType::INT32);
        if (v.has_value()) {
            setOperandValue(opnd, &v.value());
        }
        return opnd;
    }
    uint32_t addFloatOperand(Dimensioned dimensioned = Dimensioned::YES) {
        return addOperand(WrapperType::TENSOR_FLOAT32, dimensioned);
    }
    uint32_t addQuantOperand(Dimensioned dimensioned = Dimensioned::YES) {
        return addOperand(WrapperType::TENSOR_QUANT8_ASYMM, dimensioned);
    }
    uint32_t addBooleanOperand(Dimensioned dimensioned = Dimensioned::YES) {
        return addOperand(WrapperType::TENSOR_BOOL8, dimensioned);
    }
    // Create a constant TENSOR_FLOAT32 operand filled with zeros, and return
    // the corresponding operand index.  "dimensioned" must be a fully
    // specified kind (see numberOfElements()).
    uint32_t addFloatZeroOperand(Dimensioned dimensioned = Dimensioned::YES) {
        uint32_t opnd = addFloatOperand(dimensioned);
        std::vector<float> values(numberOfElements(dimensioned), 0.0f);
        uint32_t size = values.size() * sizeof(float);
        // Make sure the values are immediately copied so that it is safe to free the buffer after
        // the setOperandValue call
        CHECK_LE(size, ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES);
        setOperandValue(opnd, values.data(), size);
        return opnd;
    }

    // Create an operand of the specified type, and return the corresponding
    // operand index.
    //
    // The type determines which WrapperOperandType constructor is appropriate:
    // scalars take no dimensions, plain tensors take dimensions, quantized
    // tensors additionally need a scale (1.0f here), and per-channel quantized
    // tensors need channel quantization parameters.
    uint32_t addOperand(WrapperType wrapperType, Dimensioned dimensioned = Dimensioned::YES) {
        switch (static_cast<int>(wrapperType)) {
            case ANEURALNETWORKS_BOOL:
            case ANEURALNETWORKS_FLOAT16:
            case ANEURALNETWORKS_FLOAT32:
            case ANEURALNETWORKS_INT32:
            case ANEURALNETWORKS_UINT32:
            case ANEURALNETWORKS_MODEL:
            case ANEURALNETWORKS_OEM_SCALAR:
                return addOperand(WrapperOperandType{wrapperType, {}});

            case ANEURALNETWORKS_TENSOR_BOOL8:
            case ANEURALNETWORKS_TENSOR_FLOAT16:
            case ANEURALNETWORKS_TENSOR_FLOAT32:
            case ANEURALNETWORKS_TENSOR_OEM_BYTE:
                return addOperand(WrapperOperandType{wrapperType, dimensions(dimensioned)});

            case ANEURALNETWORKS_TENSOR_INT32:
            case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
            case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED:
            case ANEURALNETWORKS_TENSOR_QUANT8_SYMM:
            case ANEURALNETWORKS_TENSOR_QUANT16_ASYMM:
            case ANEURALNETWORKS_TENSOR_QUANT16_SYMM:
                return addOperand(WrapperOperandType{wrapperType, dimensions(dimensioned), 1.0f});

            case ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL:
                return addOperand(WrapperOperandType{wrapperType, dimensions(dimensioned),
                                                     WrapperSymmPerChannelQuantParams({1.0f}, 0)});

            default:
                ADD_FAILURE() << "Unexpected type " << static_cast<uint32_t>(wrapperType);
                return ~uint32_t(0);  // sentinel "invalid index"
        }
    }

    // Create an operand of the specified operand type, and return the
    // corresponding operand index.  Also records the type so that
    // addOperandOfSameType() can later clone it.
    uint32_t addOperand(const WrapperOperandType& wrapperOperandType) {
        mWrapperOperandType.push_back(wrapperOperandType);
        return WrapperModel::addOperand(&wrapperOperandType);
    }

    // Create an operation with any number of inputs and one output, specifying
    // the operation type (e.g., ANEURALNETWORKS_ADD), the input operand
    // indexes, and the output type (e.g., WrapperType::TENSOR_FLOAT32).
    // Returns the output operand index.
    uint32_t addExplicitOperationXTo1(ANeuralNetworksOperationType operationType,
                                      const std::vector<uint32_t>& inputs, WrapperType outputType,
                                      Dimensioned dimensionedOutput = Dimensioned::YES) {
        uint32_t output = addOperand(outputType, dimensionedOutput);
        addOperation(operationType, inputs, {output});
        return output;
    }

    // Create a V1_0 operation with two inputs and one output, specifying the
    // operation kind (where 0 is the first V1_0 operation) and the input
    // operand indexes.
    // Returns the output operand index.
    uint32_t addOperation2To1V1_0(uint32_t operation, const uint32_t input0, const uint32_t input1,
                                  Dimensioned dimensionedOutput = Dimensioned::YES) {
        CHECK_LE(operation, kLastEncodingV1_0 - kFirstEncodingV1_0);
        return addOperation2To1(operation + kFirstEncodingV1_0, input0, input1, dimensionedOutput);
    }

    // Create a V1_1 operation with two inputs and one output, specifying the
    // operation kind (where 0 is the first V1_1 operation) and the input
    // operand indexes.
    // Returns the output operand index.
    uint32_t addOperation2To1V1_1(uint32_t operation, const uint32_t input0, const uint32_t input1,
                                  Dimensioned dimensionedOutput = Dimensioned::YES) {
        CHECK_LE(operation, kLastEncodingV1_1 - kFirstEncodingV1_1);
        return addOperation2To1(operation + kFirstEncodingV1_1, input0, input1, dimensionedOutput);
    }

    // Create a V1_2 operation with two inputs and one output, specifying the
    // operation kind (where 0 is the first V1_2 operation) and the input
    // operand indexes.
    // Returns the output operand index.
    uint32_t addOperation2To1V1_2(uint32_t operation, const uint32_t input0, const uint32_t input1,
                                  Dimensioned dimensionedOutput = Dimensioned::YES) {
        CHECK_LE(operation, kLastEncodingV1_2 - kFirstEncodingV1_2);
        return addOperation2To1(operation + kFirstEncodingV1_2, input0, input1, dimensionedOutput);
    }

    // Create a V1_3 operation with two inputs and one output, specifying the
    // operation kind (where 0 is the first V1_3 operation) and the input
    // operand indexes.
    // Returns the output operand index.
    uint32_t addOperation1To1V1_3(uint32_t operation, const uint32_t input0,
                                  Dimensioned dimensionedOutput = Dimensioned::YES) {
        CHECK_LE(operation, kLastEncodingV1_3 - kFirstEncodingV1_3);
        return addOperation1To1(operation + kFirstEncodingV1_3, input0, dimensionedOutput);
    }

    // Create an OEM operation with one input and one output,
    // specifying the input operand index.  Returns the output operand
    // index.
    uint32_t addOperationOEM1To1(const uint32_t input,
                                 Dimensioned dimensionedOutput = Dimensioned::YES) {
        uint32_t output = addOperandOfSameType(input, dimensionedOutput);
        addOperation(ANEURALNETWORKS_OEM_OPERATION, {input}, {output});
        return output;
    }

    // Create an IF operation with the given condition operand and two
    // referenced models for the true and false cases.
    // Input layout: {cond, trueModel, falseModel, inputs...}.
    void addIfOperation(const uint32_t cond, const PartitioningModel& trueModel,
                        const PartitioningModel& falseModel, const std::vector<uint32_t>& inputs,
                        const std::vector<uint32_t>& outputs) {
        const uint32_t opndTrue = addRefModelOperand(trueModel);
        const uint32_t opndFalse = addRefModelOperand(falseModel);
        std::vector<uint32_t> ifInputs = {cond, opndTrue, opndFalse};
        ifInputs.insert(ifInputs.end(), inputs.begin(), inputs.end());
        addOperation(ANEURALNETWORKS_IF, ifInputs, outputs);
    }

    // Create a WHILE operation with the given condition and body referenced models.
    // Input layout: {condModel, bodyModel, inputs...}.
    void addWhileOperation(const PartitioningModel& condModel, const PartitioningModel& bodyModel,
                           const std::vector<uint32_t>& inputs,
                           const std::vector<uint32_t>& outputs) {
        const uint32_t condOperand = addRefModelOperand(condModel);
        const uint32_t bodyOperand = addRefModelOperand(bodyModel);
        std::vector<uint32_t> whileInputs = {condOperand, bodyOperand};
        whileInputs.insert(whileInputs.end(), inputs.begin(), inputs.end());
        addOperation(ANEURALNETWORKS_WHILE, whileInputs, outputs);
    }

    // Run the partitioning algorithm to create an ExecutionPlan.
    // The opaque wrapper handle is the runtime's ModelBuilder; the cast
    // mirrors what the NNAPI runtime itself does.
    int partitionTheWork(const std::vector<std::shared_ptr<Device>>& devices,
                         ExecutePreference preference, ExecutePriority priority,
                         const OptionalTimePoint& deadline, ExecutionPlan* plan) {
        return reinterpret_cast<ModelBuilder*>(getHandle())
                ->partitionTheWork(devices, static_cast<uint32_t>(preference),
                                   static_cast<int32_t>(priority), deadline, plan);
    }

#ifdef VERBOSE
    // This is a debugging utility function.
    void dump(const char* name) const {
        const ModelBuilder* mb = reinterpret_cast<const ModelBuilder*>(getHandle());
        ::dump(name, mb);
    }
#endif

   private:
    // Create an operation with two inputs and one output, specifying
    // the operation kind and the input operand indexes.
    // Returns the output operand index.
    //
    // The encoding-to-operation map (firstEncodingToOperation, defined
    // elsewhere in this file) yields the ANEURALNETWORKS_* type; the bool in
    // its mapped value indicates whether the operation takes a fuse code, in
    // which case the kind's offset from the map key is encoded as a constant
    // INT32 third input.
    uint32_t addOperation2To1(uint32_t operation, const uint32_t input0, const uint32_t input1,
                              Dimensioned dimensionedOutput = Dimensioned::YES) {
        auto it = firstEncodingToOperation.lower_bound(operation);
        CHECK(it != firstEncodingToOperation.end());
        ANeuralNetworksOperationType type = it->second.first;
        if (it->second.second) {
            int32_t fuseCode = operation - it->first;
            // Resolves to the private addIntOperand(int32_t) overload below,
            // creating a constant scalar operand.
            uint32_t input2 = addIntOperand(fuseCode);
            uint32_t output = addOperandOfSameType(input0, dimensionedOutput);
            addOperation(type, {input0, input1, input2}, {output});
            return output;
        } else {
            uint32_t output = addOperandOfSameType(input0, dimensionedOutput);
            addOperation(type, {input0, input1}, {output});
            return output;
        }
    }

    // Create an operation with one inputs and one output, specifying
    // the operation kind and the input operand indexes.
    // Returns the output operand index.
    uint32_t addOperation1To1(uint32_t operation, const uint32_t input0,
                              Dimensioned dimensionedOutput = Dimensioned::YES) {
        auto it = firstEncodingToOperation.lower_bound(operation);
        CHECK(it != firstEncodingToOperation.end());
        ANeuralNetworksOperationType type = it->second.first;

        uint32_t output = addOperandOfSameType(input0, dimensionedOutput);
        addOperation(type, {input0}, {output});
        return output;
    }

    // Create a scalar integer operand of the specified value, and
    // return the corresponding operand index.
    uint32_t addIntOperand(int32_t value) {
        uint32_t operand = addOperand(WrapperType::INT32);
        setOperandValue(operand, &value, sizeof(value));
        return operand;
    }

    // Create an operand from a model for control flow graphs.
    uint32_t addRefModelOperand(const PartitioningModel& model) {
        const uint32_t index = addOperand(WrapperType::MODEL);
        WrapperModel::setOperandValueFromModel(index, &model);
        return index;
    }

    // Create an operand of the same type as the specified operand,
    // and return the operand index of the new operand.
    //
    // If a tensor, the new operand will have the same rank as the specified
    // operand.  If dimensioned == Dimensioned::NO, then all dimensions of a new
    // tensor operand will be unspecified.  If dimensioned != Dimensioned::NO,
    // then all dimensions of a new tensor operand will have the implied value
    // (e.g., YES_1 means each dimension will have the value "1").
    uint32_t addOperandOfSameType(uint32_t operand, Dimensioned dimensioned = Dimensioned::YES) {
        WrapperOperandType type = mWrapperOperandType.at(operand);

        // Only kinds describing at most one dimension value are supported
        // here (e.g., RANK_2 would fail this expectation).
        const auto d = dimensions(dimensioned);
        EXPECT_TRUE(d.size() <= 1);
        for (auto& dimension : type.dimensions) {
            dimension = (dimensioned == Dimensioned::NO ? 0 : d[0]);
        }

        mWrapperOperandType.push_back(type);
        return WrapperModel::addOperand(&type);
    }

    // operand index to operand type
    std::vector<WrapperOperandType> mWrapperOperandType;
};
937 
938 // This class adds some utilities on top of WrapperCompilation.
// This class adds some utilities on top of WrapperCompilation.
class PartitioningCompilation : public WrapperCompilation {
   public:
    // Build a compilation restricted to the given devices.  The opaque
    // wrapper handles are the runtime's ModelBuilder/CompilationBuilder;
    // the casts mirror what the NNAPI runtime itself does.
    PartitioningCompilation(const PartitioningModel* model,
                            const std::vector<std::shared_ptr<Device>>& devices) {
        ModelBuilder* m = reinterpret_cast<ModelBuilder*>(model->getHandle());
        CompilationBuilder* c = nullptr;
        int result = m->createCompilation(&c, devices);
        EXPECT_EQ(result, 0);
        mCompilation = reinterpret_cast<ANeuralNetworksCompilation*>(c);
    }

    // Override the partitioning setting for this compilation (test hook).
    Result setPartitioning(uint32_t partitioning) {
        return static_cast<Result>(builder()->forTest_setPartitioning(partitioning));
    }

    // Simulate recoverable partitioning failure.
    Result failPartitioning() {
        return static_cast<Result>(
                builder()->forTest_failPartitioning(static_cast<int>(Result::OP_FAILED)));
    }

    using WrapperCompilation::finish;

    // Expose the plan computed by finish() for whitebox inspection.
    const ExecutionPlan& getExecutionPlan() const { return builder()->forTest_getExecutionPlan(); }

   private:
    CompilationBuilder* builder() { return reinterpret_cast<CompilationBuilder*>(getHandle()); }

    const CompilationBuilder* builder() const {
        return reinterpret_cast<const CompilationBuilder*>(getHandle());
    }
};
971 
// RETURN_TRUE() returns true from the enclosing function; when VERBOSE is
// defined it also logs the source line of the decision to std::cerr.
#ifdef VERBOSE
#define RETURN_TRUE()                                                 \
    {                                                                 \
        std::cerr << "returning true from " << __LINE__ << std::endl; \
        return true;                                                  \
    }
#else
#define RETURN_TRUE() \
    { return true; }
#endif
// RETURN_FALSE(MESSAGE) returns false from the enclosing function; when
// VERBOSE is defined it also logs the source line plus MESSAGE, which must be
// empty or a streaming expression beginning with "<<".
#ifdef VERBOSE
#define RETURN_FALSE(MESSAGE)                                                  \
    {                                                                          \
        std::cerr << "returning false from " << __LINE__ MESSAGE << std::endl; \
        return false;                                                          \
    }
#else
#define RETURN_FALSE(MESSAGE) \
    { return false; }
#endif
992 
993 class PartitioningTest : public ::testing::Test {
994    protected:
995     using DynamicTemporariesType = decltype(ExecutionPlan().forTest_flatGetDynamicTemporaries());
996     using RemapVectorType = ExecutionStep::RemapVectorType;
997     using StepModelOutputSetType = ExecutionStep::StepModelOutputSetType;
998 
999     // Used for PartitioningTest::checkExecutionPlanSteps.
1000     static constexpr const char* kIfStep = "IF";
1001     static constexpr const char* kWhileStep = "WHILE";
1002     static constexpr const char* kGotoStep = "GOTO";
1003 
    // No per-test setup is needed; present as the customary gtest hook.
    virtual void SetUp() {}
1005 
1006     // From a vector of DeviceSpecification, create a vector of
1007     // Devices.
1008     struct DeviceSpecification {
        // Construct from explicit capabilities; the version string defaults
        // to kVersionString and the HAL version to LATEST.
        DeviceSpecification(const std::string& name, const V1_3::Capabilities& capabilities,
                            uint32_t operationMask,
                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo)
            : mName(name),
              mVersionString(kVersionString),
              mCapabilities(capabilities),
              mOperationMask(operationMask),
              mOEM(oem) {}
        // Convenience: uniform performance "perf" used for both regular and
        // relaxed computation; delegates to the (perf, perfRelaxed) overload.
        DeviceSpecification(const std::string& name, float perf, uint32_t operationMask,
                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                            HalVersion halVersion = HalVersion::LATEST,
                            std::set<V1_3::OperationType> operationTypes = {})
            : DeviceSpecification(name, perf, perf, operationMask, oem, halVersion,
                                  operationTypes) {}
        // Convenience: default version string; delegates to the
        // (version, perf, perfRelaxed) overload.
        DeviceSpecification(const std::string& name, float perf, float perfRelaxed,
                            uint32_t operationMask,
                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                            HalVersion halVersion = HalVersion::LATEST,
                            std::set<V1_3::OperationType> operationTypes = {})
            : DeviceSpecification(name, kVersionString, perf, perfRelaxed, operationMask, oem,
                                  halVersion, operationTypes) {}
        // Convenience: uniform performance with an explicit version string;
        // delegates to the primary constructor.
        DeviceSpecification(const std::string& name, const std::string& version, float perf,
                            uint32_t operationMask,
                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                            HalVersion halVersion = HalVersion::LATEST,
                            std::set<V1_3::OperationType> operationTypes = {})
            : DeviceSpecification(name, version, perf, perf, operationMask, oem, halVersion,
                                  operationTypes) {}
        // Primary constructor: synthesizes a V1_3::Capabilities in which every
        // operand type gets uniform performance "perf" (execTime and
        // powerUsage alike), and relaxed FP32->FP16 computation gets
        // "perfRelaxed".
        DeviceSpecification(const std::string& name, const std::string& version, float perf,
                            float perfRelaxed, uint32_t operationMask,
                            PartitioningDriver::OEM oem = PartitioningDriver::OEMNo,
                            HalVersion halVersion = HalVersion::LATEST,
                            std::set<V1_3::OperationType> operationTypes = {})
            : mName(name),
              mVersionString(version),
              mHalVersion(halVersion),
              mOperationMask(operationMask),
              mOEM(oem),
              mOperationTypes(std::move(operationTypes)) {
            V1_0::PerformanceInfo perfInfo = {.execTime = perf, .powerUsage = perf};
            V1_0::PerformanceInfo perfRelaxedInfo = {.execTime = perfRelaxed,
                                                     .powerUsage = perfRelaxed};
            mCapabilities = {
                    .relaxedFloat32toFloat16PerformanceScalar = perfRelaxedInfo,
                    .relaxedFloat32toFloat16PerformanceTensor = perfRelaxedInfo,
                    .operandPerformance =
                            ::android::nn::nonExtensionOperandPerformance<HalVersion::V1_3>(
                                    perfInfo),
                    .ifPerformance = perfInfo,
                    .whilePerformance = perfInfo};
        }
        // Convenience: composes per-HAL-version operation masks into a single
        // composite mask via makeOperationMask(), then records the HAL
        // version (the delegated-to constructor defaults it to LATEST).
        DeviceSpecification(const std::string& name, float perf, HalVersion halVersion,
                            uint32_t operationMaskV1_0, uint32_t operationMaskV1_1 = 0,
                            uint32_t operationMaskV1_2 = 0, uint32_t operationMaskV1_3 = 0)
            : DeviceSpecification(
                      name, perf, perf,
                      makeOperationMask(halVersion, operationMaskV1_0, operationMaskV1_1,
                                        operationMaskV1_2, operationMaskV1_3)) {
            mHalVersion = halVersion;
        }
1069 
1070         std::string mName;
1071         std::string mVersionString;
1072         V1_3::Capabilities mCapabilities;
1073         HalVersion mHalVersion = HalVersion::LATEST;
1074         uint32_t mOperationMask;
1075         PartitioningDriver::OEM mOEM = PartitioningDriver::OEMNo;
1076         std::set<V1_3::OperationType> mOperationTypes;
1077 
1078         static constexpr char kVersionString[] = "JUST_AN_EXAMPLE";
1079 
1080        private:
        // This function takes four operation masks aligned at the low-order
        // bit -- one mask each for V1_0, V1_1, V1_2, and V1_3 -- and produces
        // a single composite operation mask, formed by shifting each of the
        // input operation masks appropriately and ORing the results together.
        //
        // For convenience, any bits of an input mask that are too high order
        // for that mask are discarded -- this allows ~0 to be a legal input
        // mask.
        //
        // For the sake of example, assume that each low order mask is 4 bits
        // wide, and take some artistic license to write literals in binary.
        // Then:
        //
        //     assert(makeOperationMask(HalVersion::V1_2, 0b0110, 0b1001, 0b0101, 0) ==
        //            0b 0101 1001 0110);
        //
        // This is used by a DeviceSpecification constructor to build a mask of
        // operations to be supported by the device.
makeOperationMask__anon63efd43f0110::PartitioningTest::DeviceSpecification1099         static uint32_t makeOperationMask(HalVersion halVersion, uint32_t operationMaskV1_0,
1100                                           uint32_t operationMaskV1_1, uint32_t operationMaskV1_2,
1101                                           uint32_t operationMaskV1_3) {
1102             if (halVersion < HalVersion::V1_3) {
1103                 CHECK(!operationMaskV1_3);
1104             }
1105             if (halVersion < HalVersion::V1_2) {
1106                 CHECK(!operationMaskV1_2);
1107             }
1108             if (halVersion < HalVersion::V1_1) {
1109                 CHECK(!operationMaskV1_1);
1110             }
1111             auto maskOfWidth = [](uint32_t width) -> uint32_t { return (1U << width) - 1; };
1112             static const uint32_t kOperationMaskV1_0 =
1113                     maskOfWidth(kLastEncodingV1_0 - kFirstEncodingV1_0 + 1);
1114             static const uint32_t kOperationMaskV1_1 =
1115                     maskOfWidth(kLastEncodingV1_1 - kFirstEncodingV1_1 + 1);
1116             static const uint32_t kOperationMaskV1_2 =
1117                     maskOfWidth(kLastEncodingV1_2 - kFirstEncodingV1_2 + 1);
1118             static const uint32_t kOperationMaskV1_3 =
1119                     maskOfWidth(kLastEncodingV1_3 - kFirstEncodingV1_3 + 1);
1120             return ((operationMaskV1_0 & kOperationMaskV1_0) << kFirstEncodingV1_0) |
1121                    ((operationMaskV1_1 & kOperationMaskV1_1) << kFirstEncodingV1_1) |
1122                    ((operationMaskV1_2 & kOperationMaskV1_2) << kFirstEncodingV1_2) |
1123                    ((operationMaskV1_3 & kOperationMaskV1_3) << kFirstEncodingV1_3);
1124         }
1125     };
makeDevices(std::vector<DeviceSpecification> specifications)1126     static std::vector<std::shared_ptr<Device>> makeDevices(
1127             std::vector<DeviceSpecification> specifications) {
1128         std::vector<std::shared_ptr<Device>> devices;
1129         for (const auto& specification : specifications) {
1130             SharedDevice device = nullptr;
1131             switch (specification.mHalVersion) {
1132                 case HalVersion::V1_3:
1133                     device = android::nn::makeSharedDevice(
1134                             specification.mName,
1135                             new PartitioningDriver(specification.mName.c_str(),
1136                                                    specification.mVersionString.c_str(),
1137                                                    specification.mCapabilities,
1138                                                    specification.mOperationMask, specification.mOEM,
1139                                                    specification.mOperationTypes));
1140                     break;
1141                 case HalVersion::V1_2:
1142                     device = android::nn::makeSharedDevice(
1143                             specification.mName,
1144                             new PartitioningDriverV1_2(
1145                                     specification.mName.c_str(),
1146                                     specification.mVersionString.c_str(),
1147                                     specification.mCapabilities, specification.mOperationMask,
1148                                     specification.mOEM, specification.mOperationTypes));
1149                     break;
1150                 case HalVersion::V1_1:
1151                     device = android::nn::makeSharedDevice(
1152                             specification.mName,
1153                             new PartitioningDriverV1_1(
1154                                     specification.mName.c_str(),
1155                                     specification.mVersionString.c_str(),
1156                                     specification.mCapabilities, specification.mOperationMask,
1157                                     specification.mOEM, specification.mOperationTypes));
1158                     break;
1159                 case HalVersion::V1_0:
1160                     device = android::nn::makeSharedDevice(
1161                             specification.mName,
1162                             new PartitioningDriverV1_0(
1163                                     specification.mName.c_str(),
1164                                     specification.mVersionString.c_str(),
1165                                     specification.mCapabilities, specification.mOperationMask,
1166                                     specification.mOEM, specification.mOperationTypes));
1167                     break;
1168                 default:
1169                     ADD_FAILURE() << "Unexpected";
1170             }
1171             auto driverDevice = DeviceManager::forTest_makeDriverDevice(device);
1172             devices.push_back(std::move(driverDevice));
1173         }
1174         devices.push_back(DeviceManager::getCpuDevice());
1175         return devices;
1176     }
1177 
stepsToString(const std::vector<std::string> & steps)1178     static std::string stepsToString(const std::vector<std::string>& steps) {
1179         std::stringstream ss;
1180         ss << "[ ";
1181         for (const auto& step : steps) {
1182             ss << step << " ";
1183         }
1184         ss << "]";
1185         return ss.str();
1186     }
1187 
1188     // Checks the type of each logical step in an execution plan.
1189     // Each entry of "expected" is either: kIfStep for IfStep, kWhileStep for WhileStep,
1190     // kGotoStep for GotoStep, or the device name for ExecutionStep.
    // Verifies that the plan's structure matches "expected": a single entry
    // means a SIMPLE plan on the named device; multiple entries mean a
    // COMPOUND plan whose logical steps, in order, are tagged kIfStep /
    // kWhileStep / kGotoStep or named by their execution device.
    void checkExecutionPlanSteps(const ExecutionPlan& plan,
                                 const std::vector<std::string>& expected) {
        ASSERT_GT(expected.size(), 0u);

        std::vector<std::string> actual;
        if (expected.size() == 1) {
            // One expected entry: the whole model runs on a single device.
            ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
            actual.emplace_back(plan.forTest_simpleGetDevice()->getName());
        } else {
            // Multiple entries: record one tag or device name per logical step.
            ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
            const auto& steps = plan.forTest_compoundGetSteps();
            for (const auto& step : steps) {
                if (step->isIf()) {
                    actual.emplace_back(kIfStep);
                } else if (step->isWhile()) {
                    actual.emplace_back(kWhileStep);
                } else if (step->isGoto()) {
                    actual.emplace_back(kGotoStep);
                } else if (step->isExecution()) {
                    actual.emplace_back(step->executionStep()->getDevice()->getName());
                } else {
                    ASSERT_FALSE(true) << "Unknown LogicalStep";
                }
            }
        }
        ASSERT_TRUE(actual == expected)
                << "expected: " << stepsToString(expected) << ", actual: " << stepsToString(actual);
    }
1219 
    /*-- Graph comparison -----------------------------------------------------------------*/
1221 
1222     // An operand with certain values for its lifetime does not have a
1223     // defining operation in the graph.  For the purposes of the graph
1224     // comparison algorithm, we encode the "defining operation" index of
1225     // such an operand as follows:
1226     // - NO_VALUE       kPseudoDefiningOperationNoValue
1227     // - SUBGRAPH_INPUT kPseudoDefiningOperationModelInput0 + (position in list of inputs)
1228     // - CONSTANT_COPY  kPseudoDefiningOperationConstantCopy0 + (constant value)
1229     //                    Note: For the graphs we build in this test, we
1230     //                          only expect to see 4-byte constants within
1231     //                          a very restricted range, so we only make
1232     //                          room for such constants in our encoding
1233     //                          space.
1234     // We do not expect to see CONSTANT_REFERENCE, and so we do not handle
1235     // it.
1236     //
1237     // The encoding is intended to be relatively human readable; it is not
1238     // designed to represent some optimal balance of ranges for the items
1239     // within its scope (actual operations, inputs, constants).
1240 
    enum PseudoDefiningOperationEncodings : uint32_t {
        // SUBGRAPH_INPUT: add the operand's position in the model's input list.
        kPseudoDefiningOperationModelInput0 = 0x80000000U,
        // CONSTANT_COPY: add the 4-byte constant's value (expected to be small).
        kPseudoDefiningOperationConstantCopy0 = 0x90000000U,
        // NO_VALUE: a single sentinel, no offset added.
        kPseudoDefiningOperationNoValue = 0xeeeeeeeeU,

        // lowest value for special encoding
        kPseudoDefiningOperationBase = 0x80000000U,

        // range of encoded input or constant
        kPseudoDefiningOperationRange = 0x10000000U,
    };
1252 
1253     // Build a map from operand to defining operation.
1254     // TODO: Replace map with vector?
    // Populates *defMap with operand index -> defining operation index for
    // every operand in "model".  Operands with no real defining operation
    // (inputs, NO_VALUE, CONSTANT_COPY) get the pseudo-definition encodings
    // described above.  Uses gtest fatal assertions; on failure, *defMap may
    // be incomplete (the caller checks HasFatalFailure()).
    void buildDefinitionMap(const ModelBuilder* model, std::map<uint32_t, uint32_t>* defMap) {
        // actual definitions
        ASSERT_LT(model->operationCount(), kPseudoDefiningOperationBase);
        for (uint32_t i = 0, e = model->operationCount(); i < e; i++) {
            const V1_3::Operation& operation = android::nn::convertToV1_3(model->getOperation(i));
            for (uint32_t output : operation.outputs) {
                (*defMap)[output] = i;
            }
        }
        // inputs
        ASSERT_LT(model->inputCount(), kPseudoDefiningOperationRange);
        for (uint32_t i = 0, e = model->inputCount(); i < e; i++) {
            (*defMap)[model->getInputOperandIndex(i)] = kPseudoDefiningOperationModelInput0 + i;
        }
        // look for NO_VALUE and CONSTANT_COPY
        for (uint32_t i = 0, e = model->operandCount(); i < e; i++) {
            const V1_3::Operand& operand = android::nn::convertToV1_3(model->getOperand(i));
            switch (operand.lifetime) {
                case V1_3::OperandLifeTime::NO_VALUE:
                    (*defMap)[i] = kPseudoDefiningOperationNoValue;
                    break;
                case V1_3::OperandLifeTime::CONSTANT_COPY: {
                    // Only 4-byte constants in a restricted range are expected
                    // (see the encoding comment above).
                    ASSERT_EQ(operand.location.length, sizeof(uint32_t));
                    uint32_t value;
                    memcpy(&value, model->getPointerToOperandValue(operand.location.offset),
                           sizeof(uint32_t));
                    ASSERT_LT(value, kPseudoDefiningOperationNoValue);
                    (*defMap)[i] = kPseudoDefiningOperationConstantCopy0 + value;
                    break;
                }
                case V1_3::OperandLifeTime::TEMPORARY_VARIABLE:
                case V1_3::OperandLifeTime::SUBGRAPH_INPUT:
                case V1_3::OperandLifeTime::SUBGRAPH_OUTPUT:
                    // already handled
                    break;
                default:
                    FAIL();
                    break;
            }
        }
        // validity check: every operand must now have exactly one definition.
        ASSERT_EQ(model->operandCount(), defMap->size());
    }
1298 
#ifdef VERBOSE
    // Debug helper: prints a definition/equivalence map as "name: { (k, v), ... }".
    // Values at or above kPseudoDefiningOperationBase are printed in hex so
    // pseudo-definition encodings stand out.
    void dump(const char* name, const std::map<uint32_t, uint32_t>* aMap) {
        const auto writeNum = [](uint32_t num) {
            if (num >= kPseudoDefiningOperationBase) {
                std::cout << "0x" << std::hex << num << std::dec;
            } else {
                std::cout << num;
            }
        };

        std::cout << name << ": { ";
        const char* separator = "";
        for (const auto& [key, value] : *aMap) {
            std::cout << separator;
            separator = ", ";
            std::cout << "(";
            writeNum(key);
            std::cout << ", ";
            writeNum(value);
            std::cout << ")";
        }
        std::cout << " }" << std::endl;
    }
#endif
1326 
compare(const Operand & operandA,const Operand & operandB)1327     bool compare(const Operand& operandA, const Operand& operandB) {
1328         if (operandA.type != operandB.type || operandA.dimensions != operandB.dimensions ||
1329             operandA.scale != operandB.scale || operandA.zeroPoint != operandB.zeroPoint) {
1330             return false;
1331         }
1332         return true;
1333     }
1334 
1335     // Compare two graphs.  We ignore operand and operation indexes (i.e.,
1336     // two nodes can be the same even if they are numbered differently)
1337     // but we also ignore semantics (e.g., even if an operation kind is
1338     // such that the operand is commutative, we still pay attention to the
1339     // order of its input operands).
1340     //
1341     // The comparison algorithm works by walking modelA from outputs
1342     // towards inputs, along the edge from each operand to its
1343     // defining operation, and then along the edges to the operation's
1344     // input operands.  At each step along the way, we try to match up
1345     // operands and operations from modelA with equivalent operands
1346     // and operations from modelB.
1347     //
1348     // We start by assuming that modelA's outputs and modelB's outputs
1349     // match positionally (e.g., modelA's first output operand is
1350     // equivalent to modelB's first output operand).  Once we've
1351     // discovered two equivalent operands (such as those outputs), we
1352     // place them in a work queue.  We repeatedly pull operands off
1353     // the queue and compare their defining operations and those
1354     // operations' input operands, to discover more pairs of
1355     // equivalent operands.  If we ever find operations that do not
1356     // match (e.g., because operation kind differs), or operands that
1357     // do not match (e.g., because operand type differs); or if we
1358     // ever find a conflict (we've already decided that operand A's
1359     // equivalent operand is B0, but it looks like we need its
1360     // equivalent operand to be B1); then the graphs compare unequal.
1361     // Otherwise, we'll eventually exhaust the work queue, and
1362     // conclude that the graphs compare equal.
1363     //
1364     // As a side effect of the comparison, we produce a map
1365     // *inputsAndOutputsBToA that maps from each of the model input and output
1366     // operand numbers of modelB to the corresponding operand numbers of modelA.
1367     // If the comparison returns false, the contents of the map are undefined.
    // Compares two graphs for structural equivalence (see the comment block
    // above for the algorithm) and, on success, fills *inputsAndOutputsBToA
    // with the mapping from modelB's input/output operand numbers to modelA's.
    // NOTE(review): RETURN_FALSE/RETURN_TRUE appear to be macros defined
    // earlier in this file (outside this view) -- presumably they return the
    // boolean, optionally with VERBOSE logging; confirm against the full file.
    bool compare(const ModelBuilder* modelA, const ModelBuilder* modelB,
                 std::map<uint32_t, uint32_t>* inputsAndOutputsBToA) {
        CHECK(inputsAndOutputsBToA != nullptr);
        EXPECT_TRUE(inputsAndOutputsBToA->empty());

#ifdef VERBOSE
        ::dump("compare(A)", modelA);
        ::dump("compare(B)", modelB);
#endif

        // Quick rejection: equivalent graphs must agree on all counts.
        if (modelA->operandCount() != modelB->operandCount() ||
            modelA->operationCount() != modelB->operationCount() ||
            modelA->inputCount() != modelB->inputCount() ||
            modelA->outputCount() != modelB->outputCount()) {
            RETURN_FALSE();
        }

        // Maps from operand index to index of defining operation.
        std::map<uint32_t, uint32_t> defsA, defsB;
        buildDefinitionMap(modelA, &defsA);
        buildDefinitionMap(modelB, &defsB);
        if (HasFatalFailure()) return false;

        // Maps from operand index in modelA to equivalent operand index
        // in modelB; and from operation index in modelA to equivalent
        // operation index in modelB.
        std::map<uint32_t, uint32_t> equivalentOperandsAToB;
        std::map<uint32_t, uint32_t> equivalentOperationsAToB;

        // Queue of operand indexes from modelA, each of whose defining
        // operations are to be checked for equivalence with modelB.
        std::queue<uint32_t> workQueueOperandsA;

        // Seed operand equivalence map and work queue from model outputs.
        for (uint32_t i = 0, e = modelA->outputCount(); i < e; i++) {
            uint32_t outputA = modelA->getOutputOperandIndex(i);
            uint32_t outputB = modelB->getOutputOperandIndex(i);
            if (!compare(modelA->getOperand(outputA), modelB->getOperand(outputB))) {
#ifdef VERBOSE
                std::cout << "modelA.output[" << i << "] = operand[" << outputA
                          << "] = " << toString(modelA->getOperand(outputA)) << std::endl;
                std::cout << "modelB.output[" << i << "] = operand[" << outputB
                          << "] = " << toString(modelB->getOperand(outputB)) << std::endl;
#endif
                RETURN_FALSE();
            }
            equivalentOperandsAToB[outputA] = outputB;
            workQueueOperandsA.push(outputA);
        }

#ifdef VERBOSE
        dump("defsA", &defsA);
        dump("defsB", &defsB);
#endif

        // Process the queue.  Each popped operand already has a known
        // equivalent in modelB; we check that their defining operations match
        // and enqueue any newly-discovered equivalent input operands.
        uint32_t pseudoDefinitionCount = 0;
        while (!workQueueOperandsA.empty()) {
#ifdef VERBOSE
            dump("equivalentOperandsAToB", &equivalentOperandsAToB);
            dump("equivalentOperationsAToB", &equivalentOperationsAToB);
#endif
            uint32_t operandIndexA = workQueueOperandsA.front();
#ifdef VERBOSE
            std::cout << "operandIndexA: " << operandIndexA << std::endl;
#endif
            workQueueOperandsA.pop();
            uint32_t operandIndexB = equivalentOperandsAToB.at(operandIndexA);

            uint32_t operationIndexA = defsA.at(operandIndexA);
            uint32_t operationIndexB = defsB.at(operandIndexB);
            auto it = equivalentOperationsAToB.find(operationIndexA);
            if (it != equivalentOperationsAToB.end()) {
                // Already matched: conflict (mapped to a different operation
                // in modelB) means the graphs are not equivalent.
                if (it->second != operationIndexB) {
                    RETURN_FALSE();
                }
                continue;
            }

            // We haven't identified an equivalent operation for
            // operationIndexA.

            if ((operationIndexA >= kPseudoDefiningOperationBase) !=
                (operationIndexB >= kPseudoDefiningOperationBase)) {
                RETURN_FALSE();
            }
            // Either both operands have pseudo-definitions, or neither
            // does.
            if (operationIndexA >= kPseudoDefiningOperationBase) {
                // Both operands have pseudo-definitions.  Pseudo-definitions
                // match only if their encodings are identical.
                if (operationIndexA != operationIndexB) {
                    RETURN_FALSE();
                }
                equivalentOperationsAToB[operationIndexA] = operationIndexB;
                ++pseudoDefinitionCount;
                continue;
            }

            // If we get here, neither operation A nor operation B is a
            // pseudo-definition.

            const Operation& operationA = modelA->getOperation(operationIndexA);
            const Operation& operationB = modelB->getOperation(operationIndexB);
            if (operationA.type != operationB.type ||
                operationA.inputs.size() != operationB.inputs.size() ||
                operationA.outputs.size() != operationB.outputs.size()) {
                RETURN_FALSE();
            }
            equivalentOperationsAToB[operationIndexA] = operationIndexB;
            for (uint32_t i = 0, e = operationA.inputs.size(); i < e; i++) {
                uint32_t inputA = operationA.inputs[i];
                uint32_t inputB = operationB.inputs[i];
                auto it = equivalentOperandsAToB.find(inputA);
                if (it != equivalentOperandsAToB.end()) {
                    if (it->second != inputB) {
                        RETURN_FALSE();
                    }
                    continue;
                }
                // We haven't identified an equivalent operand for inputA.
                if (!compare(modelA->getOperand(inputA), modelB->getOperand(inputB))) {
#ifdef VERBOSE
                    std::cout << "modelA.input[" << i << "] = operand[" << inputA
                              << "] = " << toString(modelA->getOperand(inputA)) << std::endl;
                    std::cout << "modelB.input[" << i << "] = operand[" << inputB
                              << "] = " << toString(modelB->getOperand(inputB)) << std::endl;
#endif
                    RETURN_FALSE();
                }
                equivalentOperandsAToB[inputA] = inputB;
                workQueueOperandsA.push(inputA);
            }
        }

        // Validity check: the traversal must have covered every operand and
        // every operation (real operations plus pseudo-definitions).
        if (modelA->operandCount() != defsA.size() || modelA->operandCount() != defsB.size() ||
            modelA->operandCount() != equivalentOperandsAToB.size() ||
            modelA->operationCount() + pseudoDefinitionCount != equivalentOperationsAToB.size()) {
            RETURN_FALSE();
        }

        // Build *inputsAndOutputsBToA
        for (uint32_t aInputIndex : modelA->getInputOperandIndexes()) {
            (*inputsAndOutputsBToA)[equivalentOperandsAToB.at(aInputIndex)] = aInputIndex;
        }
        for (uint32_t aOutputIndex : modelA->getOutputOperandIndexes()) {
            (*inputsAndOutputsBToA)[equivalentOperandsAToB.at(aOutputIndex)] = aOutputIndex;
        }

        RETURN_TRUE();
    }
1519 
1520     /*-------------------------------------------------------------------------------------*/
1521 
1522     // As a side effect of the comparison, we produce a map
1523     // *inputsAndOutputsModelToStep that maps from each of the model input and
1524     // output operand numbers of "model" to the corresponding operand numbers of
1525     // the step model from "step".  If the comparison returns false, the contents
1526     // of the map are undefined.
compare(const ExecutionStep * step,const PartitioningModel * model,std::shared_ptr<Device> device,std::map<uint32_t,uint32_t> * inputsAndOutputsModelToStep)1527     bool compare(const ExecutionStep* step, const PartitioningModel* model,
1528                  std::shared_ptr<Device> device,
1529                  std::map<uint32_t, uint32_t>* inputsAndOutputsModelToStep) {
1530         return (step->getDevice() == device) &&
1531                compare(step->getStepModel(),
1532                        reinterpret_cast<const ModelBuilder*>(model->getHandle()),
1533                        inputsAndOutputsModelToStep);
1534     }
1535 
    // Verifies that "logicalStep" is an ExecutionStep on "device" whose step
    // model is graph-equivalent to "model", and that all of the step's remap
    // vectors/sets match the expected ones.  The expected values are expressed
    // in terms of the original model's operand numbers and are translated via
    // the model-to-step map produced by the graph comparison.
    void compare(const std::shared_ptr<LogicalStep> logicalStep, const PartitioningModel* model,
                 std::shared_ptr<Device> device, const RemapVectorType& modelInputs,
                 const RemapVectorType& modelOutputs, const RemapVectorType& tempsAsStepModelInputs,
                 const StepModelOutputSetType& tempsAsStepModelOutputs,
                 const RemapVectorType& outputsAsStepModelInputs,
                 const std::set<uint32_t>& modelOutputsThatAreDownstreamInputs) {
        ASSERT_TRUE(logicalStep->isExecution());
        const ExecutionStep* step = logicalStep->executionStep();
        std::map<uint32_t, uint32_t> inputsAndOutputsModelToStep;
        // The graph comparison both checks equivalence and fills in the
        // translation map used by the compare*() helpers below.
        ASSERT_NO_FATAL_FAILURE(
                ASSERT_TRUE(compare(step, model, device, &inputsAndOutputsModelToStep)));
        ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep, step->getModelInputs(),
                                        modelInputs));
        ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep, step->getModelOutputs(),
                                        modelOutputs));
        ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep,
                                        step->getTempsAsStepModelInputs(), tempsAsStepModelInputs));
        ASSERT_TRUE(compareStepModelOutputSets(inputsAndOutputsModelToStep,
                                               step->getTempsAsStepModelOutputs(),
                                               tempsAsStepModelOutputs));
        ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep,
                                        step->getOutputsAsStepModelInputs(),
                                        outputsAsStepModelInputs));
        ASSERT_TRUE(modelOutputsThatAreDownstreamInputs ==
                    step->getModelOutputsThatAreDownstreamInputs());
    }
1562 
1563    private:
compareRemapVectors(const std::map<uint32_t,uint32_t> & inputsAndOutputsModelToStep,const RemapVectorType & step,RemapVectorType model)1564     static bool compareRemapVectors(const std::map<uint32_t, uint32_t>& inputsAndOutputsModelToStep,
1565                                     const RemapVectorType& step, RemapVectorType model) {
1566         std::transform(model.begin(), model.end(), model.begin(),
1567                        [&inputsAndOutputsModelToStep](const RemapVectorType::value_type& val) {
1568                            return std::make_pair(val.first,
1569                                                  inputsAndOutputsModelToStep.at(val.second));
1570                        });
1571         return step == model;
1572     }
1573 
compareStepModelOutputSets(const std::map<uint32_t,uint32_t> & inputsAndOutputsModelToStep,const StepModelOutputSetType & step,const StepModelOutputSetType & model)1574     static bool compareStepModelOutputSets(
1575             const std::map<uint32_t, uint32_t>& inputsAndOutputsModelToStep,
1576             const StepModelOutputSetType& step, const StepModelOutputSetType& model) {
1577         StepModelOutputSetType modelTransformed;
1578         std::transform(
1579                 model.begin(), model.end(), std::inserter(modelTransformed, modelTransformed.end()),
1580                 [&inputsAndOutputsModelToStep](const StepModelOutputSetType::value_type& val) {
1581                     return std::make_pair(val.first, inputsAndOutputsModelToStep.at(val.second));
1582                 });
1583         return step == modelTransformed;
1584     }
1585 };
1586 
// Builds a two-operation model and checks the partitioner's whitebox behavior
// in three configurations: one device clearly best (SIMPLE plan on it), no
// device better than CPU (SIMPLE plan on CPU), and two devices that each
// support one operation (COMPOUND plan with one step per device).
TEST_F(PartitioningTest, SimpleModel) {
    PartitioningModel model;
    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addFloatOperand();
    uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1);
    uint32_t opnd3 = model.addFloatOperand();
    uint32_t opnd4 = model.addOperation2To1V1_0(1, opnd2, opnd3);
    model.identifyInputsAndOutputs({opnd0, opnd1, opnd3}, {opnd4});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // Simple partition (two devices are each capable of everything, one is the best).
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    const auto devicesA = makeDevices({{"bad", 0.9, ~0U}, {"good", 0.5, ~0U}});
    ExecutionPlan planA;
    ASSERT_EQ(model.partitionTheWork(devicesA, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planA),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(planA.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(planA.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_NE(planA.forTest_simpleGetDevice().get(), nullptr);
    ASSERT_EQ(planA.forTest_simpleGetDevice()->getName(), "good");

    // Simple partition (two devices are each capable of everything, none better than CPU).
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    const auto devicesC = makeDevices({{"bad", 1.1, ~0U}, {"bad2", 1.0, ~0U}});
    ExecutionPlan planC;
    ASSERT_EQ(model.partitionTheWork(devicesC, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planC),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(planC.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(planC.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_EQ(planC.forTest_simpleGetDevice(), DeviceManager::getCpuDevice());

    // Compound partition (two devices, each is capable of one of the
    // two operations).  We could do more extensive checking here --
    // for example, verify that each step within the plan has the
    // correct (model and step model)x(inputs and outputs).
    // Device "0" supports only operation kind 0; device "1" only kind 1.
    const auto devicesB = makeDevices({{"0", 0.9, 1 << 0}, {"1", 0.5, 1 << 1}});
    ExecutionPlan planB;
    ASSERT_EQ(model.partitionTheWork(devicesB, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planB),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(planB.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(planB.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
    const auto& stepsB = planB.forTest_compoundGetSteps();
    ASSERT_EQ(stepsB.size(), size_t(2));
    {
        // Build a model to compare against the step model from stepsB[0].
        PartitioningModel modelB0;
        uint32_t b0Opnd0 = modelB0.addFloatOperand();
        uint32_t b0Opnd1 = modelB0.addFloatOperand();
        uint32_t b0Opnd2 = modelB0.addOperation2To1V1_0(0, b0Opnd0, b0Opnd1);
        modelB0.identifyInputsAndOutputs({b0Opnd0, b0Opnd1}, {b0Opnd2});
        modelB0.finish();
        ASSERT_TRUE(modelB0.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[0], &modelB0, devicesB[0],
                        RemapVectorType{{opnd0, b0Opnd0}, {opnd1, b0Opnd1}},  // modelInputs
                        RemapVectorType{},                                    // modelOutputs
                        RemapVectorType{},                         // tempsAsStepModelInputs
                        StepModelOutputSetType{{opnd2, b0Opnd2}},  // tempsAsStepModelOutputs
                        RemapVectorType{},                         // outputsAsStepModelInputs
                        {}));  // modelOutputsThatAreDownstreamInputs
    }
    {
        // Build a model to compare against the step model from stepsB[1].
        PartitioningModel modelB1;
        uint32_t b1Opnd2 = modelB1.addFloatOperand();
        uint32_t b1Opnd3 = modelB1.addFloatOperand();
        uint32_t b1Opnd4 = modelB1.addOperation2To1V1_0(1, b1Opnd2, b1Opnd3);
        // Note: In the partitioning algorithm, step model inputs follow
        // model inputs.  In the original model "model", opnd2 is not
        // an input; so in the step model "modelB1", the corresponding
        // input b1Opnd2 is a step model input, and must follow the
        // model input b1Opnd3.
        modelB1.identifyInputsAndOutputs({b1Opnd3, b1Opnd2}, {b1Opnd4});
        modelB1.finish();
        ASSERT_TRUE(modelB1.isValid());

        ASSERT_NO_FATAL_FAILURE(compare(
                stepsB[1], &modelB1, devicesB[1], RemapVectorType{{opnd3, b1Opnd3}},  // modelInputs
                RemapVectorType{{opnd4, b1Opnd4}},  // modelOutputs
                RemapVectorType{{opnd2, b1Opnd2}},  // tempsAsStepModelInputs
                StepModelOutputSetType{},           // tempsAsStepModelOutputs
                RemapVectorType{},                  // outputsAsStepModelInputs
                {}));                               // modelOutputsThatAreDownstreamInputs
    }
}
1679 
// Verifies "slicing" a model across devices of different HAL versions:
// a single device supporting the newest version takes the whole model
// (SIMPLE plan), while a mix of older devices forces a COMPOUND plan
// with one step per HAL version, and the step models' inputs/outputs
// must be wired up exactly as the partitioning algorithm specifies.
TEST_F(PartitioningTest, SliceModel) {
    // One operation per HAL version (plus a second V1_0 operation), so
    // that each version-restricted device can claim exactly one slice.
    PartitioningModel model;
    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addFloatOperand();
    uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1);
    uint32_t opnd3 = model.addOperation2To1V1_0(1, opnd0, opnd1);
    uint32_t opnd4 = model.addOperation2To1V1_1(0, opnd0, opnd1);
    uint32_t opnd5 = model.addOperation2To1V1_2(0, opnd2, opnd3);
    uint32_t opnd6 = model.addOperation1To1V1_3(0, opnd2);
    model.identifyInputsAndOutputs({opnd0, opnd1}, {opnd2, opnd4, opnd5, opnd6});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // Simple partition (V1_0, V1_1, V1_2, V1_3 devices are available; V1_3 has best perf).
    // No need to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.
    const auto devicesA = makeDevices({{"V1_0", 0.8, HalVersion::V1_0, ~0U},
                                       {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
                                       {"V1_2", 0.6, HalVersion::V1_2, ~0U, ~0U, ~0U},
                                       {"V1_3", 0.5, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}});
    ExecutionPlan planA;
    ASSERT_EQ(model.partitionTheWork(devicesA, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planA),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(planA.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(planA.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_NE(planA.forTest_simpleGetDevice().get(), nullptr);
    ASSERT_EQ(planA.forTest_simpleGetDevice()->getName(), "V1_3");

    // Compound partition (V1_0, V1_1, V1_2 devices are available, in decreasing
    // order of performance; model is distributed across all three devices).
    const auto devicesB = makeDevices({{"V1_0", 0.6, HalVersion::V1_0, ~0U},
                                       {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
                                       {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U},
                                       {"V1_3", 0.9, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}});
    ExecutionPlan planB;
    ASSERT_EQ(model.partitionTheWork(devicesB, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &planB),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(planB.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(planB.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
    const auto& stepsB = planB.forTest_compoundGetSteps();
    ASSERT_EQ(stepsB.size(), size_t(4));
    {
        // Build a model to compare against the step model from stepsB[0].
        // Step 0: the V1_1 operation, assigned to the "V1_1" device.
        PartitioningModel modelB0;
        uint32_t b0Opnd0 = modelB0.addFloatOperand();
        uint32_t b0Opnd1 = modelB0.addFloatOperand();
        uint32_t b0Opnd2 = modelB0.addOperation2To1V1_1(0, b0Opnd0, b0Opnd1);
        modelB0.identifyInputsAndOutputs({b0Opnd0, b0Opnd1}, {b0Opnd2});
        modelB0.finish();
        ASSERT_TRUE(modelB0.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[0], &modelB0, devicesB[1],
                        RemapVectorType{{opnd0, b0Opnd0}, {opnd1, b0Opnd1}},  // modelInputs
                        RemapVectorType{{opnd4, b0Opnd2}},                    // modelOutputs
                        RemapVectorType{},         // tempsAsStepModelInputs
                        StepModelOutputSetType{},  // tempsAsStepModelOutputs
                        RemapVectorType{},         // outputsAsStepModelInputs
                        {}));                      // modelOutputsThatAreDownstreamInputs
    }
    {
        // Build a model to compare against the step model from stepsB[1].
        // Step 1: both V1_0 operations, assigned to the "V1_0" device.
        PartitioningModel modelB1;
        uint32_t b1Opnd0 = modelB1.addFloatOperand();
        uint32_t b1Opnd1 = modelB1.addFloatOperand();
        uint32_t b1Opnd2 = modelB1.addOperation2To1V1_0(0, b1Opnd0, b1Opnd1);
        uint32_t b1Opnd3 = modelB1.addOperation2To1V1_0(1, b1Opnd0, b1Opnd1);
        modelB1.identifyInputsAndOutputs({b1Opnd0, b1Opnd1}, {b1Opnd2, b1Opnd3});
        modelB1.finish();
        ASSERT_TRUE(modelB1.isValid());

        // Note that this is also an important test that we can detect
        // modelOutputsThatAreDownstreamInputs.
        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[1], &modelB1, devicesB[0],
                        RemapVectorType{{opnd0, b1Opnd0}, {opnd1, b1Opnd1}},  // modelInputs
                        RemapVectorType{{opnd2, b1Opnd2}},                    // modelOutputs
                        RemapVectorType{},                         // tempsAsStepModelInputs
                        StepModelOutputSetType{{opnd3, b1Opnd3}},  // tempsAsStepModelOutputs
                        RemapVectorType{},                         // outputsAsStepModelInputs
                        {0u}));  // modelOutputsThatAreDownstreamInputs
    }
    {
        // Build a model to compare against the step model from stepsB[2].
        // Step 2: the V1_3 operation, assigned to the "V1_3" device.
        PartitioningModel modelB2;
        uint32_t b2Opnd0 = modelB2.addFloatOperand();
        uint32_t b2Opnd1 = modelB2.addOperation1To1V1_3(0, b2Opnd0);
        // Note: In the partitioning algorithm, temps that are
        // step model inputs precede model outputs that are step model
        // inputs.
        modelB2.identifyInputsAndOutputs({b2Opnd0}, {b2Opnd1});
        modelB2.finish();
        ASSERT_TRUE(modelB2.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[2], &modelB2, devicesB[3], RemapVectorType{},  // modelInputs
                        RemapVectorType{{opnd6, b2Opnd1}},                    // modelOutputs
                        RemapVectorType{},                  // tempsAsStepModelInputs
                        StepModelOutputSetType{},           // tempsAsStepModelOutputs
                        RemapVectorType{{opnd2, b2Opnd0}},  // outputsAsStepModelInputs
                        {}));                               // modelOutputsThatAreDownstreamInputs
    }
    {
        // Build a model to compare against the step model from stepsB[3].
        // Step 3: the V1_2 operation, assigned to the "V1_2" device.
        PartitioningModel modelB3;
        uint32_t b3Opnd0 = modelB3.addFloatOperand();
        uint32_t b3Opnd1 = modelB3.addFloatOperand();
        uint32_t b3Opnd2 = modelB3.addOperation2To1V1_2(0, b3Opnd0, b3Opnd1);
        // Note: In the partitioning algorithm, temps that are
        // step model inputs precede model outputs that are step model
        // inputs.  In the original model "model", opnd3 is a temp and
        // opnd2 is a model output; so in the step model "modelB3", the
        // corresponding inputs b3Opnd1 and b3Opnd0 must appear in
        // that order.
        modelB3.identifyInputsAndOutputs({b3Opnd1, b3Opnd0}, {b3Opnd2});
        modelB3.finish();
        ASSERT_TRUE(modelB3.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(stepsB[3], &modelB3, devicesB[2], RemapVectorType{},  // modelInputs
                        RemapVectorType{{opnd5, b3Opnd2}},                    // modelOutputs
                        RemapVectorType{{opnd3, b3Opnd1}},  // tempsAsStepModelInputs
                        StepModelOutputSetType{},           // tempsAsStepModelOutputs
                        RemapVectorType{{opnd2, b3Opnd0}},  // outputsAsStepModelInputs
                        {}));                               // modelOutputsThatAreDownstreamInputs
    }

    // TODO: Make sure this still works when we have multiple devices
    // of same version available for slicing. An easy (?) choice would
    // be to route the two different V1_0 operations to different
    // devices.
}
1814 
// A model containing only a V1_3 operation cannot be sliced down to any
// older device: the whole model must land on the V1_3 driver even though
// other drivers are present.
TEST_F(PartitioningTest, SliceModelToEmpty) {
    // Trivial one-operation model using a V1_3-only operation.
    PartitioningModel model;
    const uint32_t input = model.addFloatOperand();
    const uint32_t output = model.addOperation1To1V1_3(0, input);
    model.identifyInputsAndOutputs({input}, {output});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // Drivers for every HAL version are present, but only the V1_3 driver
    // supports the single operation in the model.  No need to compare the
    // original model to the model from the plan -- we didn't actually do
    // any partitioning.
    const auto allDevices = makeDevices({{"V1_0", 0.6, HalVersion::V1_0, ~0U},
                                         {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
                                         {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U},
                                         {"V1_3", 0.9, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}});
    ExecutionPlan thePlan;
    ASSERT_EQ(model.partitionTheWork(allDevices, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &thePlan),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(thePlan.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(thePlan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    const auto chosenDevice = thePlan.forTest_simpleGetDevice();
    ASSERT_NE(chosenDevice.get(), nullptr);
    ASSERT_EQ(chosenDevice->getName(), "V1_3");
}
1839 
// Verifies that operations the device cannot handle are routed to the
// CPU, producing a three-step COMPOUND plan: device, CPU, then the same
// device again -- and that each step model's inputs/outputs are remapped
// exactly as the partitioning algorithm specifies.
TEST_F(PartitioningTest, Cpu) {
    // Here's a model where some operations execute only on the Cpu.
    // To make things interesting, we produce three partitions --
    // device, cpu, same-device.

    static const uint32_t kCpuOp = 1;
    static const uint32_t kDevOp = 2;

    // The sole device supports only kDevOp; kCpuOp operations must fall
    // back to the CPU device.
    const auto devices = makeDevices({{"1", 0.5, 1 << kDevOp}});

    PartitioningModel model;

    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addFloatOperand();

    // First partition: two device-capable operations.
    uint32_t opnd2 = model.addOperation2To1V1_0(kDevOp, opnd0, opnd1);
    uint32_t opnd3 = model.addOperation2To1V1_0(kDevOp, opnd0, opnd2);

    // Second partition: two CPU-only operations consuming the first
    // partition's results.
    uint32_t opnd4 = model.addOperation2To1V1_0(kCpuOp, opnd0, opnd3);
    uint32_t opnd5 = model.addOperation2To1V1_0(kCpuOp, opnd2, opnd4);

    uint32_t opnd6 = model.addFloatOperand();

    // Third partition: back on the device, consuming outputs of both
    // earlier partitions.
    uint32_t opnd7 = model.addOperation2To1V1_0(kDevOp, opnd3, opnd5);
    uint32_t opnd8 = model.addOperation2To1V1_0(kDevOp, opnd6, opnd7);

    model.identifyInputsAndOutputs({opnd0, opnd1, opnd6}, {opnd4, opnd8});
    model.finish();
    ASSERT_TRUE(model.isValid());

    ExecutionPlan plan;
    ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &plan),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
    const auto& steps = plan.forTest_compoundGetSteps();
    ASSERT_EQ(steps.size(), size_t(3));
    {
        const auto& step0 = steps[0];

        // Build a model to compare against the step model from steps[0].
        // Step 0 runs the two kDevOp operations on the device; both of its
        // results are temporaries consumed by later steps.
        PartitioningModel model0;
        uint32_t m0Opnd0 = model0.addFloatOperand();
        uint32_t m0Opnd1 = model0.addFloatOperand();
        uint32_t m0Opnd2 = model0.addOperation2To1V1_0(kDevOp, m0Opnd0, m0Opnd1);
        uint32_t m0Opnd3 = model0.addOperation2To1V1_0(kDevOp, m0Opnd0, m0Opnd2);
        model0.identifyInputsAndOutputs({m0Opnd0, m0Opnd1}, {m0Opnd2, m0Opnd3});
        model0.finish();
        ASSERT_TRUE(model0.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(step0, &model0, devices[0],
                        RemapVectorType{{opnd0, m0Opnd0}, {opnd1, m0Opnd1}},  // modelInputs
                        RemapVectorType{},                                    // modelOutputs
                        RemapVectorType{},  // tempsAsStepModelInputs
                        StepModelOutputSetType{{opnd2, m0Opnd2},
                                               {opnd3, m0Opnd3}},  // tempsAsStepModelOutputs
                        RemapVectorType{},                         // outputsAsStepModelInputs
                        {}));  // modelOutputsThatAreDownstreamInputs
    }
    {
        const auto& step1 = steps[1];

        // Build a model to compare against the step model from steps[1].
        // Step 1 runs the two kCpuOp operations on the CPU device, taking
        // step 0's temporaries as inputs.
        PartitioningModel model1;
        uint32_t m1Opnd0 = model1.addFloatOperand();
        uint32_t m1Opnd3 = model1.addFloatOperand();
        uint32_t m1Opnd4 = model1.addOperation2To1V1_0(kCpuOp, m1Opnd0, m1Opnd3);
        uint32_t m1Opnd2 = model1.addFloatOperand();
        uint32_t m1Opnd5 = model1.addOperation2To1V1_0(kCpuOp, m1Opnd2, m1Opnd4);
        model1.identifyInputsAndOutputs({m1Opnd0, m1Opnd3, m1Opnd2}, {m1Opnd4, m1Opnd5});
        model1.finish();
        ASSERT_TRUE(model1.isValid());

        ASSERT_NO_FATAL_FAILURE(compare(
                step1, &model1, DeviceManager::getCpuDevice(),
                RemapVectorType{{opnd0, m1Opnd0}},                    // modelInputs
                RemapVectorType{{opnd4, m1Opnd4}},                    // modelOutputs
                RemapVectorType{{opnd3, m1Opnd3}, {opnd2, m1Opnd2}},  // tempsAsStepModelInputs
                StepModelOutputSetType{{opnd5, m1Opnd5}},             // tempsAsStepModelOutputs
                RemapVectorType{},                                    // outputsAsStepModelInputs
                {}));  // modelOutputsThatAreDownstreamInputs
    }
    {
        const auto& step2 = steps[2];

        // Build a model to compare against the step model from steps[2].
        // Step 2 runs the final two kDevOp operations back on the device,
        // consuming temporaries from both earlier steps.
        PartitioningModel model2;
        uint32_t m2Opnd3 = model2.addFloatOperand();
        uint32_t m2Opnd5 = model2.addFloatOperand();
        uint32_t m2Opnd7 = model2.addOperation2To1V1_0(kDevOp, m2Opnd3, m2Opnd5);
        uint32_t m2Opnd6 = model2.addFloatOperand();
        uint32_t m2Opnd8 = model2.addOperation2To1V1_0(kDevOp, m2Opnd6, m2Opnd7);
        model2.identifyInputsAndOutputs({m2Opnd6, m2Opnd3, m2Opnd5}, {m2Opnd8});
        model2.finish();
        ASSERT_TRUE(model2.isValid());

        ASSERT_NO_FATAL_FAILURE(compare(
                step2, &model2, devices[0], RemapVectorType{{opnd6, m2Opnd6}},  // modelInputs
                RemapVectorType{{opnd8, m2Opnd8}},                              // modelOutputs
                RemapVectorType{{opnd3, m2Opnd3}, {opnd5, m2Opnd5}},  // tempsAsStepModelInputs
                StepModelOutputSetType{},                             // tempsAsStepModelOutputs
                RemapVectorType{},                                    // outputsAsStepModelInputs
                {}));  // modelOutputsThatAreDownstreamInputs
    }
}
1947 
// Verifies the three partitioning settings a compilation may request:
// kPartitioningNo (never partition), kPartitioningWithFallback
// (partition, but fall back to CPU on a recoverable failure), and
// kPartitioningWithoutFallback (partition, and fail on failure).
TEST_F(PartitioningTest, SetPartitioning) {
    // Two-operation model.  Operation 0 yields an operand of unknown
    // dimensions (Dimensioned::NO) that operation 1 then consumes.
    PartitioningModel model;
    const uint32_t inA = model.addFloatOperand();
    const uint32_t inB = model.addFloatOperand();
    const uint32_t temp = model.addOperation2To1V1_0(0, inA, inB, Dimensioned::NO);
    const uint32_t inC = model.addFloatOperand();
    const uint32_t result = model.addOperation2To1V1_0(1, temp, inC);
    model.identifyInputsAndOutputs({inA, inB, inC}, {result});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // One device that can and should execute operation 0.
    const auto devices = makeDevices({{"hw", 0.5, (1 << 0)}});

    // With kPartitioningNo, partitioning must not even be attempted, so
    // the simulated failure is irrelevant: expect a SIMPLE plan on the
    // CPU.  (No need to compare the original model against the plan's
    // model -- no partitioning happened.)
    PartitioningCompilation noPartitioning(&model, devices);
    ASSERT_EQ(noPartitioning.setPartitioning(DeviceManager::kPartitioningNo), Result::NO_ERROR);
    ASSERT_EQ(noPartitioning.failPartitioning(), Result::NO_ERROR);
    ASSERT_EQ(noPartitioning.finish(), Result::NO_ERROR);
    ASSERT_EQ(noPartitioning.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_EQ(noPartitioning.getExecutionPlan().forTest_simpleGetDevice(),
              DeviceManager::getCpuDevice());

    // With kPartitioningWithFallback, a simulated recoverable failure
    // during partitioning should fall back to a SIMPLE plan on the CPU
    // and still report overall success.
    PartitioningCompilation withFallback(&model, devices);
    ASSERT_EQ(withFallback.setPartitioning(DeviceManager::kPartitioningWithFallback),
              Result::NO_ERROR);
    ASSERT_EQ(withFallback.failPartitioning(), Result::NO_ERROR);
    ASSERT_EQ(withFallback.finish(), Result::NO_ERROR);
    ASSERT_EQ(withFallback.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_EQ(withFallback.getExecutionPlan().forTest_simpleGetDevice(),
              DeviceManager::getCpuDevice());

    // With kPartitioningWithoutFallback, the same simulated failure must
    // surface as an error and leave the plan in the ERROR state.
    PartitioningCompilation withoutFallback(&model, devices);
    ASSERT_EQ(withoutFallback.setPartitioning(DeviceManager::kPartitioningWithoutFallback),
              Result::NO_ERROR);
    ASSERT_EQ(withoutFallback.failPartitioning(), Result::NO_ERROR);
    ASSERT_EQ(withoutFallback.finish(), Result::OP_FAILED);
    ASSERT_EQ(withoutFallback.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::ERROR);
}
1995 
// Regression test for http://b/69166603:
//     "partitioned compilation and execution yields wrong results when model output is step model
//     input"
TEST_F(PartitioningTest, ModelOutputAsStepModelInput) {
    // opnd2 is both a model output and the input of the second
    // operation, so it must flow from step 0 to step 1 as an
    // "outputsAsStepModelInputs" entry, and step 0 must record it as a
    // model output that is a downstream input.
    PartitioningModel model;
    uint32_t opnd0 = model.addFloatOperand();
    uint32_t opnd1 = model.addFloatOperand();
    uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1);
    uint32_t opnd3 = model.addOperation2To1V1_0(1, opnd2, opnd2);
    model.identifyInputsAndOutputs({opnd0, opnd1}, {opnd2, opnd3});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // Compound partition (two devices, each is capable of one of the
    // two operations).  We could do more extensive checking here --
    // for example, verify that each step within the plan has the
    // correct (model and step model)x(inputs and outputs).
    const auto devices = makeDevices({{"0", 0.5, 1 << 0}, {"1", 0.5, 1 << 1}});
    ExecutionPlan plan;
    ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                     ExecutePriority::DEFAULT, {}, &plan),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty());
    ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
    const auto& steps = plan.forTest_compoundGetSteps();
    ASSERT_EQ(steps.size(), size_t(2));
    {
        // Build a model to compare against the step model from steps[0].
        // Step 0 produces opnd2 as a model output that is also consumed
        // downstream (hence the {0u} downstream-inputs expectation).
        PartitioningModel model0;
        uint32_t m0Opnd0 = model0.addFloatOperand();
        uint32_t m0Opnd1 = model0.addFloatOperand();
        uint32_t m0Opnd2 = model0.addOperation2To1V1_0(0, m0Opnd0, m0Opnd1);
        model0.identifyInputsAndOutputs({m0Opnd0, m0Opnd1}, {m0Opnd2});
        model0.finish();
        ASSERT_TRUE(model0.isValid());
        ASSERT_NO_FATAL_FAILURE(
                compare(steps[0], &model0, devices[0],
                        RemapVectorType{{opnd0, m0Opnd0}, {opnd1, m0Opnd1}},  // modelInputs
                        RemapVectorType{{opnd2, m0Opnd2}},                    // modelOutputs
                        RemapVectorType{},         // tempsAsStepModelInputs
                        StepModelOutputSetType{},  // tempsAsStepModelOutputs
                        RemapVectorType{},         // outputsAsStepModelInputs
                        {0u}));                    // modelOutputsThatAreDownstreamInputs
    }
    {
        // Build a model to compare against the step model from steps[1].
        // Step 1 consumes the model output opnd2 as a step model input.
        PartitioningModel model1;
        uint32_t m1Opnd2 = model1.addFloatOperand();
        uint32_t m1Opnd3 = model1.addOperation2To1V1_0(1, m1Opnd2, m1Opnd2);
        model1.identifyInputsAndOutputs({m1Opnd2}, {m1Opnd3});
        model1.finish();
        ASSERT_TRUE(model1.isValid());

        ASSERT_NO_FATAL_FAILURE(
                compare(steps[1], &model1, devices[1], RemapVectorType{},  // modelInputs
                        RemapVectorType{{opnd3, m1Opnd3}},                 // modelOutputs
                        RemapVectorType{},                                 // tempsAsStepModelInputs
                        StepModelOutputSetType{},           // tempsAsStepModelOutputs
                        RemapVectorType{{opnd2, m1Opnd2}},  // outputsAsStepModelInputs
                        {}));                               // modelOutputsThatAreDownstreamInputs
    }
}
2058 
// Checks driver selection and failure modes for a model consisting of a
// single OEM operation, which the CPU fallback cannot execute.
TEST_F(PartitioningTest, OemOperations) {
    // Trivial model: one OEM operation with one input and one output.
    PartitioningModel model;
    const uint32_t oemIn = model.addFloatOperand();
    const uint32_t oemOut = model.addOperationOEM1To1(oemIn);
    model.identifyInputsAndOutputs({oemIn}, {oemOut});
    model.finish();
    ASSERT_TRUE(model.isValid());

    // Among drivers that can run the OEM operation, the best-performing
    // one must win -- even when it is not better than the CPU.  (No need
    // to compare the original model to the model from the plan -- we
    // didn't actually do any partitioning.)
    const auto devicesBestOEM = makeDevices({{"badOEM", 1.5, ~0U, PartitioningDriver::OEMYes},
                                             {"noOEM", 0.5, ~0U, PartitioningDriver::OEMNo},
                                             {"goodOEM", 1.2, ~0U, PartitioningDriver::OEMYes}});
    PartitioningCompilation bestOemCompilation(&model, devicesBestOEM);
    ASSERT_EQ(bestOemCompilation.finish(), Result::NO_ERROR);
    const auto& bestOemPlan = bestOemCompilation.getExecutionPlan();
    ASSERT_EQ(bestOemPlan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
    ASSERT_NE(bestOemPlan.forTest_simpleGetDevice().get(), nullptr);
    ASSERT_EQ(bestOemPlan.forTest_simpleGetDevice()->getName(), "goodOEM");

    // If no driver can run the OEM operation, compilation must fail.
    const auto devicesNoOEM = makeDevices({{"noOEM", 0.5, ~0U, PartitioningDriver::OEMNo}});
    PartitioningCompilation noOemCompilation(&model, devicesNoOEM);
    ASSERT_EQ(noOemCompilation.finish(), Result::BAD_DATA);

    // A driver that claims SUPPORT for the OEM operation but cannot
    // PREPARE it must also cause compilation to fail.
    const auto devicesIndecisiveOEM =
            makeDevices({{"indecisiveOEM", 0.5, ~0U, PartitioningDriver::OEMIndecisive}});
    PartitioningCompilation indecisiveOemCompilation(&model, devicesIndecisiveOEM);
    ASSERT_NE(indecisiveOemCompilation.finish(), Result::NO_ERROR);

    // With no drivers at all (CPU fallback only), compilation must fail.
    PartitioningCompilation noDriversCompilation(&model, makeDevices({}) /* no drivers */);
    ASSERT_EQ(noDriversCompilation.finish(), Result::BAD_DATA);
}
2097 
// Verifies that relaxed (FP16) computation changes which device wins:
// each device advertises a different performance for relaxed vs
// non-relaxed execution.
TEST_F(PartitioningTest, RelaxedFP) {
    const auto devices = makeDevices({// Best choice for non-relaxed model.
                                      {"f32", 0.8, 0.9 /* relaxed */, ~0U},
                                      // Best choice for relaxed model.
                                      {"f16", 0.9, 0.8 /* relaxed */, ~0U}});

    // Builds a trivial one-operation model, optionally relaxed to FP16,
    // and verifies which device the partitioner selects for it.
    const auto checkDeviceChoice = [&devices](bool relax, const char* expectedName) {
        SCOPED_TRACE(expectedName);
        PartitioningModel model;
        const uint32_t lhs = model.addFloatOperand();
        const uint32_t rhs = model.addFloatOperand();
        const uint32_t sum = model.addOperation2To1V1_0(0, lhs, rhs);
        model.identifyInputsAndOutputs({lhs, rhs}, {sum});
        model.relaxComputationFloat32toFloat16(relax);
        model.finish();
        ASSERT_TRUE(model.isValid());
        // Expect a SIMPLE plan on the appropriate device.  (No need to
        // compare the original model to the model from the plan -- we
        // didn't actually do any partitioning.)
        ExecutionPlan plan;
        ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                         ExecutePriority::DEFAULT, {}, &plan),
                  ANEURALNETWORKS_NO_ERROR);
        EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty());
        ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
        ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), expectedName);
    };

    ASSERT_NO_FATAL_FAILURE(checkDeviceChoice(false, "f32"));
    ASSERT_NO_FATAL_FAILURE(checkDeviceChoice(true, "f16"));
}
2130 
// For every operand type, verifies that per-type performance drives
// device selection: a device advertising better performance for the
// type is preferred over the baseline, and one advertising worse
// performance is not.
TEST_F(PartitioningTest, Perf) {
    // The various type names used here are confusing.
    //
    // OperandType (from HAL file), WrapperType (from NeuralNetworksWrapper.h),
    // and OperandCode (from NeuralNetworks.h) are different enums representing
    // the same type kind -- e.g., OperandType::FLOAT32, WrapperType::FLOAT32,
    // ANEURALNETWORKS_FLOAT32.  Corresponding enumerators have the same value.
    //
    // WrapperOperandType is the NeuralNetworksWrapper.h representation of a
    // full operand type (WrapperType plus dimensions plus other attributes).

    auto TestType = [](V1_3::OperandType operandType) {
        if (operandType == V1_3::OperandType::SUBGRAPH) {
            // SUBGRAPH capabilities are handled differently.
            return;
        }
        SCOPED_TRACE(toString(operandType));
        // Trivial model consisting solely of OEM operation.  We
        // pick OEM operation because this allows us to use
        // inputs and outputs of any number and type.
        PartitioningModel model;
        uint32_t opndIn = model.addOperand(static_cast<WrapperType>(operandType));
        uint32_t opndOut = model.addOperationOEM1To1(opndIn);
        model.identifyInputsAndOutputs({opndIn}, {opndOut});
        model.finish();
        ASSERT_TRUE(model.isValid());

        // Baseline capabilities with performance 0.5 for all types.
        const V1_3::Capabilities baseCapabilities = ::android::nn::makeCapabilities(0.5);

        {
            // better than base
            V1_3::Capabilities goodCapabilities = baseCapabilities;
            // Override the performance for just this operand type.
            update(&goodCapabilities, operandType, 0.25);

            const auto devices =
                    makeDevices({{"base", baseCapabilities, ~0U, PartitioningDriver::OEMYes},
                                 {"good", goodCapabilities, ~0U, PartitioningDriver::OEMYes}});

            // Verify that model will be executed on "good".
            // No need to compare the original model to the model from the plan -- we
            // didn't actually do any partitioning.
            ExecutionPlan plan;
            ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                             ExecutePriority::DEFAULT, {}, &plan),
                      ANEURALNETWORKS_NO_ERROR);
            EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty());
            ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
            ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "good");
        }

        {
            // worse than base
            V1_3::Capabilities badCapabilities = baseCapabilities;
            // Override the performance for just this operand type.
            update(&badCapabilities, operandType, 0.75);
            const auto devices =
                    makeDevices({{"base", baseCapabilities, ~0U, PartitioningDriver::OEMYes},
                                 {"bad", badCapabilities, ~0U, PartitioningDriver::OEMYes}});

            // Verify that model will be executed on "base".
            // No need to compare the original model to the model from the plan -- we
            // didn't actually do any partitioning.
            ExecutionPlan plan;
            ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                             ExecutePriority::DEFAULT, {}, &plan),
                      ANEURALNETWORKS_NO_ERROR);
            EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty());
            ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
            ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "base");
        }
    };

    // Exercise every fundamental operand type and every OEM operand type.
    for (uint32_t type = static_cast<uint32_t>(V1_3::OperandTypeRange::FUNDAMENTAL_MIN);
         type <= static_cast<uint32_t>(V1_3::OperandTypeRange::FUNDAMENTAL_MAX); ++type) {
        TestType(static_cast<V1_3::OperandType>(type));
    }
    for (uint32_t type = static_cast<uint32_t>(V1_3::OperandTypeRange::OEM_MIN);
         type <= static_cast<uint32_t>(V1_3::OperandTypeRange::OEM_MAX); ++type) {
        TestType(static_cast<V1_3::OperandType>(type));
    }
}
2211 
TEST_F(PartitioningTest, ZeroInputStepModel) {
    // Build a model whose first operation consumes only a zero operand, so the
    // partition containing it would have no model input.
    PartitioningModel model;
    const uint32_t zeroOpnd = model.addFloatZeroOperand();
    const uint32_t intermediateOpnd = model.addOperation1To1V1_3(0, zeroOpnd);
    const uint32_t inputOpnd = model.addFloatOperand();
    const uint32_t outputOpnd = model.addOperation2To1V1_0(1, intermediateOpnd, inputOpnd);
    model.identifyInputsAndOutputs({inputOpnd}, {outputOpnd});
    ASSERT_EQ(model.finish(), Result::NO_ERROR);

    // Partitioning would split the work in two: deviceA runs op0, deviceB runs
    // op1. Because the step model for deviceA would have no model input, the
    // runtime is expected to fall back to running the whole model on the CPU.
    const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
    PartitioningCompilation compilation(&model, devices);
    ASSERT_EQ(compilation.finish(), Result::NO_ERROR);
    const auto& fallbackDeviceName = DeviceManager::getCpuDevice()->getName();
    checkExecutionPlanSteps(compilation.getExecutionPlan(), {fallbackDeviceName});
}
2230 
TEST_F(PartitioningTest, ZeroOutputStepModel) {
    // Build a model whose second operation's result is not a model output, so
    // the partition containing it would have no model output.
    PartitioningModel model;
    const uint32_t inputOpnd0 = model.addFloatOperand();
    const uint32_t midOpnd = model.addOperation1To1V1_3(0, inputOpnd0);
    const uint32_t inputOpnd1 = model.addFloatOperand();
    model.addOperation2To1V1_0(1, midOpnd, inputOpnd1);
    model.identifyInputsAndOutputs({inputOpnd0, inputOpnd1}, {midOpnd});
    ASSERT_EQ(model.finish(), Result::NO_ERROR);

    // Partitioning would split the work in two: deviceA runs op0, deviceB runs
    // op1. Because the step model for deviceB would have no model output, the
    // runtime is expected to fall back to running the whole model on the CPU.
    const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
    PartitioningCompilation compilation(&model, devices);
    ASSERT_EQ(compilation.finish(), Result::NO_ERROR);
    const auto& fallbackDeviceName = DeviceManager::getCpuDevice()->getName();
    checkExecutionPlanSteps(compilation.getExecutionPlan(), {fallbackDeviceName});
}
2249 
2250 // Test dynamic temporaries and related parts of the partitioning implementation.
2251 //
2252 // opnd0 = model input                   // tensor to pad
2253 // opnd1 = model input                   // padding
// opnd2 = PAD(opnd0, opnd1)             // model output
// opnd3 = PAD(opnd0, opnd1)
2256 // opnd4 = ADD(opnd2, opnd3, FUSED_NONE) // model output
2257 class DynamicTemporariesTest : public PartitioningTest {
2258    protected:
2259     // Call these functions in sequence in order to perform the test.
2260     // Call to declareOutputDimensions() can be omitted (see the default values below).
2261     // Call to declareHalVersions() can be omitted (defaults to HalVersion::LATEST).
2262     void declareOutputDimensions(bool opnd2ModelAndPartitionOutputSpecified,
2263                                  bool opnd3PartitionOutputSpecified,
2264                                  bool opnd4ModelOutputSpecified);
2265     void declareHalVersions(HalVersion padDeviceVersion, HalVersion addDeviceVersion);
2266     void makeModelAndValidate();
2267     void compileModelAndComparePlan(bool noFallback = true);
2268     void executeCompilationAndCompareOutput(bool opnd2ModelOutputBigEnough,
2269                                             bool opnd4ModelOutputBigEnough);
2270 
2271     // set by declareOutputDimensions()
2272     bool mOpnd2ModelAndPartitionOutputSpecified = false;
2273     bool mOpnd3PartitionOutputSpecified = false;
2274     bool mOpnd4ModelOutputSpecified = false;
2275 
2276     // set by declareHalVersions()
2277     HalVersion mPadDeviceVersion = HalVersion::LATEST;
2278     HalVersion mAddDeviceVersion = HalVersion::LATEST;
2279     HalVersion mMinDeviceVersion = HalVersion::LATEST;  // minimum of the other two device versions
2280 
2281     // created by makeModelAndValidate()
2282     std::optional<PartitioningModel> mModel;
2283     std::vector<uint32_t> mOpnds;
2284 
2285     // created by compileModelAndComparePlan();
2286     std::optional<PartitioningCompilation> mCompilation;
2287 
supportsOutputOfUnknownRank(HalVersion version)2288     static bool supportsOutputOfUnknownRank(HalVersion version) {
2289         return version >= HalVersion::V1_2;
2290     }
2291 
dimensionedOutput(HalVersion version,bool specified)2292     static Dimensioned dimensionedOutput(HalVersion version, bool specified) {
2293         return specified ? Dimensioned::YES_4
2294                          : supportsOutputOfUnknownRank(version) ? Dimensioned::NO
2295                                                                 : Dimensioned::RANK_1;
2296     }
2297 };
2298 
declareOutputDimensions(bool opnd2ModelAndPartitionOutputSpecified,bool opnd3PartitionOutputSpecified,bool opnd4ModelOutputSpecified)2299 void DynamicTemporariesTest::declareOutputDimensions(bool opnd2ModelAndPartitionOutputSpecified,
2300                                                      bool opnd3PartitionOutputSpecified,
2301                                                      bool opnd4ModelOutputSpecified) {
2302     ASSERT_FALSE(mModel.has_value());
2303     mOpnd2ModelAndPartitionOutputSpecified = opnd2ModelAndPartitionOutputSpecified;
2304     mOpnd3PartitionOutputSpecified = opnd3PartitionOutputSpecified;
2305     mOpnd4ModelOutputSpecified = opnd4ModelOutputSpecified;
2306 }
2307 
declareHalVersions(HalVersion padDeviceVersion,HalVersion addDeviceVersion)2308 void DynamicTemporariesTest::declareHalVersions(HalVersion padDeviceVersion,
2309                                                 HalVersion addDeviceVersion) {
2310     ASSERT_FALSE(mModel.has_value());
2311     mPadDeviceVersion = padDeviceVersion;
2312     mAddDeviceVersion = addDeviceVersion;
2313     mMinDeviceVersion = min(padDeviceVersion, addDeviceVersion);
2314 }
2315 
makeModelAndValidate()2316 void DynamicTemporariesTest::makeModelAndValidate() {
2317     ASSERT_FALSE(mModel.has_value());
2318     mModel = PartitioningModel();
2319 
2320     uint32_t opndActivation = mModel->addIntScalarOperand(ANEURALNETWORKS_FUSED_NONE);
2321 
2322     uint32_t opnd0 = mModel->addFloatOperand(Dimensioned::YES_2);  // tensor to pad
2323     uint32_t opnd1 = mModel->addIntOperand(Dimensioned::RANK_2);   // paddings
2324     uint32_t opnd2 = mModel->addExplicitOperationXTo1(
2325             ANEURALNETWORKS_PAD, {opnd0, opnd1}, WrapperType::TENSOR_FLOAT32,
2326             dimensionedOutput(mMinDeviceVersion, mOpnd2ModelAndPartitionOutputSpecified));
2327     uint32_t opnd3 = mModel->addExplicitOperationXTo1(
2328             ANEURALNETWORKS_PAD, {opnd0, opnd1}, WrapperType::TENSOR_FLOAT32,
2329             dimensionedOutput(mMinDeviceVersion, mOpnd3PartitionOutputSpecified));
2330     uint32_t opnd4 = mModel->addExplicitOperationXTo1(
2331             ANEURALNETWORKS_ADD, {opnd2, opnd3, opndActivation}, WrapperType::TENSOR_FLOAT32,
2332             dimensionedOutput(mMinDeviceVersion, mOpnd4ModelOutputSpecified));
2333     mModel->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2, opnd4});
2334     mModel->finish();
2335     ASSERT_TRUE(mModel->isValid());
2336 
2337     mOpnds = {opnd0, opnd1, opnd2, opnd3, opnd4};
2338 }
2339 
// Compiles the model built by makeModelAndValidate() against one device that
// supports only PAD and one that supports only ADD, forcing a two-partition
// plan, and then verifies the resulting ExecutionPlan in detail.
// If noFallback is true, compilation without fallback must succeed and the
// plan must be a two-step COMPOUND plan. If noFallback is false, compilation
// without fallback must fail, and recompiling with fallback must yield a
// SIMPLE plan on the CPU device.
void DynamicTemporariesTest::compileModelAndComparePlan(bool noFallback) {
    ASSERT_TRUE(mModel.has_value());
    ASSERT_TRUE(!mCompilation.has_value());

    // Each device handles exactly one of the model's operation types.
    auto devices = makeDevices({{"pad",
                                 0.9,
                                 0U,
                                 PartitioningDriver::OEMNo,
                                 mPadDeviceVersion,
                                 {V1_3::OperationType::PAD}},
                                {"add",
                                 0.9,
                                 0U,
                                 PartitioningDriver::OEMNo,
                                 mAddDeviceVersion,
                                 {V1_3::OperationType::ADD}}});

    mCompilation = PartitioningCompilation(&mModel.value(), devices);
    ASSERT_EQ(mCompilation->setPartitioning(DeviceManager::kPartitioningWithoutFallback),
              Result::NO_ERROR);
    if (noFallback) {
        ASSERT_EQ(mCompilation->finish(), Result::NO_ERROR);
        const ExecutionPlan& planA = mCompilation->getExecutionPlan();
        // opnd3 crosses the partition boundary; it becomes a dynamic temporary
        // exactly when its dimensions were left unspecified.
        EXPECT_TRUE(planA.forTest_flatGetDynamicTemporaries() ==
                    (mOpnd3PartitionOutputSpecified ? DynamicTemporariesType{}
                                                    : DynamicTemporariesType{mOpnds[3]}));
        ASSERT_EQ(planA.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
        const auto& stepsA = planA.forTest_compoundGetSteps();
        ASSERT_EQ(stepsA.size(), size_t(2));
        {
            // Build a model to compare against the step model from stepsA[0].
            // This is the partition for the "pad" device: both PAD operations.
            PartitioningModel modelA0;
            uint32_t a0Opnd0 = modelA0.addFloatOperand(Dimensioned::YES_2);
            uint32_t a0Opnd1 = modelA0.addIntOperand(Dimensioned::RANK_2);
            uint32_t a0Opnd2 = modelA0.addExplicitOperationXTo1(
                    ANEURALNETWORKS_PAD, {a0Opnd0, a0Opnd1}, WrapperType::TENSOR_FLOAT32,
                    dimensionedOutput(mMinDeviceVersion, mOpnd3PartitionOutputSpecified));
            uint32_t a0Opnd3 = modelA0.addExplicitOperationXTo1(
                    ANEURALNETWORKS_PAD, {a0Opnd0, a0Opnd1}, WrapperType::TENSOR_FLOAT32,
                    dimensionedOutput(mMinDeviceVersion, mOpnd2ModelAndPartitionOutputSpecified));
            modelA0.identifyInputsAndOutputs({a0Opnd0, a0Opnd1}, {a0Opnd3, a0Opnd2});
            modelA0.finish();
            ASSERT_TRUE(modelA0.isValid());

            ASSERT_NO_FATAL_FAILURE(compare(
                    stepsA[0], &modelA0, devices[0],
                    RemapVectorType{{mOpnds[0], a0Opnd0}, {mOpnds[1], a0Opnd1}},  // modelInputs
                    RemapVectorType{{mOpnds[2], a0Opnd3}},                        // modelOutputs
                    RemapVectorType{},                             // tempsAsStepModelInputs
                    StepModelOutputSetType{{mOpnds[3], a0Opnd2}},  // tempsAsStepModelOutputs
                    RemapVectorType{},                             // outputsAsStepModelInputs
                    {0u}));  // modelOutputsThatAreDownstreamInputs
        }
        {
            // Build a model to compare against the step model from stepsA[1].
            // This is the partition for the "add" device: the remaining operation.
            PartitioningModel modelA1;
            uint32_t a1Opnd2 = modelA1.addFloatOperand(
                    dimensionedOutput(mMinDeviceVersion, mOpnd2ModelAndPartitionOutputSpecified));
            uint32_t a1Opnd3 = modelA1.addFloatOperand(
                    dimensionedOutput(mMinDeviceVersion, mOpnd3PartitionOutputSpecified));
            uint32_t a1Opnd4 = modelA1.addOperation2To1V1_0(
                    0, a1Opnd2, a1Opnd3,
                    dimensionedOutput(mMinDeviceVersion, mOpnd4ModelOutputSpecified));
            modelA1.identifyInputsAndOutputs({a1Opnd3, a1Opnd2}, {a1Opnd4});
            modelA1.finish();
            ASSERT_TRUE(modelA1.isValid());

            ASSERT_NO_FATAL_FAILURE(
                    compare(stepsA[1], &modelA1, devices[1], RemapVectorType{},  // modelInputs
                            RemapVectorType{{mOpnds[4], a1Opnd4}},               // modelOutputs
                            RemapVectorType{{mOpnds[3], a1Opnd3}},  // tempsAsStepModelInputs
                            StepModelOutputSetType{},               // tempsAsStepModelOutputs
                            RemapVectorType{{mOpnds[2], a1Opnd2}},  // outputsAsStepModelInputs
                            {}));  // modelOutputsThatAreDownstreamInputs
        }
    } else {
        ASSERT_EQ(mCompilation->finish(), Result::OP_FAILED);
        // Try again, expecting fallback.
        mCompilation = PartitioningCompilation(&mModel.value(), devices);
        ASSERT_EQ(mCompilation->setPartitioning(DeviceManager::kPartitioningWithFallback),
                  Result::NO_ERROR);
        ASSERT_EQ(mCompilation->finish(), Result::NO_ERROR);
        ASSERT_EQ(mCompilation->getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
        ASSERT_EQ(mCompilation->getExecutionPlan().forTest_simpleGetDevice(),
                  DeviceManager::getCpuDevice());
    }
}
2427 
executeCompilationAndCompareOutput(bool opnd2ModelOutputBigEnough,bool opnd4ModelOutputBigEnough)2428 void DynamicTemporariesTest::executeCompilationAndCompareOutput(bool opnd2ModelOutputBigEnough,
2429                                                                 bool opnd4ModelOutputBigEnough) {
2430     ASSERT_TRUE(opnd2ModelOutputBigEnough || !mOpnd2ModelAndPartitionOutputSpecified);
2431     ASSERT_TRUE(opnd4ModelOutputBigEnough || !mOpnd4ModelOutputSpecified);
2432 
2433     ASSERT_TRUE(mCompilation.has_value());
2434     WrapperExecution e(&mCompilation.value());
2435 
2436     WrapperOperandType padTensorValueType(WrapperType::TENSOR_FLOAT32, {2});
2437     const float padTensorValue[] = {3.0f, 5.0f};
2438     e.setInput(0, &padTensorValue, &padTensorValueType.operandType);
2439 
2440     WrapperOperandType paddingsType(WrapperType::TENSOR_INT32, {1, 2});
2441     const int paddings[1][2] = {{1, 1}};
2442     e.setInput(1, &paddings, &paddingsType.operandType);
2443 
2444     auto setOutput = [&e](uint32_t index, float* buffer, bool bigEnough, bool specified,
2445                           HalVersion version) {
2446         const uint32_t elts = bigEnough ? 4 : 3;
2447         std::fill(buffer, buffer + elts, -1.0f);
2448         using DimsType = std::vector<uint32_t>;
2449         WrapperOperandType outputType(
2450                 WrapperType::TENSOR_FLOAT32,
2451                 specified ? DimsType{elts}
2452                           : supportsOutputOfUnknownRank(version) ? DimsType{} : DimsType{0});
2453         e.setOutput(index, buffer, elts * sizeof(float), &outputType.operandType);
2454     };
2455     float opnd2ModelOutput[4], opnd4ModelOutput[4];
2456     setOutput(0, opnd2ModelOutput, opnd2ModelOutputBigEnough,
2457               mOpnd2ModelAndPartitionOutputSpecified, mPadDeviceVersion);
2458     setOutput(1, opnd4ModelOutput, opnd4ModelOutputBigEnough, mOpnd4ModelOutputSpecified,
2459               mAddDeviceVersion);
2460 
2461     const Result expectResult = opnd2ModelOutputBigEnough && opnd4ModelOutputBigEnough
2462                                         ? Result::NO_ERROR
2463                                         : Result::OUTPUT_INSUFFICIENT_SIZE;
2464     ASSERT_EQ(e.compute(), expectResult);
2465     if (expectResult == Result::NO_ERROR) {
2466         float expected[4] = {0.0f, padTensorValue[0], padTensorValue[1], 0.0f};
2467         ASSERT_TRUE(std::equal(std::begin(opnd2ModelOutput), std::end(opnd2ModelOutput),
2468                                std::begin(expected)));
2469         for (auto& elt : expected) {
2470             elt *= 2;
2471         }
2472         ASSERT_TRUE(std::equal(std::begin(opnd4ModelOutput), std::end(opnd4ModelOutput),
2473                                std::begin(expected)));
2474     }
2475 }
2476 
TEST_F(DynamicTemporariesTest, ModelOutputsSufficientSize) {
    // The purpose of this test is to confirm that the partitioner and the
    // runtime can handle a model output of unspecified dimensions but
    // sufficient size that is written by one partition and read by another.
    // opnd3, the partition-boundary temporary, has specified dimensions, so
    // the plan needs no dynamic temporaries here.

    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/false,
                                                    /*opnd3PartitionOutputSpecified=*/true,
                                                    /*opnd4ModelOutputSpecified=*/false));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true));
}
2489 
2490 // TODO(b/174851714): Fix the partitioner and re-enable this test.
TEST_F(DynamicTemporariesTest, DISABLED_ModelOutputsSufficientSize_V1_1) {
    // The purpose of this test is to confirm that the partitioner and the
    // runtime can handle a model output of unspecified dimensions but
    // sufficient size that is written by one partition and read by another.
    // Same configuration as ModelOutputsSufficientSize, but with both devices
    // pinned to HAL V1_1.
    // Regression test for http://b/174851714.

    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/false,
                                                    /*opnd3PartitionOutputSpecified=*/true,
                                                    /*opnd4ModelOutputSpecified=*/false));
    ASSERT_NO_FATAL_FAILURE(declareHalVersions(/*padDeviceVersion=*/HalVersion::V1_1,
                                               /*addDeviceVersion=*/HalVersion::V1_1));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true));
}
2506 
TEST_F(DynamicTemporariesTest, DynamicTemporariesUnspecifiedOutputs) {
    // The purpose of this test is to confirm that the partitioner can produce
    // dynamic temporaries and that the runtime can handle them properly.  Note
    // that all model outputs are of unspecified dimensions but sufficient size.
    // declareOutputDimensions() is not called, so all three "specified" flags
    // keep their default value of false and opnd3 becomes a dynamic temporary.

    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true));
}
2516 
TEST_F(DynamicTemporariesTest, DynamicTemporariesSpecifiedOutputs) {
    // The purpose of this test is to confirm that the partitioner can produce
    // dynamic temporaries and that the runtime can handle them properly.  Note
    // that all model outputs are of specified dimensions.
    // opnd3 is left unspecified and crosses the partition boundary, so it
    // becomes the dynamic temporary.

    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/true,
                                                    /*opnd3PartitionOutputSpecified=*/false,
                                                    /*opnd4ModelOutputSpecified=*/true));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true));
}
2529 
TEST_F(DynamicTemporariesTest, DynamicTemporariesSpecifiedOutputs_V1_2) {
    // The purpose of this test is to confirm that the partitioner can produce
    // dynamic temporaries and that the runtime can handle them properly.  Note
    // that all model outputs are of specified dimensions.
    // Same configuration as DynamicTemporariesSpecifiedOutputs, but with both
    // devices pinned to HAL V1_2.
    // Regression test for http://b/174851714.

    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/true,
                                                    /*opnd3PartitionOutputSpecified=*/false,
                                                    /*opnd4ModelOutputSpecified=*/true));
    ASSERT_NO_FATAL_FAILURE(declareHalVersions(/*padDeviceVersion=*/HalVersion::V1_2,
                                               /*addDeviceVersion=*/HalVersion::V1_2));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true));
}
2545 
TEST_F(DynamicTemporariesTest, DynamicTemporariesSpecifiedOutputs_V1_1) {
    // The purpose of this test is to confirm that the partitioner cannot produce
    // dynamic temporaries for V1_1 but instead does whole-model CPU fallback.  Note
    // that all model outputs are of specified dimensions.
    // compileModelAndComparePlan(false) expects the no-fallback compilation to
    // fail and the subsequent with-fallback compilation to produce a SIMPLE
    // plan on the CPU device.
    // Regression test for http://b/174851714.

    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/true,
                                                    /*opnd3PartitionOutputSpecified=*/false,
                                                    /*opnd4ModelOutputSpecified=*/true));
    ASSERT_NO_FATAL_FAILURE(declareHalVersions(/*padDeviceVersion=*/HalVersion::V1_1,
                                               /*addDeviceVersion=*/HalVersion::V1_1));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan(false));
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true));
}
2561 
TEST_F(DynamicTemporariesTest, ModelOutputsInsufficientSizeWithDynamicTemporary) {
    // The purpose of this test is to confirm that the runtime can detect a
    // model output of insufficient size in the presence of a dynamic temporary.
    // Output dimensions default to unspecified (so opnd3 is a dynamic
    // temporary), and both model output buffers are deliberately undersized.

    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(false, false));
}
2570 
TEST_F(DynamicTemporariesTest, ModelOutputsInsufficientSizeWithoutDynamicTemporary) {
    // The purpose of this test is to confirm that the runtime can detect a
    // model output of insufficient size in the absence of a dynamic temporary.
    // opnd3 has specified dimensions (so no dynamic temporary), and both model
    // output buffers are deliberately undersized.

    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/false,
                                                    /*opnd3PartitionOutputSpecified=*/true,
                                                    /*opnd4ModelOutputSpecified=*/false));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(false, false));
}
2582 
TEST_F(DynamicTemporariesTest, ModelOutput2InsufficientSizeWithoutDynamicTemporary) {
    // The purpose of this test is to confirm that the runtime can detect a
    // model output of insufficient size in the absence of a dynamic temporary.
    // Only model output 0 (opnd2) is undersized; model output 1 (opnd4) is
    // big enough.

    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/false,
                                                    /*opnd3PartitionOutputSpecified=*/true,
                                                    /*opnd4ModelOutputSpecified=*/false));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(false, true));
}
2594 
TEST_F(DynamicTemporariesTest, ModelOutput4InsufficientSizeWithoutDynamicTemporary) {
    // The purpose of this test is to confirm that the runtime can detect a
    // model output of insufficient size in the absence of a dynamic temporary.
    // Only model output 1 (opnd4) is undersized; model output 0 (opnd2) is
    // big enough.

    ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/false,
                                                    /*opnd3PartitionOutputSpecified=*/true,
                                                    /*opnd4ModelOutputSpecified=*/false));
    ASSERT_NO_FATAL_FAILURE(makeModelAndValidate());
    ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan());
    ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, false));
}
2606 
2607 // Test token rehashing during the compilation step.
2608 class CacheTest : public PartitioningTest {
2609    protected:
SetUp()2610     virtual void SetUp() override {
2611         PartitioningTest::SetUp();
2612         char cacheDirTemp[] = "/data/local/tmp/TestCompilationCachingXXXXXX";
2613         char* cacheDir = mkdtemp(cacheDirTemp);
2614         ASSERT_NE(cacheDir, nullptr);
2615         mCacheDir = cacheDir;
2616     }
2617 
TearDown()2618     virtual void TearDown() override {
2619         if (!::testing::Test::HasFailure()) {
2620             std::filesystem::remove_all(mCacheDir);
2621         }
2622         PartitioningTest::TearDown();
2623     }
2624 
expectUniqueTokens(const std::vector<std::vector<uint8_t>> & tokens)2625     void expectUniqueTokens(const std::vector<std::vector<uint8_t>>& tokens) {
2626         for (uint32_t i = 0; i < tokens.size(); i++) {
2627             SCOPED_TRACE(i);
2628             for (uint32_t j = i + 1; j < tokens.size(); j++) {
2629                 SCOPED_TRACE(j);
2630                 EXPECT_NE(tokens[i], tokens[j]);
2631             }
2632         }
2633     }
2634 
2635     // Launch a single run of the partitioner against the provided model and device list with
    // cache token provided as tokenIn. Find the partition for the device with deviceName.
2637     // Record the transformed token into tokenOut. Two or more partitions may be on the same device.
2638     // "devicePartitionIndex" specifies the index of the ExecutionStep corresponding to the
2639     // partition of interest, within the sequence of ExecutionSteps on the target device.
2640     // If tokenIn is empty, no caching information will be provided to the partitioner.
    void getTransformedCacheTokenSingle(const PartitioningModel& model,
                                        const std::vector<std::shared_ptr<Device>>& devices,
                                        const char* deviceName, const std::vector<uint8_t>& tokenIn,
                                        ExecutePreference preference, ExecutePriority priority,
                                        uint32_t devicePartitionIndex,
                                        std::vector<uint8_t>* tokenOut) {
        // Compile the model and get the execution plan.
        PartitioningCompilation compilation(&model, devices);
        if (!tokenIn.empty()) {
            compilation.setCaching(mCacheDir.c_str(), tokenIn);
        }
        compilation.setPreference(preference);
        compilation.setPriority(priority);
        ASSERT_EQ(compilation.finish(), Result::NO_ERROR);
        const ExecutionPlan& plan = compilation.getExecutionPlan();

        // Find the cache info for the device.
        const uint8_t* token = nullptr;
        if (plan.forTest_getKind() == ExecutionPlan::Kind::SIMPLE) {
            // A simple plan has exactly one partition, so the only valid
            // partition index is 0.
            ASSERT_EQ(devicePartitionIndex, 0u);
            ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), deviceName);
            token = plan.forTest_simpleGetCacheToken();
        } else if (plan.forTest_getKind() == ExecutionPlan::Kind::COMPOUND) {
            // Walk the steps, counting only execution steps on the requested
            // device, until the requested partition index is reached.
            const auto& steps = plan.forTest_compoundGetSteps();
            uint32_t executionStepCount = 0;
            for (const auto& step : steps) {
                if (step->isExecution() &&
                    step->executionStep()->getDevice()->getName() == deviceName) {
                    if (devicePartitionIndex == executionStepCount) {
                        token = step->executionStep()->forTest_getCacheToken();
                        break;
                    }
                    executionStepCount++;
                }
            }
        } else {
            FAIL();
        }

        // Retrieve the transformed token from the cache info.
        // A null token means the partition has no cache token; report that as
        // an empty tokenOut.
        if (token == nullptr) {
            tokenOut->clear();
        } else {
            tokenOut->resize(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN);
            std::copy(token, token + ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, tokenOut->begin());
        }
    }
2688 
2689     // A wrapper of getTransformedCacheTokenSingle, which runs getTransformedCacheTokenSingle
2690     // multiple times and checks if the transformation provides consistent result.
2691     // Two or more partitions may be on the same device. "devicePartitionIndex" specifies the index
2692     // of the ExecutionStep corresponding to the partition of interest, within the sequence of
2693     // ExecutionSteps on the target device.
getTransformedCacheToken(const PartitioningModel & model,const std::vector<std::shared_ptr<Device>> & devices,const char * deviceName,const std::vector<uint8_t> & tokenIn,ExecutePreference preference,ExecutePriority priority,std::vector<uint8_t> * tokenOut,uint32_t devicePartitionIndex=0)2694     void getTransformedCacheToken(const PartitioningModel& model,
2695                                   const std::vector<std::shared_ptr<Device>>& devices,
2696                                   const char* deviceName, const std::vector<uint8_t>& tokenIn,
2697                                   ExecutePreference preference, ExecutePriority priority,
2698                                   std::vector<uint8_t>* tokenOut,
2699                                   uint32_t devicePartitionIndex = 0) {
2700         getTransformedCacheTokenSingle(model, devices, deviceName, tokenIn, preference, priority,
2701                                        devicePartitionIndex, tokenOut);
2702 
2703         // Test if the runtime maps to the same cache token every time for the same compilation
2704         // setup.
2705         for (uint32_t i = 0; i < 10; i++) {
2706             std::vector<uint8_t> token;
2707             SCOPED_TRACE(i);
2708             getTransformedCacheTokenSingle(model, devices, deviceName, tokenIn, preference,
2709                                            priority, devicePartitionIndex, &token);
2710             EXPECT_EQ(*tokenOut, token);
2711         }
2712     }
2713 
createModelForCachingTests(PartitioningModel * model)2714     void createModelForCachingTests(PartitioningModel* model) {
2715         uint32_t opnd0 = model->addFloatOperand();
2716         uint32_t opnd1 = model->addFloatOperand();
2717         uint32_t opnd2 = model->addOperation2To1V1_0(0, opnd0, opnd1);
2718         uint32_t opnd3 = model->addFloatOperand();
2719         uint32_t opnd4 = model->addOperation2To1V1_0(1, opnd2, opnd3);
2720         model->identifyInputsAndOutputs({opnd0, opnd1, opnd3}, {opnd4});
2721         model->finish();
2722         ASSERT_TRUE(model->isValid());
2723     }
2724 
2725     // The first model returned in "models" is the main model.
createControlFlowModelForCachingTests(std::vector<std::unique_ptr<PartitioningModel>> * models)2726     void createControlFlowModelForCachingTests(
2727             std::vector<std::unique_ptr<PartitioningModel>>* models) {
2728         CHECK(models != nullptr);
2729 
2730         auto trueModel = std::make_unique<PartitioningModel>();
2731         {
2732             const uint32_t opnd0 = trueModel->addFloatOperand();
2733             const uint32_t opnd1 = trueModel->addFloatOperand();
2734             const uint32_t opnd2 = trueModel->addOperation2To1V1_0(0, opnd0, opnd1);
2735             trueModel->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
2736             trueModel->finish();
2737             ASSERT_TRUE(trueModel->isValid());
2738         }
2739 
2740         auto falseModel = std::make_unique<PartitioningModel>();
2741         {
2742             const uint32_t opnd0 = falseModel->addFloatOperand();
2743             const uint32_t opnd1 = falseModel->addFloatOperand();
2744             const uint32_t opnd2 = falseModel->addOperation2To1V1_0(0, opnd0, opnd1);
2745             falseModel->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
2746             falseModel->finish();
2747             ASSERT_TRUE(falseModel->isValid());
2748         }
2749 
2750         auto mainModel = std::make_unique<PartitioningModel>();
2751         {
2752             const uint32_t opnd0 = mainModel->addBooleanOperand();
2753             const uint32_t opnd1 = mainModel->addFloatOperand();
2754             const uint32_t opnd2 = mainModel->addFloatOperand();
2755             const uint32_t opnd3 = mainModel->addFloatOperand();
2756             mainModel->addIfOperation(opnd0, *trueModel, *falseModel, {opnd1, opnd2}, {opnd3});
2757             mainModel->identifyInputsAndOutputs({opnd0, opnd1, opnd2}, {opnd3});
2758             mainModel->finish();
2759             ASSERT_TRUE(mainModel->isValid());
2760         }
2761 
2762         models->clear();
2763         models->push_back(std::move(mainModel));
2764         models->push_back(std::move(trueModel));
2765         models->push_back(std::move(falseModel));
2766     }
2767 
    // Directory holding the compilation cache files used by these tests.
    // NOTE(review): presumably created/removed by the fixture's SetUp/TearDown, which are
    // not visible in this chunk — confirm against the full file.
    std::string mCacheDir;
};
2770 
2771 // Test the case when no token is provided by the application and the execution plan has a
2772 // simple body.
TEST_F(CacheTest,CacheTokenNoneSimpleBody)2773 TEST_F(CacheTest, CacheTokenNoneSimpleBody) {
2774     PartitioningModel model;
2775     createModelForCachingTests(&model);
2776 
2777     // deviceA can execute the whole model.
2778     const auto deviceA = makeDevices({
2779             {"deviceA", 0.5, ~0U},
2780     });
2781 
2782     std::vector<uint8_t> tokenIn, tokenOut;
2783     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
2784                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2785                              &tokenOut);
2786     EXPECT_TRUE(tokenOut.empty());
2787 }
2788 
2789 // Test if the runtime maps to different cache tokens for devices with different names in
2790 // execution plan with a simple body.
TEST_F(CacheTest,CacheTokenDifferentDeviceNamesSimpleBody)2791 TEST_F(CacheTest, CacheTokenDifferentDeviceNamesSimpleBody) {
2792     PartitioningModel model;
2793     createModelForCachingTests(&model);
2794 
2795     // Two devices that can both execute the whole model.
2796     const auto deviceA = makeDevices({{"deviceA", 0.5, ~0U}});
2797     const auto deviceB = makeDevices({{"deviceB", 0.5, ~0U}});
2798 
2799     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2800     std::vector<uint8_t> deviceAToken, deviceBToken;
2801     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
2802                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2803                              &deviceAToken);
2804     getTransformedCacheToken(model, deviceB, "deviceB", tokenIn,
2805                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2806                              &deviceBToken);
2807     expectUniqueTokens({deviceAToken, deviceBToken});
2808 }
2809 
2810 // Test if the runtime maps to different cache tokens for devices with different version strings in
2811 // execution plan with a simple body.
TEST_F(CacheTest,CacheTokenDifferentDeviceVersionStringsSimpleBody)2812 TEST_F(CacheTest, CacheTokenDifferentDeviceVersionStringsSimpleBody) {
2813     PartitioningModel model;
2814     createModelForCachingTests(&model);
2815 
2816     // Two devices that can both execute the whole model.
2817     const auto deviceA_1_0 = makeDevices({{"deviceA", "1.0", 0.5, ~0U}});
2818     const auto deviceA_1_1 = makeDevices({{"deviceA", "1.1", 0.5, ~0U}});
2819 
2820     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2821     std::vector<uint8_t> deviceA_1_0_Token, deviceA_1_1_Token;
2822     getTransformedCacheToken(model, deviceA_1_0, "deviceA", tokenIn,
2823                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2824                              &deviceA_1_0_Token);
2825     getTransformedCacheToken(model, deviceA_1_1, "deviceA", tokenIn,
2826                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2827                              &deviceA_1_1_Token);
2828     expectUniqueTokens({deviceA_1_0_Token, deviceA_1_1_Token});
2829 }
2830 
2831 // Test if the runtime maps to different cache tokens for compilations with different preferences
2832 // in execution plan with a simple body.
TEST_F(CacheTest,CacheTokenDifferentPreferencesSimpleBody)2833 TEST_F(CacheTest, CacheTokenDifferentPreferencesSimpleBody) {
2834     PartitioningModel model;
2835     createModelForCachingTests(&model);
2836 
2837     // One device that can execute the whole model.
2838     const auto deviceA = makeDevices({{"deviceA", 0.5, ~0U}});
2839 
2840     std::vector<uint8_t> fastToken, powerToken, sustainedToken;
2841     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2842     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
2843                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2844                              &fastToken);
2845     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
2846                              ExecutePreference::PREFER_LOW_POWER, ExecutePriority::DEFAULT,
2847                              &powerToken);
2848     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
2849                              ExecutePreference::PREFER_SUSTAINED_SPEED, ExecutePriority::DEFAULT,
2850                              &sustainedToken);
2851     expectUniqueTokens({fastToken, powerToken, sustainedToken});
2852 }
2853 
2854 // Test if the runtime maps to different cache tokens for compilations with different priorities
2855 // in execution plan with a simple body.
TEST_F(CacheTest,CacheTokenDifferentPrioritiesSimpleBody)2856 TEST_F(CacheTest, CacheTokenDifferentPrioritiesSimpleBody) {
2857     PartitioningModel model;
2858     createModelForCachingTests(&model);
2859 
2860     // One device that can execute the whole model.
2861     const auto deviceA = makeDevices({{"deviceA", 0.5, ~0U}});
2862 
2863     std::vector<uint8_t> lowToken, mediumToken, highToken;
2864     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2865     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
2866                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::LOW,
2867                              &lowToken);
2868     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
2869                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::MEDIUM,
2870                              &mediumToken);
2871     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn,
2872                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::HIGH,
2873                              &highToken);
2874     expectUniqueTokens({lowToken, mediumToken, highToken});
2875 }
2876 
2877 // Test if the runtime maps to different cache tokens for compilations with different tokens
2878 // provided by application in execution plan with a simple body.
TEST_F(CacheTest,CacheTokenDifferentTokensSimpleBody)2879 TEST_F(CacheTest, CacheTokenDifferentTokensSimpleBody) {
2880     PartitioningModel model;
2881     createModelForCachingTests(&model);
2882 
2883     // One device that can execute the whole model.
2884     const auto deviceA = makeDevices({{"deviceA", 0.5, ~0U}});
2885 
2886     std::vector<uint8_t> tokenOut1, tokenOut2;
2887     std::vector<uint8_t> tokenIn1(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2888     std::vector<uint8_t> tokenIn2(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 1);
2889     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn1,
2890                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2891                              &tokenOut1);
2892     getTransformedCacheToken(model, deviceA, "deviceA", tokenIn2,
2893                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2894                              &tokenOut2);
2895     expectUniqueTokens({tokenOut1, tokenOut2});
2896 }
2897 
2898 // Test the case when no token is provided by the application and the execution plan has a
2899 // compound body.
TEST_F(CacheTest,CacheTokenNoneCompoundBody)2900 TEST_F(CacheTest, CacheTokenNoneCompoundBody) {
2901     PartitioningModel model;
2902     createModelForCachingTests(&model);
2903 
2904     // DeviceA executes the first operation only.
2905     const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
2906 
2907     std::vector<uint8_t> tokenIn, tokenOut;
2908     getTransformedCacheToken(model, devices, "deviceA", tokenIn,
2909                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2910                              &tokenOut);
2911     EXPECT_TRUE(tokenOut.empty());
2912     getTransformedCacheToken(model, devices, "deviceB", tokenIn,
2913                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2914                              &tokenOut);
2915     EXPECT_TRUE(tokenOut.empty());
2916 }
2917 
2918 // Test if the runtime maps to different cache tokens for devices with different names in
2919 // execution plan with a compound body.
TEST_F(CacheTest,CacheTokenDifferentDeviceNamesCompoundBody)2920 TEST_F(CacheTest, CacheTokenDifferentDeviceNamesCompoundBody) {
2921     PartitioningModel model;
2922     createModelForCachingTests(&model);
2923 
2924     // DeviceA executes the first operation only.
2925     const auto devices1 = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceC", 0.5, 1 << 1}});
2926     // DeviceB executes the first operation only.
2927     const auto devices2 = makeDevices({{"deviceB", 0.8, ~0U}, {"deviceC", 0.5, 1 << 1}});
2928 
2929     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2930     std::vector<uint8_t> deviceAToken, deviceBToken;
2931     getTransformedCacheToken(model, devices1, "deviceA", tokenIn,
2932                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2933                              &deviceAToken);
2934     getTransformedCacheToken(model, devices2, "deviceB", tokenIn,
2935                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2936                              &deviceBToken);
2937     expectUniqueTokens({deviceAToken, deviceBToken});
2938 }
2939 
2940 // Test if the runtime maps to different cache tokens for devices with different names in
2941 // execution plan with a compound body.
TEST_F(CacheTest,CacheTokenDifferentDeviceVersionStringsCompoundBody)2942 TEST_F(CacheTest, CacheTokenDifferentDeviceVersionStringsCompoundBody) {
2943     PartitioningModel model;
2944     createModelForCachingTests(&model);
2945 
2946     // DeviceA executes the first operation only.
2947     const auto devices1 = makeDevices({{"deviceA", "1.0", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
2948     // DeviceB executes the first operation only.
2949     const auto devices2 = makeDevices({{"deviceA", "1.1", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
2950 
2951     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2952     std::vector<uint8_t> deviceA_1_0_Token, deviceA_1_1_Token;
2953     getTransformedCacheToken(model, devices1, "deviceA", tokenIn,
2954                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2955                              &deviceA_1_0_Token);
2956     getTransformedCacheToken(model, devices2, "deviceA", tokenIn,
2957                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2958                              &deviceA_1_1_Token);
2959     expectUniqueTokens({deviceA_1_0_Token, deviceA_1_1_Token});
2960 }
2961 
2962 // Test if the runtime maps to different cache tokens for compilations with different preferences
2963 // in execution plan with a compound body.
TEST_F(CacheTest,CacheTokenDifferentPreferencesCompoundBody)2964 TEST_F(CacheTest, CacheTokenDifferentPreferencesCompoundBody) {
2965     PartitioningModel model;
2966     createModelForCachingTests(&model);
2967 
2968     // DeviceA executes the first operation only.
2969     const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
2970 
2971     std::vector<uint8_t> fastToken, powerToken, sustainedToken;
2972     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2973     getTransformedCacheToken(model, devices, "deviceA", tokenIn,
2974                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
2975                              &fastToken);
2976     getTransformedCacheToken(model, devices, "deviceA", tokenIn,
2977                              ExecutePreference::PREFER_LOW_POWER, ExecutePriority::DEFAULT,
2978                              &powerToken);
2979     getTransformedCacheToken(model, devices, "deviceA", tokenIn,
2980                              ExecutePreference::PREFER_SUSTAINED_SPEED, ExecutePriority::DEFAULT,
2981                              &sustainedToken);
2982     expectUniqueTokens({fastToken, powerToken, sustainedToken});
2983 }
2984 
2985 // Test if the runtime maps to different cache tokens for compilations with different priorities
2986 // in execution plan with a compound body.
TEST_F(CacheTest,CacheTokenDifferentPrioritiesCompoundBody)2987 TEST_F(CacheTest, CacheTokenDifferentPrioritiesCompoundBody) {
2988     PartitioningModel model;
2989     createModelForCachingTests(&model);
2990 
2991     // DeviceA executes the first operation only.
2992     const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
2993 
2994     std::vector<uint8_t> lowToken, mediumToken, highToken;
2995     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
2996     getTransformedCacheToken(model, devices, "deviceA", tokenIn,
2997                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::LOW,
2998                              &lowToken);
2999     getTransformedCacheToken(model, devices, "deviceA", tokenIn,
3000                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::MEDIUM,
3001                              &mediumToken);
3002     getTransformedCacheToken(model, devices, "deviceA", tokenIn,
3003                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::HIGH,
3004                              &highToken);
3005     expectUniqueTokens({lowToken, mediumToken, highToken});
3006 }
3007 
3008 // Test if the runtime maps to different cache tokens for compilations with different tokens
3009 // provided by application in execution plan with a compound body.
TEST_F(CacheTest,CacheTokenDifferentTokensCompoundBody)3010 TEST_F(CacheTest, CacheTokenDifferentTokensCompoundBody) {
3011     PartitioningModel model;
3012     createModelForCachingTests(&model);
3013 
3014     // DeviceA executes the first operation only.
3015     const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
3016 
3017     std::vector<uint8_t> tokenOut1, tokenOut2;
3018     std::vector<uint8_t> tokenIn1(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
3019     std::vector<uint8_t> tokenIn2(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 1);
3020     getTransformedCacheToken(model, devices, "deviceA", tokenIn1,
3021                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
3022                              &tokenOut1);
3023     getTransformedCacheToken(model, devices, "deviceA", tokenIn2,
3024                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
3025                              &tokenOut2);
3026     expectUniqueTokens({tokenOut1, tokenOut2});
3027 }
3028 
3029 // Test if the runtime maps to different cache tokens for compilations with different partitioning
3030 // outcome in execution plan with a compound body.
TEST_F(CacheTest,CacheTokenDifferentPartitionsCompoundBody)3031 TEST_F(CacheTest, CacheTokenDifferentPartitionsCompoundBody) {
3032     PartitioningModel model;
3033     createModelForCachingTests(&model);
3034 
3035     // DeviceA executes the whole model.
3036     const auto devices1 = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 0U}});
3037     // DeviceA executes the first operation only.
3038     const auto devices2 = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
3039     // DeviceA executes the second operation only.
3040     const auto devices3 = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 0}});
3041 
3042     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
3043     std::vector<uint8_t> tokenOut1, tokenOut2, tokenOut3;
3044     getTransformedCacheToken(model, devices1, "deviceA", tokenIn,
3045                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
3046                              &tokenOut1);
3047     getTransformedCacheToken(model, devices2, "deviceA", tokenIn,
3048                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
3049                              &tokenOut2);
3050     getTransformedCacheToken(model, devices3, "deviceA", tokenIn,
3051                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
3052                              &tokenOut3);
3053     expectUniqueTokens({tokenOut1, tokenOut2, tokenOut3});
3054 }
3055 
3056 // Test if the runtime maps different referenced models to different cache tokens.
TEST_F(CacheTest,CacheTokenDifferentReferenceModelPartitions)3057 TEST_F(CacheTest, CacheTokenDifferentReferenceModelPartitions) {
3058     std::vector<std::unique_ptr<PartitioningModel>> models;
3059     createControlFlowModelForCachingTests(&models);
3060     const auto& main = *models[0];
3061 
3062     // DeviceA executes the two referenced models but does not support IF.
3063     // There will be two partitions on deviceA.
3064     const auto devices = makeDevices({{"deviceA", 0.8, ~0U}});
3065 
3066     std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0);
3067     std::vector<uint8_t> tokenOut1, tokenOut2;
3068     getTransformedCacheToken(main, devices, "deviceA", tokenIn,
3069                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
3070                              &tokenOut1, /*devicePartitionIndex=*/0);
3071     getTransformedCacheToken(main, devices, "deviceA", tokenIn,
3072                              ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT,
3073                              &tokenOut2, /*devicePartitionIndex=*/1);
3074     expectUniqueTokens({tokenOut1, tokenOut2});
3075 }
3076 
// Very basic tests of some of the PerformanceInfo functionality.
// Placed in this file because partitioning is the consumer of this functionality.
// Empty fixture: these tests need no shared state or per-test setup.
class PerfTest : public ::testing::Test {};
3080 
TEST_F(PerfTest, Lookup) {
    // Derive an arbitrary (but reproducible) performance value from an OperandType.
    // We'll use this to ensure that we can save and then recover a type's performance.
    const auto typePerf = [](V1_3::OperandType type) { return float(static_cast<uint32_t>(type)); };

    // Invokes "fn" once for every operand type in the inclusive range [lo, hi].
    const auto forEachTypeIn = [](V1_3::OperandTypeRange lo, V1_3::OperandTypeRange hi,
                                  auto&& fn) {
        for (uint32_t type = static_cast<uint32_t>(lo); type <= static_cast<uint32_t>(hi);
             ++type) {
            fn(static_cast<V1_3::OperandType>(type));
        }
    };

    V1_3::Capabilities capabilities = ::android::nn::makeCapabilities(-1.0f);

    // Store a distinct performance value for every fundamental and OEM operand type.
    forEachTypeIn(V1_3::OperandTypeRange::FUNDAMENTAL_MIN, V1_3::OperandTypeRange::FUNDAMENTAL_MAX,
                  [&](V1_3::OperandType operandType) {
                      update(&capabilities, operandType, typePerf(operandType));
                  });
    forEachTypeIn(V1_3::OperandTypeRange::OEM_MIN, V1_3::OperandTypeRange::OEM_MAX,
                  [&](V1_3::OperandType operandType) {
                      update(&capabilities, operandType, typePerf(operandType));
                  });

    // Make sure lookup retrieves the values stored by update
    forEachTypeIn(V1_3::OperandTypeRange::FUNDAMENTAL_MIN, V1_3::OperandTypeRange::FUNDAMENTAL_MAX,
                  [&](V1_3::OperandType operandType) {
                      if (operandType == V1_3::OperandType::SUBGRAPH) {
                          // SUBGRAPH capabilities are handled differently.
                          return;
                      }
                      SCOPED_TRACE(toString(operandType));
                      EXPECT_EQ(lookupExecTime(capabilities, operandType), typePerf(operandType));
                  });
    forEachTypeIn(V1_3::OperandTypeRange::OEM_MIN, V1_3::OperandTypeRange::OEM_MAX,
                  [&](V1_3::OperandType operandType) {
                      SCOPED_TRACE(toString(operandType));
                      EXPECT_EQ(lookupExecTime(capabilities, operandType), typePerf(operandType));
                  });

    // Check the behavior of a missing type: lookup reports FLT_MAX.
    const auto missingType = static_cast<V1_3::OperandType>(
            static_cast<uint32_t>(V1_3::OperandTypeRange::BASE_MAX) + 1);
    EXPECT_EQ(lookupExecTime(capabilities, missingType), FLT_MAX);
}
3124 
3125 class ControlFlowPartitioningTest : public PartitioningTest {
3126    protected:
3127     // opnd0 --> +-----+
3128     //           | op0 | --> opnd2
3129     // opnd1 --> +-----+
createBranchOrBodyModel(Dimensioned dimensioned)3130     std::unique_ptr<PartitioningModel> createBranchOrBodyModel(Dimensioned dimensioned) {
3131         auto model = std::make_unique<PartitioningModel>();
3132         const uint32_t opnd0 = model->addFloatOperand(dimensioned);
3133         const uint32_t opnd1 = model->addFloatOperand(dimensioned);
3134         const uint32_t opnd2 = model->addOperation2To1V1_0(0, opnd0, opnd1, dimensioned);
3135         model->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
3136         model->finish();
3137         EXPECT_TRUE(model->isValid());
3138         return model;
3139     }
3140 
3141     // opnd0 --> +-------+
3142     //           | EQUAL | --> opnd2
3143     // opnd1 --> +-------+
createCondModel(Dimensioned dimensioned)3144     std::unique_ptr<PartitioningModel> createCondModel(Dimensioned dimensioned) {
3145         auto model = std::make_unique<PartitioningModel>();
3146         const uint32_t opnd0 = model->addFloatOperand(dimensioned);
3147         const uint32_t opnd1 = model->addFloatOperand(dimensioned);
3148         const uint32_t opnd2 = model->addExplicitOperationXTo1(
3149                 ANEURALNETWORKS_EQUAL, {opnd0, opnd1}, WrapperType::TENSOR_BOOL8);
3150         model->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
3151         model->finish();
3152         EXPECT_TRUE(model->isValid());
3153         return model;
3154     }
3155 
3156     // opnd0 --> +----+
3157     // opnd1 --> | IF | --> opnd3
3158     // opnd2 --> +----+
createIfModel(Dimensioned dimensionedMain=Dimensioned::YES,Dimensioned dimensionedThen=Dimensioned::YES,Dimensioned dimensionedElse=Dimensioned::YES)3159     std::vector<std::unique_ptr<PartitioningModel>> createIfModel(
3160             Dimensioned dimensionedMain = Dimensioned::YES,
3161             Dimensioned dimensionedThen = Dimensioned::YES,
3162             Dimensioned dimensionedElse = Dimensioned::YES) {
3163         auto thenModel = createBranchOrBodyModel(dimensionedThen);
3164         auto elseModel = createBranchOrBodyModel(dimensionedElse);
3165 
3166         auto mainModel = std::make_unique<PartitioningModel>();
3167         const uint32_t opnd0 = mainModel->addBooleanOperand();
3168         const uint32_t opnd1 = mainModel->addFloatOperand(dimensionedMain);
3169         const uint32_t opnd2 = mainModel->addFloatOperand(dimensionedMain);
3170         const uint32_t opnd3 = mainModel->addFloatOperand(dimensionedMain);
3171         mainModel->addIfOperation(opnd0, *thenModel, *elseModel, {opnd1, opnd2}, {opnd3});
3172         mainModel->identifyInputsAndOutputs({opnd0, opnd1, opnd2}, {opnd3});
3173         mainModel->finish();
3174         EXPECT_TRUE(mainModel->isValid());
3175 
3176         std::vector<std::unique_ptr<PartitioningModel>> models;
3177         models.push_back(std::move(mainModel));
3178         models.push_back(std::move(thenModel));
3179         models.push_back(std::move(elseModel));
3180         return std::move(models);
3181     }
3182 
3183     // opnd0 --> +-------+
3184     //           | WHILE | --> opnd2
3185     // opnd1 --> +-------+
createWhileModel(Dimensioned dimensionedMain=Dimensioned::YES,Dimensioned dimensionedCond=Dimensioned::YES,Dimensioned dimensionedBody=Dimensioned::YES)3186     std::vector<std::unique_ptr<PartitioningModel>> createWhileModel(
3187             Dimensioned dimensionedMain = Dimensioned::YES,
3188             Dimensioned dimensionedCond = Dimensioned::YES,
3189             Dimensioned dimensionedBody = Dimensioned::YES) {
3190         auto condModel = createCondModel(dimensionedCond);
3191         auto bodyModel = createBranchOrBodyModel(dimensionedBody);
3192 
3193         auto mainModel = std::make_unique<PartitioningModel>();
3194         const uint32_t opnd0 = mainModel->addFloatOperand(dimensionedMain);
3195         const uint32_t opnd1 = mainModel->addFloatOperand(dimensionedMain);
3196         const uint32_t opnd2 = mainModel->addFloatOperand(dimensionedMain);
3197         mainModel->addWhileOperation(*condModel, *bodyModel, {opnd0, opnd1}, {opnd2});
3198         mainModel->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2});
3199         mainModel->finish();
3200         EXPECT_TRUE(mainModel->isValid());
3201 
3202         std::vector<std::unique_ptr<PartitioningModel>> models;
3203         models.push_back(std::move(mainModel));
3204         models.push_back(std::move(condModel));
3205         models.push_back(std::move(bodyModel));
3206         return std::move(models);
3207     }
3208 
3209     void testIfUnknownSize(Dimensioned dimensionedMain, Dimensioned dimensionedThen,
3210                            Dimensioned dimensionedElse);
3211     void testWhileUnknownSize(Dimensioned dimensionedMain, Dimensioned dimensionedThen,
3212                               Dimensioned dimensionedElse);
3213 };
3214 
TEST_F(ControlFlowPartitioningTest, IF_Interpreted) {
    const auto models = createIfModel();

    // The device supports the referenced models but does not support IF.
    const auto devices = makeDevices({{"V1_0", 0.9, HalVersion::V1_0, ~0U}});

    ExecutionPlan plan;
    const auto status = models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                                    ExecutePriority::DEFAULT, {}, &plan);
    ASSERT_EQ(status, ANEURALNETWORKS_NO_ERROR);
    // The runtime interprets the IF itself; both branches run on the driver.
    checkExecutionPlanSteps(plan, {kIfStep, "V1_0", kGotoStep, "V1_0"});
}
3227 
TEST_F(ControlFlowPartitioningTest, WHILE_Interpreted) {
    const auto models = createWhileModel();

    // The device supports the body model but does not support WHILE or the
    // condition model (because of EQUAL).
    const auto devices = makeDevices({{"V1_0", 0.9, HalVersion::V1_0, ~0U}});

    ExecutionPlan plan;
    const auto status = models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                                    ExecutePriority::DEFAULT, {}, &plan);
    ASSERT_EQ(status, ANEURALNETWORKS_NO_ERROR);
    // The condition falls back to the CPU device; the body runs on the driver.
    const auto& cpuDeviceName = DeviceManager::getCpuDevice()->getName();
    checkExecutionPlanSteps(plan, {kWhileStep, cpuDeviceName, kGotoStep, "V1_0", kGotoStep});
}
3242 
TEST_F(ControlFlowPartitioningTest, IF_SimplePlan) {
    const auto models = createIfModel();

    // The device supports all operations, including IF.
    const auto devices = makeDevices({{"ALL",
                                       0.9,
                                       ~0U,
                                       PartitioningDriver::OEMNo,
                                       HalVersion::LATEST,
                                       {V1_3::OperationType::IF}}});

    ExecutionPlan plan;
    const auto status = models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                                    ExecutePriority::DEFAULT, {}, &plan);
    ASSERT_EQ(status, ANEURALNETWORKS_NO_ERROR);
    // Everything fits on one device, so the plan has a simple body.
    checkExecutionPlanSteps(plan, {"ALL"});
}
3260 
TEST_F(ControlFlowPartitioningTest, WHILE_SimplePlan) {
    const auto models = createWhileModel();

    // The device supports all operations, including WHILE and EQUAL.
    const auto devices = makeDevices({{"ALL",
                                       0.9,
                                       ~0U,
                                       PartitioningDriver::OEMNo,
                                       HalVersion::LATEST,
                                       {V1_3::OperationType::WHILE, V1_3::OperationType::EQUAL}}});

    ExecutionPlan plan;
    const auto status = models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
                                                    ExecutePriority::DEFAULT, {}, &plan);
    ASSERT_EQ(status, ANEURALNETWORKS_NO_ERROR);
    // Everything fits on one device, so the plan has a simple body.
    checkExecutionPlanSteps(plan, {"ALL"});
}
3278 
testIfUnknownSize(Dimensioned dimensionedMain,Dimensioned dimensionedThen,Dimensioned dimensionedElse)3279 void ControlFlowPartitioningTest::testIfUnknownSize(Dimensioned dimensionedMain,
3280                                                     Dimensioned dimensionedThen,
3281                                                     Dimensioned dimensionedElse) {
3282     if (dimensionedMain != Dimensioned::NO && dimensionedThen != Dimensioned::NO &&
3283         dimensionedElse != Dimensioned::NO) {
3284         // No unknown size.
3285         return;
3286     }
3287 
3288     const auto models = createIfModel(dimensionedMain, dimensionedThen, dimensionedElse);
3289 
3290     // The device supports all operations but the partitioner ignores its IF
3291     // support due to http://b/159076604#comment5.
3292     const auto devices = makeDevices({{"ALL",
3293                                        0.9,
3294                                        ~0U,
3295                                        PartitioningDriver::OEMNo,
3296                                        HalVersion::LATEST,
3297                                        {V1_3::OperationType::IF}}});
3298 
3299     ExecutionPlan plan;
3300     ASSERT_EQ(models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
3301                                           ExecutePriority::DEFAULT, {}, &plan),
3302               ANEURALNETWORKS_NO_ERROR);
3303     // The control flow interpreter does not support unknown size (b/132458982).
3304     checkExecutionPlanSteps(plan, {DeviceManager::getCpuDevice()->getName()});
3305 }
3306 
// Sweeps every combination of dimensioned/undimensioned main/then/else graphs.
TEST_F(ControlFlowPartitioningTest, IF_UnknownSize) {
    constexpr Dimensioned kConfigurations[] = {Dimensioned::NO, Dimensioned::YES};
    for (const Dimensioned dimensionedMain : kConfigurations) {
        SCOPED_TRACE(testing::Message() << "dimensionedMain: " << toString(dimensionedMain));
        for (const Dimensioned dimensionedThen : kConfigurations) {
            SCOPED_TRACE(testing::Message() << "dimensionedThen: " << toString(dimensionedThen));
            for (const Dimensioned dimensionedElse : kConfigurations) {
                SCOPED_TRACE(testing::Message()
                             << "dimensionedElse: " << toString(dimensionedElse));
                testIfUnknownSize(dimensionedMain, dimensionedThen, dimensionedElse);
            }
        }
    }
}
3321 
testWhileUnknownSize(Dimensioned dimensionedMain,Dimensioned dimensionedCond,Dimensioned dimensionedBody)3322 void ControlFlowPartitioningTest::testWhileUnknownSize(Dimensioned dimensionedMain,
3323                                                        Dimensioned dimensionedCond,
3324                                                        Dimensioned dimensionedBody) {
3325     if (dimensionedMain != Dimensioned::NO && dimensionedCond != Dimensioned::NO &&
3326         dimensionedBody != Dimensioned::NO) {
3327         // No unknown size.
3328         return;
3329     }
3330 
3331     const auto models = createWhileModel(dimensionedMain, dimensionedCond, dimensionedBody);
3332 
3333     // The device supports all operations but the partitioner ignores its WHILE
3334     // support due to http://b/159076604#comment5.
3335     const auto devices = makeDevices({{"ALL",
3336                                        0.9,
3337                                        ~0U,
3338                                        PartitioningDriver::OEMNo,
3339                                        HalVersion::LATEST,
3340                                        {V1_3::OperationType::WHILE, V1_3::OperationType::EQUAL}}});
3341 
3342     ExecutionPlan plan;
3343     ASSERT_EQ(models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
3344                                           ExecutePriority::DEFAULT, {}, &plan),
3345               ANEURALNETWORKS_NO_ERROR);
3346     // The control flow interpreter does not support unknown size (b/132458982).
3347     checkExecutionPlanSteps(plan, {DeviceManager::getCpuDevice()->getName()});
3348 }
3349 
// Sweeps every combination of dimensioned/undimensioned main/cond/body graphs.
TEST_F(ControlFlowPartitioningTest, WHILE_UnknownSize) {
    constexpr Dimensioned kConfigurations[] = {Dimensioned::NO, Dimensioned::YES};
    for (const Dimensioned dimensionedMain : kConfigurations) {
        SCOPED_TRACE(testing::Message() << "dimensionedMain: " << toString(dimensionedMain));
        for (const Dimensioned dimensionedCond : kConfigurations) {
            SCOPED_TRACE(testing::Message() << "dimensionedCond: " << toString(dimensionedCond));
            for (const Dimensioned dimensionedBody : kConfigurations) {
                SCOPED_TRACE(testing::Message()
                             << "dimensionedBody: " << toString(dimensionedBody));
                testWhileUnknownSize(dimensionedMain, dimensionedCond, dimensionedBody);
            }
        }
    }
}
3364 
3365 // Test the memory step role analysis of the partitioning implementation.
3366 class MemoryStepRoleTest : public PartitioningTest {
3367    protected:
3368     // A tuple of {device_name, input/output}
3369     using TestStepRole = std::tuple<std::string, IOType>;
3370 
SetUp()3371     void SetUp() override {
3372         PartitioningTest::SetUp();
3373         mModel = std::make_unique<PartitioningModel>();
3374     }
3375 
toString(SourceOperandIndex index)3376     static std::string toString(SourceOperandIndex index) {
3377         return "{" + std::to_string(index.first) + ", " + std::to_string(index.second) + "}";
3378     }
3379 
toString(const std::set<TestStepRole> & roles)3380     static std::string toString(const std::set<TestStepRole>& roles) {
3381         std::stringstream ss;
3382         ss << "[ ";
3383         for (const auto& [deviceName, type] : roles) {
3384             ss << "{" << deviceName << ", " << (type == IOType::INPUT ? "INPUT" : "OUTPUT") << "} ";
3385         }
3386         ss << "]";
3387         return ss.str();
3388     }
3389 
finishAndPartitionModelForDevices(const std::vector<std::shared_ptr<Device>> & devices)3390     void finishAndPartitionModelForDevices(const std::vector<std::shared_ptr<Device>>& devices) {
3391         mModel->finish();
3392         ASSERT_TRUE(mModel->isValid());
3393         ASSERT_EQ(mModel->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
3394                                            ExecutePriority::DEFAULT, {}, &mPlan),
3395                   ANEURALNETWORKS_NO_ERROR);
3396     }
3397 
checkStepRolesOfInput(uint32_t index,const std::set<TestStepRole> & expected) const3398     void checkStepRolesOfInput(uint32_t index, const std::set<TestStepRole>& expected) const {
3399         SCOPED_TRACE("Input: " + std::to_string(index));
3400         std::set<TestStepRole> actual;
3401         mPlan.forEachStepRoleOfInput(
3402                 index, [&actual](const auto* preparedModel, IOType type, uint32_t) {
3403                     actual.emplace(preparedModel->getDevice()->getName(), type);
3404                 });
3405         EXPECT_TRUE(expected == actual)
3406                 << "expected: " << toString(expected) << ", actual: " << toString(actual);
3407     }
3408 
checkStepRolesOfOutput(uint32_t index,const std::set<TestStepRole> & expected) const3409     void checkStepRolesOfOutput(uint32_t index, const std::set<TestStepRole>& expected) const {
3410         SCOPED_TRACE("Output: " + std::to_string(index));
3411         std::set<TestStepRole> actual;
3412         mPlan.forEachStepRoleOfOutput(
3413                 index, [&actual](const auto* preparedModel, IOType type, uint32_t) {
3414                     actual.emplace(preparedModel->getDevice()->getName(), type);
3415                 });
3416         EXPECT_TRUE(expected == actual)
3417                 << "expected: " << toString(expected) << ", actual: " << toString(actual);
3418     }
3419 
checkStepRolesOfSourceOperand(SourceOperandIndex index,const std::set<TestStepRole> & expected) const3420     void checkStepRolesOfSourceOperand(SourceOperandIndex index,
3421                                        const std::set<TestStepRole>& expected) const {
3422         SCOPED_TRACE("SourceOperandIndex: " + toString(index));
3423         std::set<TestStepRole> actual;
3424         mPlan.forTest_compoundForEachStepRoleOfSourceOperand(
3425                 index, [&actual](const auto* preparedModel, IOType type, uint32_t) {
3426                     actual.emplace(preparedModel->getDevice()->getName(), type);
3427                 });
3428         EXPECT_TRUE(expected == actual)
3429                 << "expected: " << toString(expected) << ", actual: " << toString(actual);
3430     }
3431 
3432     std::unique_ptr<PartitioningModel> mModel;
3433     ExecutionPlan mPlan;
3434 };
3435 
3436 // Test a graph with 3 operations, each operation in a separate partition:
3437 //     opnd2 = OP0(opnd0, opnd1)
3438 //     opnd4 = OP1(opnd1, opnd3)
3439 //     opnd5 = OP2(opnd2, opnd4)
TEST_F(MemoryStepRoleTest, NoControlFlow) {
    // Build the graph exactly as in the comment above; operand indices depend
    // on this creation order.
    const uint32_t opnd0 = mModel->addFloatOperand();
    const uint32_t opnd1 = mModel->addFloatOperand();
    const uint32_t opnd2 = mModel->addOperation2To1V1_0(0, opnd0, opnd1);
    const uint32_t opnd3 = mModel->addFloatOperand();
    const uint32_t opnd4 = mModel->addOperation2To1V1_0(1, opnd1, opnd3);
    const uint32_t opnd5 = mModel->addOperation2To1V1_0(2, opnd2, opnd4);
    mModel->identifyInputsAndOutputs({opnd0, opnd1, opnd3}, {opnd2, opnd5});

    // This will result in 3 partitions:
    // deviceA handles op0, deviceB handles op1, deviceC handles op2.
    // (deviceB and deviceC win their ops on lower power; deviceA takes the rest.)
    const auto devices = makeDevices(
            {{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}, {"deviceC", 0.5, 1 << 2}});
    finishAndPartitionModelForDevices(devices);
    checkExecutionPlanSteps(mPlan, {"deviceB", "deviceA", "deviceC"});

    // Check the step roles of the main model inputs and outputs:
    //
    // input0 and input2 are each exclusive for a single partition.
    checkStepRolesOfInput(0, {{"deviceA", IOType::INPUT}});
    checkStepRolesOfInput(2, {{"deviceB", IOType::INPUT}});
    // input1 is shared by two operations in different partitions.
    checkStepRolesOfInput(1, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}});
    // output0 is a model output that is a downstream input.
    checkStepRolesOfOutput(0, {{"deviceA", IOType::OUTPUT}, {"deviceC", IOType::INPUT}});
    // output1 is only used in a single partition.
    checkStepRolesOfOutput(1, {{"deviceC", IOType::OUTPUT}});

    // Check the step roles of the partition boundary temporaries that we will allocate memory on
    // behalf of (see ExecutionPlan::makeController for the allocation logic):
    //
    // opnd4 is a partition boundary temporary (produced on deviceB, consumed on
    // deviceC); subgraph index 0 is the main model.
    checkStepRolesOfSourceOperand({0, opnd4},
                                  {{"deviceB", IOType::OUTPUT}, {"deviceC", IOType::INPUT}});
}
3475 
3476 // Test a graph with an interpreted IF operation.
TEST_F(MemoryStepRoleTest, InterpretedIf) {
    // Then model: thenOpnd2 = OP0(thenOpnd0, thenOpnd1)
    auto thenModel = std::make_unique<PartitioningModel>();
    const uint32_t thenOpnd0 = thenModel->addFloatOperand();
    const uint32_t thenOpnd1 = thenModel->addFloatOperand();
    const uint32_t thenOpnd2 = thenModel->addOperation2To1V1_0(0, thenOpnd0, thenOpnd1);
    thenModel->identifyInputsAndOutputs({thenOpnd0, thenOpnd1}, {thenOpnd2});
    thenModel->finish();
    EXPECT_TRUE(thenModel->isValid());

    // Else model: elseOpnd2 = OP1(elseOpnd0, elseOpnd1)
    auto elseModel = std::make_unique<PartitioningModel>();
    const uint32_t elseOpnd0 = elseModel->addFloatOperand();
    const uint32_t elseOpnd1 = elseModel->addFloatOperand();
    const uint32_t elseOpnd2 = elseModel->addOperation2To1V1_0(1, elseOpnd0, elseOpnd1);
    elseModel->identifyInputsAndOutputs({elseOpnd0, elseOpnd1}, {elseOpnd2});
    elseModel->finish();
    EXPECT_TRUE(elseModel->isValid());

    // Main model: mainOpnd3 = IF(mainOpnd0, mainOpnd1, mainOpnd2) where
    // mainOpnd0 is the boolean condition.
    const uint32_t mainOpnd0 = mModel->addBooleanOperand();
    const uint32_t mainOpnd1 = mModel->addFloatOperand();
    const uint32_t mainOpnd2 = mModel->addFloatOperand();
    const uint32_t mainOpnd3 = mModel->addFloatOperand();
    mModel->addIfOperation(mainOpnd0, *thenModel, *elseModel, {mainOpnd1, mainOpnd2}, {mainOpnd3});
    mModel->identifyInputsAndOutputs({mainOpnd0, mainOpnd1, mainOpnd2}, {mainOpnd3});

    // deviceA handles op0, deviceB handles op1.
    const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}});
    finishAndPartitionModelForDevices(devices);
    checkExecutionPlanSteps(mPlan, {kIfStep, "deviceA", kGotoStep, "deviceB"});

    // Check the step roles of the main model inputs and outputs:
    //
    // input0 is a condition operand of the interpreted IF that will only be read by the runtime.
    checkStepRolesOfInput(0, {});
    // input1 and input2 are outer inputs of the interpreted IF. The memories may be directly used
    // by the input operands of the then and else model.
    checkStepRolesOfInput(1, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}});
    checkStepRolesOfInput(2, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}});
    // output0 is the outer output of the interpreted IF. The memory may be directly
    // used by the output operands of the then and else model.
    checkStepRolesOfOutput(0, {{"deviceA", IOType::OUTPUT}, {"deviceB", IOType::OUTPUT}});

    // There is no partition boundary temporary in this model that we will allocate memory on
    // behalf of (see ExecutionPlan::makeController for the allocation logic).
}
3521 
3522 // Test a graph with an interpreted WHILE operation.
TEST_F(MemoryStepRoleTest, InterpretedWhile) {
    // Condition model:
    //     condOpnd3 = OP0(condOpnd0, condOpnd1)
    //     condOpnd4 = EQUAL(condOpnd2, condOpnd3)
    auto condModel = std::make_unique<PartitioningModel>();
    const uint32_t condOpnd0 = condModel->addFloatOperand();
    const uint32_t condOpnd1 = condModel->addFloatOperand();
    const uint32_t condOpnd2 = condModel->addFloatOperand();
    const uint32_t condOpnd3 = condModel->addOperation2To1V1_0(0, condOpnd0, condOpnd1);
    const uint32_t condOpnd4 = condModel->addExplicitOperationXTo1(
            ANEURALNETWORKS_EQUAL, {condOpnd2, condOpnd3}, WrapperType::TENSOR_BOOL8);
    condModel->identifyInputsAndOutputs({condOpnd0, condOpnd1, condOpnd2}, {condOpnd4});
    condModel->finish();
    EXPECT_TRUE(condModel->isValid());

    // Body model:
    //     bodyOpnd3 = OP1(bodyOpnd0, bodyOpnd1)
    //     bodyOpnd4 = OP1(bodyOpnd0, bodyOpnd2)
    auto bodyModel = std::make_unique<PartitioningModel>();
    const uint32_t bodyOpnd0 = bodyModel->addFloatOperand();
    const uint32_t bodyOpnd1 = bodyModel->addFloatOperand();
    const uint32_t bodyOpnd2 = bodyModel->addFloatOperand();
    const uint32_t bodyOpnd3 = bodyModel->addOperation2To1V1_0(1, bodyOpnd0, bodyOpnd1);
    const uint32_t bodyOpnd4 = bodyModel->addOperation2To1V1_0(1, bodyOpnd0, bodyOpnd2);
    bodyModel->identifyInputsAndOutputs({bodyOpnd0, bodyOpnd1, bodyOpnd2}, {bodyOpnd3, bodyOpnd4});
    bodyModel->finish();
    EXPECT_TRUE(bodyModel->isValid());

    // Main model: mainOpnd3 = WHILE(cond, body, mainOpnd0, mainOpnd1, mainOpnd2)
    const uint32_t mainOpnd0 = mModel->addFloatOperand();
    const uint32_t mainOpnd1 = mModel->addFloatOperand();
    const uint32_t mainOpnd2 = mModel->addFloatOperand();
    const uint32_t mainOpnd3 = mModel->addFloatOperand();
    mModel->addWhileOperation(*condModel, *bodyModel, {mainOpnd0, mainOpnd1, mainOpnd2},
                              {mainOpnd3});
    mModel->identifyInputsAndOutputs({mainOpnd0, mainOpnd1, mainOpnd2}, {mainOpnd3});

    // deviceA handles the cond model, deviceB handles the body model.
    // (deviceA is the only one supporting EQUAL; deviceB wins op1 on lower power.)
    const auto devices = makeDevices({{"deviceA",
                                       0.8,
                                       ~0U,
                                       PartitioningDriver::OEMNo,
                                       HalVersion::LATEST,
                                       {V1_3::OperationType::EQUAL}},
                                      {"deviceB", 0.5, 1 << 1}});
    finishAndPartitionModelForDevices(devices);
    checkExecutionPlanSteps(mPlan, {kWhileStep, "deviceA", kGotoStep, "deviceB", kGotoStep});

    // The subgraph indexes of the condition and body models of the WHILE operation.
    const uint32_t condModelIndex = 1;
    const uint32_t bodyModelIndex = 2;

    // Check the step roles of the main model inputs and outputs:
    //
    // input0 (input-output), input1 (state-only), and input2 (input-only) are outer inputs of the
    // interpreted WHILE. The memories may be directly used by the input operands of the condition
    // and body models.
    checkStepRolesOfInput(0, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}});
    checkStepRolesOfInput(1, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}});
    checkStepRolesOfInput(2, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}});
    // output0 is an outer output of the interpreted WHILE that will only be written by the runtime.
    checkStepRolesOfOutput(0, {});

    // Check the step roles of the partition boundary temporaries that we will allocate memory on
    // behalf of (see ExecutionPlan::makeController for the allocation logic):
    //
    // condOpnd4 is output of the interpreted WHILE condition model.
    checkStepRolesOfSourceOperand({condModelIndex, condOpnd4}, {{"deviceA", IOType::OUTPUT}});
    // bodyOpnd3 (input-output) and bodyOpnd4 (state-only) are outputs of the interpreted WHILE body
    // model. The memories may be directly used by the input operands of the condition and body
    // models.
    checkStepRolesOfSourceOperand(
            {bodyModelIndex, bodyOpnd3},
            {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}, {"deviceB", IOType::OUTPUT}});
    checkStepRolesOfSourceOperand(
            {bodyModelIndex, bodyOpnd4},
            {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}, {"deviceB", IOType::OUTPUT}});
}
3600 
3601 // Test a graph with nested interpreted control flow operations: a WHILE operation with IF operation
3602 // in the body model.
TEST_F(MemoryStepRoleTest, NestedInterpretedControlFlow) {
    // WHILE condition model: condOpnd3 = EQUAL(condOpnd0, condOpnd1);
    // condOpnd2 (boolean) is threaded through but unused here.
    auto condModel = std::make_unique<PartitioningModel>();
    const uint32_t condOpnd0 = condModel->addFloatOperand();
    const uint32_t condOpnd1 = condModel->addFloatOperand();
    const uint32_t condOpnd2 = condModel->addBooleanOperand();
    const uint32_t condOpnd3 = condModel->addExplicitOperationXTo1(
            ANEURALNETWORKS_EQUAL, {condOpnd0, condOpnd1}, WrapperType::TENSOR_BOOL8);
    condModel->identifyInputsAndOutputs({condOpnd0, condOpnd1, condOpnd2}, {condOpnd3});
    condModel->finish();
    EXPECT_TRUE(condModel->isValid());

    // IF then model: thenOpnd2 = OP0(thenOpnd0, thenOpnd1)
    auto thenModel = std::make_unique<PartitioningModel>();
    const uint32_t thenOpnd0 = thenModel->addFloatOperand();
    const uint32_t thenOpnd1 = thenModel->addFloatOperand();
    const uint32_t thenOpnd2 = thenModel->addOperation2To1V1_0(0, thenOpnd0, thenOpnd1);
    thenModel->identifyInputsAndOutputs({thenOpnd0, thenOpnd1}, {thenOpnd2});
    thenModel->finish();
    EXPECT_TRUE(thenModel->isValid());

    // IF else model: elseOpnd2 = OP1(elseOpnd0, elseOpnd1)
    auto elseModel = std::make_unique<PartitioningModel>();
    const uint32_t elseOpnd0 = elseModel->addFloatOperand();
    const uint32_t elseOpnd1 = elseModel->addFloatOperand();
    const uint32_t elseOpnd2 = elseModel->addOperation2To1V1_0(1, elseOpnd0, elseOpnd1);
    elseModel->identifyInputsAndOutputs({elseOpnd0, elseOpnd1}, {elseOpnd2});
    elseModel->finish();
    EXPECT_TRUE(elseModel->isValid());

    // WHILE body model: bodyOpnd3 = IF(bodyOpnd2, then, else, bodyOpnd0, bodyOpnd1)
    auto bodyModel = std::make_unique<PartitioningModel>();
    const uint32_t bodyOpnd0 = bodyModel->addFloatOperand();
    const uint32_t bodyOpnd1 = bodyModel->addFloatOperand();
    const uint32_t bodyOpnd2 = bodyModel->addBooleanOperand();
    const uint32_t bodyOpnd3 = bodyModel->addFloatOperand();
    bodyModel->addIfOperation(bodyOpnd2, *thenModel, *elseModel, {bodyOpnd0, bodyOpnd1},
                              {bodyOpnd3});
    bodyModel->identifyInputsAndOutputs({bodyOpnd0, bodyOpnd1, bodyOpnd2}, {bodyOpnd3});
    bodyModel->finish();
    EXPECT_TRUE(bodyModel->isValid());

    // Main model: mainOpnd3 = WHILE(cond, body, mainOpnd0, mainOpnd1, mainOpnd2)
    const uint32_t mainOpnd0 = mModel->addFloatOperand();
    const uint32_t mainOpnd1 = mModel->addFloatOperand();
    const uint32_t mainOpnd2 = mModel->addBooleanOperand();
    const uint32_t mainOpnd3 = mModel->addFloatOperand();
    mModel->addWhileOperation(*condModel, *bodyModel, {mainOpnd0, mainOpnd1, mainOpnd2},
                              {mainOpnd3});
    mModel->identifyInputsAndOutputs({mainOpnd0, mainOpnd1, mainOpnd2}, {mainOpnd3});

    // deviceA handles the cond model, deviceB handles the then model,
    // deviceC handles the else model.
    const auto devices = makeDevices({{"deviceA",
                                       0.8,
                                       ~0U,
                                       PartitioningDriver::OEMNo,
                                       HalVersion::LATEST,
                                       {V1_3::OperationType::EQUAL}},
                                      {"deviceB", 0.5, 1 << 0},
                                      {"deviceC", 0.5, 1 << 1}});
    finishAndPartitionModelForDevices(devices);
    checkExecutionPlanSteps(mPlan, {kWhileStep, "deviceA", kGotoStep, kIfStep, "deviceB", kGotoStep,
                                    "deviceC", kGotoStep});

    // The subgraph indexes of the condition and body models of the WHILE operation.
    const uint32_t condModelIndex = 1;
    const uint32_t bodyModelIndex = 2;

    // Check the step roles of the main model inputs and outputs:
    //
    // input0 and input1 are outer inputs of the interpreted WHILE. The memories may be directly
    // used by the input operands of the condition and body models, and then be directly used by the
    // input operands of the then and else model of the interpreted IF in the body model.
    checkStepRolesOfInput(
            0,
            {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}, {"deviceC", IOType::INPUT}});
    checkStepRolesOfInput(
            1,
            {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}, {"deviceC", IOType::INPUT}});
    // input2 is also an outer input of the interpreted WHILE. The memory has no step role in the
    // condition model. In the body model, the memory will be used by the condition operand of the
    // interpreted IF that will only be read by the runtime.
    checkStepRolesOfInput(2, {});
    // output0 is an outer output of the interpreted WHILE that will only be written by the runtime.
    checkStepRolesOfOutput(0, {});

    // Check the step roles of the partition boundary temporaries that we will allocate memory on
    // behalf of (see ExecutionPlan::makeController for the allocation logic):
    //
    // condOpnd3 is output of the interpreted WHILE condition model.
    checkStepRolesOfSourceOperand({condModelIndex, condOpnd3}, {{"deviceA", IOType::OUTPUT}});
    // bodyOpnd3 is output of the interpreted WHILE body model. The memories may be directly used by
    // the input operands of the condition and body models, and then be directly used by the
    // input operands of the then and else model of the interpreted IF in the body model.
    checkStepRolesOfSourceOperand({bodyModelIndex, bodyOpnd3}, {{"deviceA", IOType::INPUT},
                                                                {"deviceB", IOType::INPUT},
                                                                {"deviceB", IOType::OUTPUT},
                                                                {"deviceC", IOType::INPUT},
                                                                {"deviceC", IOType::OUTPUT}});
}
3699 
3700 }  // namespace
3701