1 /*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "ExecutionBurstController"
18
19 #include "ExecutionBurstController.h"
20 #include "ExecutionBurstUtils.h"
21
22 #include <android-base/logging.h>
23 #include <android-base/thread_annotations.h>
24 #include <nnapi/IBurst.h>
25 #include <nnapi/IPreparedModel.h>
26 #include <nnapi/Result.h>
27 #include <nnapi/TypeUtils.h>
28 #include <nnapi/Types.h>
29 #include <nnapi/Validation.h>
30 #include <nnapi/hal/1.0/Conversions.h>
31 #include <nnapi/hal/CommonUtils.h>
32 #include <nnapi/hal/HandleError.h>
33 #include <nnapi/hal/ProtectCallback.h>
34 #include <nnapi/hal/TransferValue.h>
35
36 #include <algorithm>
37 #include <cstring>
38 #include <limits>
39 #include <memory>
40 #include <string>
41 #include <thread>
42 #include <tuple>
43 #include <utility>
44 #include <vector>
45
46 #include "Callbacks.h"
47 #include "Conversions.h"
48 #include "Tracing.h"
49 #include "Utils.h"
50
51 namespace android::hardware::neuralnetworks::V1_2::utils {
52 namespace {
53
// Reusable burst execution object returned by
// ExecutionBurstController::createReusableExecution. It holds a
// pre-serialized FMQ request packet plus the cache holds that keep the
// request's memory slots alive for the lifetime of this execution object.
class BurstExecution final : public nn::IExecution,
                             public std::enable_shared_from_this<BurstExecution> {
    // Tag type restricting construction to the `create` factory while still
    // allowing std::make_shared to invoke the public constructor.
    struct PrivateConstructorTag {};

  public:
    // Factory; fails if `controller` is null. See definition below.
    static nn::GeneralResult<std::shared_ptr<const BurstExecution>> create(
            std::shared_ptr<const ExecutionBurstController> controller,
            std::vector<FmqRequestDatum> request, hal::utils::RequestRelocation relocation,
            std::vector<ExecutionBurstController::OptionalCacheHold> cacheHolds);

    BurstExecution(PrivateConstructorTag tag,
                   std::shared_ptr<const ExecutionBurstController> controller,
                   std::vector<FmqRequestDatum> request, hal::utils::RequestRelocation relocation,
                   std::vector<ExecutionBurstController::OptionalCacheHold> cacheHolds);

    // Executes the stored request packet on the controller's burst channel.
    nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> compute(
            const nn::OptionalTimePoint& deadline) const override;

    // Always fails: fenced execution is not supported on burst objects.
    nn::GeneralResult<std::pair<nn::SyncFence, nn::ExecuteFencedInfoCallback>> computeFenced(
            const std::vector<nn::SyncFence>& waitFor, const nn::OptionalTimePoint& deadline,
            const nn::OptionalDuration& timeoutDurationAfterFence) const override;

  private:
    const std::shared_ptr<const ExecutionBurstController> kController;
    const std::vector<FmqRequestDatum> kRequest;
    const hal::utils::RequestRelocation kRelocation;
    const std::vector<ExecutionBurstController::OptionalCacheHold> kCacheHolds;
};
82
executionBurstResultCallback(V1_0::ErrorStatus status,const sp<IBurstContext> & burstContext)83 nn::GeneralResult<sp<IBurstContext>> executionBurstResultCallback(
84 V1_0::ErrorStatus status, const sp<IBurstContext>& burstContext) {
85 HANDLE_HAL_STATUS(status) << "IPreparedModel::configureExecutionBurst failed with status "
86 << toString(status);
87 if (burstContext == nullptr) {
88 return NN_ERROR(nn::ErrorStatus::GENERAL_FAILURE)
89 << "IPreparedModel::configureExecutionBurst returned nullptr for burst";
90 }
91 return burstContext;
92 }
93
getMemoriesHelper(const hidl_vec<int32_t> & slots,const std::shared_ptr<ExecutionBurstController::MemoryCache> & memoryCache)94 nn::GeneralResult<hidl_vec<hidl_memory>> getMemoriesHelper(
95 const hidl_vec<int32_t>& slots,
96 const std::shared_ptr<ExecutionBurstController::MemoryCache>& memoryCache) {
97 hidl_vec<hidl_memory> memories(slots.size());
98 for (size_t i = 0; i < slots.size(); ++i) {
99 const int32_t slot = slots[i];
100 const auto memory = NN_TRY(memoryCache->getMemory(slot));
101 memories[i] = NN_TRY(V1_0::utils::unvalidatedConvert(memory));
102 if (!memories[i].valid()) {
103 return NN_ERROR() << "memory at slot " << slot << " is invalid";
104 }
105 }
106 return memories;
107 }
108
109 } // namespace
110
111 // MemoryCache methods
112
MemoryCache()113 ExecutionBurstController::MemoryCache::MemoryCache() {
114 constexpr size_t kPreallocatedCount = 1024;
115 std::vector<int32_t> freeSlotsSpace;
116 freeSlotsSpace.reserve(kPreallocatedCount);
117 mFreeSlots = std::stack<int32_t, std::vector<int32_t>>(std::move(freeSlotsSpace));
118 mMemoryCache.reserve(kPreallocatedCount);
119 mCacheCleaner.reserve(kPreallocatedCount);
120 }
121
// Stores the IBurstContext used to release remote memory slots. While unset,
// freeMemory skips the remote IBurstContext::freeMemory call.
void ExecutionBurstController::MemoryCache::setBurstContext(sp<IBurstContext> burstContext) {
    std::lock_guard guard(mMutex);
    mBurstContext = std::move(burstContext);
}
126
// Returns the slot associated with `memory`, inserting it into the cache if
// necessary. The returned SharedCleanup keeps the cache entry alive; once the
// last reference to it is dropped, the entry is released via freeMemory.
std::pair<int32_t, ExecutionBurstController::MemoryCache::SharedCleanup>
ExecutionBurstController::MemoryCache::cacheMemory(const nn::SharedMemory& memory) {
    std::unique_lock lock(mMutex);
    base::ScopedLockAssertion lockAssert(mMutex);

    // Use existing cache entry if (1) the Memory object is in the cache and (2) the cache entry is
    // not currently being freed.
    auto iter = mMemoryIdToSlot.find(memory);
    while (iter != mMemoryIdToSlot.end()) {
        const int32_t slot = iter->second;
        if (auto cleaner = mCacheCleaner.at(slot).lock()) {
            return std::make_pair(slot, std::move(cleaner));
        }

        // If the code reaches this point, the Memory object was in the cache, but is currently
        // being destroyed. This code waits until the cache entry has been freed, then loops to
        // ensure the cache entry has been freed or has been made present by another thread.
        mCond.wait(lock);
        iter = mMemoryIdToSlot.find(memory);
    }

    // Allocate a new cache entry.
    const int32_t slot = allocateSlotLocked();
    mMemoryIdToSlot[memory] = slot;
    mMemoryCache[slot] = memory;

    // Create reference-counted self-cleaning cache object. The cleanup task
    // captures only a weak reference to this cache so the cleaner cannot
    // extend the cache's lifetime.
    auto self = weak_from_this();
    Task cleanup = [memory, memoryCache = std::move(self)] {
        if (const auto lock = memoryCache.lock()) {
            lock->freeMemory(memory);
        }
    };
    auto cleaner = std::make_shared<const Cleanup>(std::move(cleanup));
    mCacheCleaner[slot] = cleaner;

    return std::make_pair(slot, std::move(cleaner));
}
165
getMemory(int32_t slot)166 nn::GeneralResult<nn::SharedMemory> ExecutionBurstController::MemoryCache::getMemory(int32_t slot) {
167 std::lock_guard guard(mMutex);
168 if (slot < 0 || static_cast<size_t>(slot) >= mMemoryCache.size()) {
169 return NN_ERROR() << "Invalid slot: " << slot << " vs " << mMemoryCache.size();
170 }
171 return mMemoryCache[slot];
172 }
173
// Releases the cache entry for `memory`: tells the service to drop the slot
// (when a burst context has been set), clears the local bookkeeping, and
// recycles the slot index. Invoked by the Cleanup object created in
// cacheMemory once its final reference is dropped.
void ExecutionBurstController::MemoryCache::freeMemory(const nn::SharedMemory& memory) {
    {
        std::lock_guard guard(mMutex);
        const int32_t slot = mMemoryIdToSlot.at(memory);
        if (mBurstContext) {
            mBurstContext->freeMemory(slot);
        }
        mMemoryIdToSlot.erase(memory);
        mMemoryCache[slot] = {};
        mCacheCleaner[slot].reset();
        mFreeSlots.push(slot);
    }
    // Notify outside of the lock so threads woken in cacheMemory's wait loop
    // do not immediately block on mMutex.
    mCond.notify_all();
}
188
allocateSlotLocked()189 int32_t ExecutionBurstController::MemoryCache::allocateSlotLocked() {
190 constexpr size_t kMaxNumberOfSlots = std::numeric_limits<int32_t>::max();
191
192 // If there is a free slot, use it.
193 if (!mFreeSlots.empty()) {
194 const int32_t slot = mFreeSlots.top();
195 mFreeSlots.pop();
196 return slot;
197 }
198
199 // Use a slot for the first time.
200 CHECK_LT(mMemoryCache.size(), kMaxNumberOfSlots) << "Exceeded maximum number of slots!";
201 const int32_t slot = static_cast<int32_t>(mMemoryCache.size());
202 mMemoryCache.emplace_back();
203 mCacheCleaner.emplace_back();
204
205 return slot;
206 }
207
208 // ExecutionBurstCallback methods
209
// The callback keeps only a weak reference to the memory cache (kMemoryCache)
// so that the service-side callback object cannot extend the cache's
// lifetime. A null cache is a programming error.
ExecutionBurstController::ExecutionBurstCallback::ExecutionBurstCallback(
        const std::shared_ptr<MemoryCache>& memoryCache)
    : kMemoryCache(memoryCache) {
    CHECK(memoryCache != nullptr);
}
215
getMemories(const hidl_vec<int32_t> & slots,getMemories_cb cb)216 Return<void> ExecutionBurstController::ExecutionBurstCallback::getMemories(
217 const hidl_vec<int32_t>& slots, getMemories_cb cb) {
218 const auto memoryCache = kMemoryCache.lock();
219 if (memoryCache == nullptr) {
220 LOG(ERROR) << "ExecutionBurstController::ExecutionBurstCallback::getMemories called after "
221 "the MemoryCache has been freed";
222 cb(V1_0::ErrorStatus::GENERAL_FAILURE, {});
223 return Void();
224 }
225
226 const auto maybeMemories = getMemoriesHelper(slots, memoryCache);
227 if (!maybeMemories.has_value()) {
228 const auto& [message, code] = maybeMemories.error();
229 LOG(ERROR) << "ExecutionBurstController::ExecutionBurstCallback::getMemories failed with "
230 << code << ": " << message;
231 cb(V1_0::ErrorStatus::INVALID_ARGUMENT, {});
232 return Void();
233 }
234
235 cb(V1_0::ErrorStatus::NONE, maybeMemories.value());
236 return Void();
237 }
238
239 // ExecutionBurstController methods
240
// Creates a burst controller: sets up the request/result FMQ channels, a
// memory-cache callback object, and the remote burst context via
// IPreparedModel::configureExecutionBurst, wiring a death handler so FMQ
// endpoints are invalidated if the service dies.
nn::GeneralResult<std::shared_ptr<const ExecutionBurstController>> ExecutionBurstController::create(
        nn::SharedPreparedModel preparedModel, const sp<V1_2::IPreparedModel>& hidlPreparedModel,
        std::chrono::microseconds pollingTimeWindow) {
    // check inputs
    if (preparedModel == nullptr || hidlPreparedModel == nullptr) {
        return NN_ERROR() << "ExecutionBurstController::create passed a nullptr";
    }

    // create FMQ objects
    auto [requestChannelSender, requestChannelDescriptor] =
            NN_TRY(RequestChannelSender::create(kExecutionBurstChannelLength));
    auto [resultChannelReceiver, resultChannelDescriptor] =
            NN_TRY(ResultChannelReceiver::create(kExecutionBurstChannelLength, pollingTimeWindow));

    // check FMQ objects -- create() succeeding implies all four are non-null
    CHECK(requestChannelSender != nullptr);
    CHECK(requestChannelDescriptor != nullptr);
    CHECK(resultChannelReceiver != nullptr);
    CHECK(resultChannelDescriptor != nullptr);

    // create memory cache
    auto memoryCache = std::make_shared<MemoryCache>();

    // create callback object that the service uses to look up memory by slot
    auto burstCallback = sp<ExecutionBurstCallback>::make(memoryCache);
    auto cb = hal::utils::CallbackValue(executionBurstResultCallback);

    // configure burst on the service side, handing it both FMQ descriptors
    const Return<void> ret = hidlPreparedModel->configureExecutionBurst(
            burstCallback, *requestChannelDescriptor, *resultChannelDescriptor, cb);
    HANDLE_TRANSPORT_FAILURE(ret);

    // retrieve the burst context and make it available to the memory cache so
    // freed slots can be released remotely
    auto burstContext = NN_TRY(cb.take());
    memoryCache->setBurstContext(burstContext);

    // create death handler object; protect the FMQ endpoints so a service
    // death unblocks/invalidates them
    auto deathHandler = NN_TRY(neuralnetworks::utils::DeathHandler::create(burstContext));
    deathHandler.protectCallbackForLifetimeOfDeathHandler(requestChannelSender.get());
    deathHandler.protectCallbackForLifetimeOfDeathHandler(resultChannelReceiver.get());

    // make and return controller
    return std::make_shared<const ExecutionBurstController>(
            PrivateConstructorTag{}, std::move(preparedModel), std::move(requestChannelSender),
            std::move(resultChannelReceiver), std::move(burstCallback), std::move(burstContext),
            std::move(memoryCache), std::move(deathHandler));
}
287
// Private constructor; use ExecutionBurstController::create, which performs
// all fallible setup (FMQ creation, burst configuration, death handler)
// before construction.
ExecutionBurstController::ExecutionBurstController(
        PrivateConstructorTag /*tag*/, nn::SharedPreparedModel preparedModel,
        std::unique_ptr<RequestChannelSender> requestChannelSender,
        std::unique_ptr<ResultChannelReceiver> resultChannelReceiver,
        sp<ExecutionBurstCallback> callback, sp<IBurstContext> burstContext,
        std::shared_ptr<MemoryCache> memoryCache, neuralnetworks::utils::DeathHandler deathHandler)
    : kPreparedModel(std::move(preparedModel)),
      mRequestChannelSender(std::move(requestChannelSender)),
      mResultChannelReceiver(std::move(resultChannelReceiver)),
      mBurstCallback(std::move(callback)),
      mBurstContext(std::move(burstContext)),
      mMemoryCache(std::move(memoryCache)),
      kDeathHandler(std::move(deathHandler)) {}
301
cacheMemory(const nn::SharedMemory & memory) const302 ExecutionBurstController::OptionalCacheHold ExecutionBurstController::cacheMemory(
303 const nn::SharedMemory& memory) const {
304 auto [slot, hold] = mMemoryCache->cacheMemory(memory);
305 return hold;
306 }
307
// Executes `request` over the burst FMQ channels. Requests that validate to a
// feature level newer than ANDROID_Q, or whose FMQ packet cannot be sent,
// fall back to the regular IPreparedModel::execute path.
nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>>
ExecutionBurstController::execute(const nn::Request& request, nn::MeasureTiming measure,
                                  const nn::OptionalTimePoint& deadline,
                                  const nn::OptionalDuration& loopTimeoutDuration) const {
    // This is the first point when we know an execution is occurring, so begin to collect
    // systraces. Note that the first point we can begin collecting systraces in
    // ExecutionBurstServer is when the RequestChannelReceiver realizes there is data in the FMQ, so
    // ExecutionBurstServer collects systraces at different points in the code.
    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "ExecutionBurstController::execute");

    // if the request is valid but of a higher version than what's supported in burst execution,
    // fall back to another execution path
    if (const auto version = NN_TRY(hal::utils::makeExecutionFailure(nn::validate(request)));
        version > nn::Version::ANDROID_Q) {
        // fallback to another execution path if the packet could not be sent
        return kPreparedModel->execute(request, measure, deadline, loopTimeoutDuration);
    }

    // ensure that request is ready for IPC (relocate pointer-based arguments
    // into shared memory; `relocation` records what must be flushed)
    std::optional<nn::Request> maybeRequestInShared;
    hal::utils::RequestRelocation relocation;
    const nn::Request& requestInShared =
            NN_TRY(hal::utils::makeExecutionFailure(hal::utils::convertRequestFromPointerToShared(
                    &request, nn::kDefaultRequestMemoryAlignment, nn::kMinMemoryPadding,
                    &maybeRequestInShared, &relocation)));

    // clear pools field of request, as they will be provided via slots
    const auto requestWithoutPools = nn::Request{
            .inputs = requestInShared.inputs, .outputs = requestInShared.outputs, .pools = {}};
    auto hidlRequest = NN_TRY(
            hal::utils::makeExecutionFailure(V1_0::utils::unvalidatedConvert(requestWithoutPools)));
    const auto hidlMeasure = NN_TRY(hal::utils::makeExecutionFailure(convert(measure)));

    // Cache each memory pool, collecting its slot index. The holds keep the
    // cache entries alive for the duration of this call.
    // NOTE(review): std::get assumes every pool is the SharedMemory
    // alternative of the variant -- confirm callers guarantee this.
    std::vector<int32_t> slots;
    std::vector<OptionalCacheHold> holds;
    slots.reserve(requestInShared.pools.size());
    holds.reserve(requestInShared.pools.size());
    for (const auto& memoryPool : requestInShared.pools) {
        auto [slot, hold] = mMemoryCache->cacheMemory(std::get<nn::SharedMemory>(memoryPool));
        slots.push_back(slot);
        holds.push_back(std::move(hold));
    }

    // send request packet
    const auto requestPacket = serialize(hidlRequest, hidlMeasure, slots);
    const auto fallback = [this, &request, measure, &deadline, &loopTimeoutDuration] {
        return kPreparedModel->execute(request, measure, deadline, loopTimeoutDuration);
    };
    return executeInternal(requestPacket, relocation, fallback);
}
358
// See IBurst::createReusableExecution for information on this method.
// Pre-serializes the request into an FMQ packet and caches its memory pools
// once, returning a BurstExecution that can be computed repeatedly. Requests
// newer than ANDROID_Q fall back to the prepared model's own reusable
// execution path.
nn::GeneralResult<nn::SharedExecution> ExecutionBurstController::createReusableExecution(
        const nn::Request& request, nn::MeasureTiming measure,
        const nn::OptionalDuration& loopTimeoutDuration) const {
    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "ExecutionBurstController::createReusableExecution");

    // if the request is valid but of a higher version than what's supported in burst execution,
    // fall back to another execution path
    if (const auto version = NN_TRY(hal::utils::makeGeneralFailure(nn::validate(request)));
        version > nn::Version::ANDROID_Q) {
        // fallback to another execution path if the packet could not be sent
        return kPreparedModel->createReusableExecution(request, measure, loopTimeoutDuration);
    }

    // ensure that request is ready for IPC (relocate pointer-based arguments
    // into shared memory; `relocation` records what must be flushed)
    std::optional<nn::Request> maybeRequestInShared;
    hal::utils::RequestRelocation relocation;
    const nn::Request& requestInShared = NN_TRY(hal::utils::convertRequestFromPointerToShared(
            &request, nn::kDefaultRequestMemoryAlignment, nn::kMinMemoryPadding,
            &maybeRequestInShared, &relocation));

    // clear pools field of request, as they will be provided via slots
    const auto requestWithoutPools = nn::Request{
            .inputs = requestInShared.inputs, .outputs = requestInShared.outputs, .pools = {}};
    auto hidlRequest = NN_TRY(V1_0::utils::unvalidatedConvert(requestWithoutPools));
    const auto hidlMeasure = NN_TRY(convert(measure));

    // Cache each memory pool; the holds are transferred to the BurstExecution
    // so the cache entries outlive this call.
    std::vector<int32_t> slots;
    std::vector<OptionalCacheHold> holds;
    slots.reserve(requestInShared.pools.size());
    holds.reserve(requestInShared.pools.size());
    for (const auto& memoryPool : requestInShared.pools) {
        auto [slot, hold] = mMemoryCache->cacheMemory(std::get<nn::SharedMemory>(memoryPool));
        slots.push_back(slot);
        holds.push_back(std::move(hold));
    }

    const auto requestPacket = serialize(hidlRequest, hidlMeasure, slots);
    return BurstExecution::create(shared_from_this(), std::move(requestPacket),
                                  std::move(relocation), std::move(holds));
}
400
// Sends a pre-serialized request packet over the FMQ and blocks on the result
// channel. At most one execution may be in flight at a time. If `fallback` is
// non-null, it is invoked when the request packet cannot be sent.
nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>>
ExecutionBurstController::executeInternal(const std::vector<FmqRequestDatum>& requestPacket,
                                          const hal::utils::RequestRelocation& relocation,
                                          FallbackFunction fallback) const {
    NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION,
                 "ExecutionBurstController::executeInternal");

    // Ensure that at most one execution is in flight at any given time.
    const bool alreadyInFlight = mExecutionInFlight.test_and_set();
    if (alreadyInFlight) {
        return NN_ERROR() << "IBurst already has an execution in flight";
    }
    // Clear the in-flight flag on every exit path, including error returns.
    const auto guard = base::make_scope_guard([this] { mExecutionInFlight.clear(); });

    // Flush relocated input data before sending (see
    // hal::utils::RequestRelocation for the relocation contract).
    if (relocation.input) {
        relocation.input->flush();
    }

    // send request packet
    const auto sendStatus = mRequestChannelSender->sendPacket(requestPacket);
    if (!sendStatus.ok()) {
        // fallback to another execution path if the packet could not be sent
        if (fallback) {
            return fallback();
        }
        return NN_ERROR() << "Error sending FMQ packet: " << sendStatus.error();
    }

    // get result packet (blocks until the service writes a result)
    const auto [status, outputShapes, timing] =
            NN_TRY(hal::utils::makeExecutionFailure(mResultChannelReceiver->getBlocking()));

    // Flush relocated output data back for the caller.
    if (relocation.output) {
        relocation.output->flush();
    }
    return executionCallback(status, outputShapes, timing);
}
438
create(std::shared_ptr<const ExecutionBurstController> controller,std::vector<FmqRequestDatum> request,hal::utils::RequestRelocation relocation,std::vector<ExecutionBurstController::OptionalCacheHold> cacheHolds)439 nn::GeneralResult<std::shared_ptr<const BurstExecution>> BurstExecution::create(
440 std::shared_ptr<const ExecutionBurstController> controller,
441 std::vector<FmqRequestDatum> request, hal::utils::RequestRelocation relocation,
442 std::vector<ExecutionBurstController::OptionalCacheHold> cacheHolds) {
443 if (controller == nullptr) {
444 return NN_ERROR() << "V1_2::utils::BurstExecution::create must have non-null controller";
445 }
446
447 return std::make_shared<const BurstExecution>(PrivateConstructorTag{}, std::move(controller),
448 std::move(request), std::move(relocation),
449 std::move(cacheHolds));
450 }
451
// Private constructor; use BurstExecution::create, which validates the
// controller before construction.
BurstExecution::BurstExecution(PrivateConstructorTag /*tag*/,
                               std::shared_ptr<const ExecutionBurstController> controller,
                               std::vector<FmqRequestDatum> request,
                               hal::utils::RequestRelocation relocation,
                               std::vector<ExecutionBurstController::OptionalCacheHold> cacheHolds)
    : kController(std::move(controller)),
      kRequest(std::move(request)),
      kRelocation(std::move(relocation)),
      kCacheHolds(std::move(cacheHolds)) {}
461
// Executes the pre-serialized request packet on the controller. The deadline
// argument is unused by this implementation, and no fallback is provided, so
// FMQ transport failures surface as errors.
nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> BurstExecution::compute(
        const nn::OptionalTimePoint& /*deadline*/) const {
    return kController->executeInternal(kRequest, kRelocation, /*fallback=*/nullptr);
}
466
// Fenced execution is not part of the burst protocol, so this always fails
// with GENERAL_FAILURE.
nn::GeneralResult<std::pair<nn::SyncFence, nn::ExecuteFencedInfoCallback>>
BurstExecution::computeFenced(const std::vector<nn::SyncFence>& /*waitFor*/,
                              const nn::OptionalTimePoint& /*deadline*/,
                              const nn::OptionalDuration& /*timeoutDurationAfterFence*/) const {
    return NN_ERROR(nn::ErrorStatus::GENERAL_FAILURE)
           << "IExecution::computeFenced is not supported on burst object";
}
474
475 } // namespace android::hardware::neuralnetworks::V1_2::utils
476