1 /* 2 * Copyright (C) 2021 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_NNAPI_IEXECUTION_H 18 #define ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_NNAPI_IEXECUTION_H 19 20 #include <utility> 21 #include <vector> 22 23 #include "nnapi/Types.h" 24 25 namespace android::nn { 26 27 /** 28 * IExecution describes a reusable execution with request and several other execution configurations 29 * fixed. It is used to launch computations. 30 * 31 * IExecution manages a sequence of computations of the same prepared model with the same request 32 * and configurations. An IExecution object is used to control a set of computation, and to preserve 33 * resources between computations, enabling computations to have lower overhead. IExecution objects 34 * enable some optimizations: 35 * (1) An IExecution object can preserve resources between computations. For example, a driver can 36 * map a memory object when the IExecution object is created and cache the mapping for reuse in 37 * subsequent computations. Any cached resource can be released when the IExecution object is 38 * destroyed. 39 * (2) An IExecution object may be used for at most one computation at a time. This enables any 40 * transient computation resources such as intermediate tensors to be allocated once when the 41 * IExecution object is created and freed when the IExecution object is destroyed. 42 * (3) An IExecution object is created for a fixed request. This enables the implementation to apply 43 * request-specific optimizations. For example, an implementation can avoid request validation 44 * and conversions when the IExecution object is reused. An implementation may also choose to 45 * specialize the dynamic tensor shapes in a prepared model according to the request. 46 * 47 * This interface is thread-safe, and any class that implements this interface must be thread-safe. 48 */ 49 class IExecution { 50 public: 51 /** 52 * Performs a synchronous computation on a reusable execution. 53 * 54 * At most one computation may occur on a execution object at any given time, either by means of 55 * IExecution::compute or IExecution::computeFenced. 56 * 57 * The computation is performed synchronously with respect to the caller. 58 * IExecution::compute must verify the inputs to the function are correct. If there is an 59 * error, IExecution::compute must immediately return {@link ErrorStatus::INVALID_ARGUMENT} 60 * as a ExecutionError. If the inputs to the function are valid and there is no error, 61 * IExecution::compute must perform the computation, and must not return until the computation 62 * is complete. 63 * 64 * The caller must not change the content of any data object referenced by the request provided 65 * in IPreparedModel::createReusableExecution (described by the {@link DataLocation} of a {@link 66 * RequestArgument}) until IExecution::compute returns. IExecution::compute must not change the 67 * content of any of the data objects corresponding to request inputs. 68 * 69 * If the prepared model that the execution is created from was prepared from a model wherein 70 * all tensor operands have fully specified dimensions, and the inputs to the function are 71 * valid, and at computation time every operation's input operands have legal values, then the 72 * computation should complete successfully. There must be no failure unless the device itself 73 * is in a bad state. 74 * 75 * IExecution::compute may be called with an optional deadline. If the computation is not 76 * able to be completed before the provided deadline, the computation may be aborted, and either 77 * {@link ErrorStatus::MISSED_DEADLINE_TRANSIENT} or {@link 78 * ErrorStatus::MISSED_DEADLINE_PERSISTENT} may be returned as a ExecutionError. 79 * 80 * @param deadline Optional time point. If provided, compute is expected to complete by this 81 * time point. If it is not able to be completed by the deadline, the computation may be 82 * aborted. 83 * @return A pair consisting of: 84 * - A list of shape information of model output operands. The index into "outputShapes" 85 * corresponds to the index of the output operand in the Request outputs vector. 86 * outputShapes must be empty unless the computation is successful or the ExecutionResult 87 * is {@link ErrorStatus::OUTPUT_INSUFFICIENT_SIZE}. outputShapes may be empty if the 88 * computation is successful and all model output operands are fully-specified at 89 * computation time. outputShapes must have the same number of elements as the number of 90 * model output operands if the ExecutionResult is 91 * {@link ErrorStatus::OUTPUT_INSUFFICIENT_SIZE}, or if the computation is successful and 92 * the model has at least one output operand that is not fully-specified. 93 * - Duration of computation. Unless measure is YES and the computation is successful, all 94 * times must be reported as UINT64_MAX. A driver may choose to report any time as 95 * UINT64_MAX, indicating that measurement is not available. 96 */ 97 virtual ExecutionResult<std::pair<std::vector<OutputShape>, Timing>> compute( 98 const OptionalTimePoint& deadline) const = 0; 99 100 /** 101 * Launch a fenced asynchronous computation on a reusabled execution. 102 * 103 * At most one computation may occur on a execution object at any given time, either by means of 104 * IExecution::compute or IExecution::computeFenced. 105 * 106 * The computation is performed asynchronously with respect to the caller. 107 * IExecution::computeFenced must verify its inputs are correct, and the usages of memory 108 * pools allocated by IDevice::allocate are valid. If there is an error, 109 * IExecution::computeFenced must immediately return {@link ErrorStatus::INVALID_ARGUMENT} 110 * as a GeneralError. If the inputs to the function are valid and there is no error, 111 * IExecution::computeFenced must dispatch an asynchronous task to perform the computation in 112 * the background, and immediately return with a sync fence that will be signaled once the 113 * computation is completed and a callback that can be used by the client to query the duration 114 * and runtime error status. If the task has finished before the call returns, an empty handle 115 * may be returned for syncFence. The computation must wait for all the sync fences (if any) in 116 * waitFor to be signaled before starting the actual computation. 117 * 118 * When the asynchronous task has finished its computation, it must immediately signal the 119 * syncFence returned from the IExecution::computeFenced call. After the syncFence is 120 * signaled, the task must not modify the content of any data object referenced by request 121 * (described by the {@link DataLocation} of a {@link Request::Argument}). 122 * 123 * IExecution::computeFenced may be called with an optional deadline and an optional 124 * duration. If the computation is not able to be completed before the provided deadline or 125 * within the timeout duration (measured from when all sync fences in waitFor are signaled), 126 * whichever comes earlier, the computation may be aborted, and either {@link 127 * ErrorStatus::MISSED_DEADLINE_TRANSIENT} or {@link ErrorStatus::MISSED_DEADLINE_PERSISTENT} 128 * may be returned as an GeneralError. The error due to an abort must be sent the same way as 129 * other errors, described above. 130 * 131 * If any of the sync fences in waitFor changes to error status after the 132 * IExecution::computeFenced call succeeds, or the computation is aborted because it cannot 133 * finish before the deadline has been reached or the duration has elapsed, the driver must 134 * immediately set the returned syncFence to error status. 135 * 136 * @param waitFor A vector of sync fence file descriptors. The computation must wait for all 137 * sync fence to be signaled before starting the task. 138 * @param deadline The time by which computation is expected to complete. If the computation 139 * cannot be finished by the deadline, the computation may be aborted. 140 * @param timeoutDurationAfterFence The timeout duration within which the computation is 141 * expected to complete after all sync fences in waitFor are signaled. 142 * @return A pair consisting of: 143 * - A syncFence that will be triggered when the task is completed. The syncFence will be 144 * set to error if critical error occurs when doing actual evaluation. 145 * - A callback can be used to query information like duration and detailed runtime error 146 * status when the task is completed. 147 */ 148 virtual GeneralResult<std::pair<SyncFence, ExecuteFencedInfoCallback>> computeFenced( 149 const std::vector<SyncFence>& waitFor, const OptionalTimePoint& deadline, 150 const OptionalDuration& timeoutDurationAfterFence) const = 0; 151 152 // Public virtual destructor to allow objects to be stored (and destroyed) as smart pointers. 153 // E.g., std::unique_ptr<IExecution>. 154 virtual ~IExecution() = default; 155 156 protected: 157 // Protect the non-destructor special member functions to prevent object slicing. 158 IExecution() = default; 159 IExecution(const IExecution&) = default; 160 IExecution(IExecution&&) noexcept = default; 161 IExecution& operator=(const IExecution&) = default; 162 IExecution& operator=(IExecution&&) noexcept = default; 163 }; 164 165 } // namespace android::nn 166 167 #endif // ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_NNAPI_IEXECUTION_H 168