1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_NNAPI_IEXECUTION_H
18 #define ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_NNAPI_IEXECUTION_H
19 
20 #include <utility>
21 #include <vector>
22 
23 #include "nnapi/Types.h"
24 
25 namespace android::nn {
26 
27 /**
28  * IExecution describes a reusable execution with request and several other execution configurations
29  * fixed. It is used to launch computations.
30  *
31  * IExecution manages a sequence of computations of the same prepared model with the same request
32  * and configurations. An IExecution object is used to control a set of computation, and to preserve
33  * resources between computations, enabling computations to have lower overhead. IExecution objects
34  * enable some optimizations:
35  * (1) An IExecution object can preserve resources between computations. For example, a driver can
36  *     map a memory object when the IExecution object is created and cache the mapping for reuse in
37  *     subsequent computations. Any cached resource can be released when the IExecution object is
38  *     destroyed.
39  * (2) An IExecution object may be used for at most one computation at a time. This enables any
40  *     transient computation resources such as intermediate tensors to be allocated once when the
41  *     IExecution object is created and freed when the IExecution object is destroyed.
42  * (3) An IExecution object is created for a fixed request. This enables the implementation to apply
43  *     request-specific optimizations. For example, an implementation can avoid request validation
44  *     and conversions when the IExecution object is reused. An implementation may also choose to
45  *     specialize the dynamic tensor shapes in a prepared model according to the request.
46  *
47  * This interface is thread-safe, and any class that implements this interface must be thread-safe.
48  */
49 class IExecution {
50    public:
51     /**
52      * Performs a synchronous computation on a reusable execution.
53      *
54      * At most one computation may occur on a execution object at any given time, either by means of
55      * IExecution::compute or IExecution::computeFenced.
56      *
57      * The computation is performed synchronously with respect to the caller.
58      * IExecution::compute must verify the inputs to the function are correct. If there is an
59      * error, IExecution::compute must immediately return {@link ErrorStatus::INVALID_ARGUMENT}
60      * as a ExecutionError. If the inputs to the function are valid and there is no error,
61      * IExecution::compute must perform the computation, and must not return until the computation
62      * is complete.
63      *
64      * The caller must not change the content of any data object referenced by the request provided
65      * in IPreparedModel::createReusableExecution (described by the {@link DataLocation} of a {@link
66      * RequestArgument}) until IExecution::compute returns. IExecution::compute must not change the
67      * content of any of the data objects corresponding to request inputs.
68      *
69      * If the prepared model that the execution is created from was prepared from a model wherein
70      * all tensor operands have fully specified dimensions, and the inputs to the function are
71      * valid, and at computation time every operation's input operands have legal values, then the
72      * computation should complete successfully. There must be no failure unless the device itself
73      * is in a bad state.
74      *
75      * IExecution::compute may be called with an optional deadline. If the computation is not
76      * able to be completed before the provided deadline, the computation may be aborted, and either
77      * {@link ErrorStatus::MISSED_DEADLINE_TRANSIENT} or {@link
78      * ErrorStatus::MISSED_DEADLINE_PERSISTENT} may be returned as a ExecutionError.
79      *
80      * @param deadline Optional time point. If provided, compute is expected to complete by this
81      *     time point. If it is not able to be completed by the deadline, the computation may be
82      *     aborted.
83      * @return A pair consisting of:
84      *     - A list of shape information of model output operands. The index into "outputShapes"
85      *       corresponds to the index of the output operand in the Request outputs vector.
86      *       outputShapes must be empty unless the computation is successful or the ExecutionResult
87      *       is {@link ErrorStatus::OUTPUT_INSUFFICIENT_SIZE}. outputShapes may be empty if the
88      *       computation is successful and all model output operands are fully-specified at
89      *       computation time. outputShapes must have the same number of elements as the number of
90      *       model output operands if the ExecutionResult is
91      *       {@link ErrorStatus::OUTPUT_INSUFFICIENT_SIZE}, or if the computation is successful and
92      *       the model has at least one output operand that is not fully-specified.
93      *     - Duration of computation. Unless measure is YES and the computation is successful, all
94      *       times must be reported as UINT64_MAX. A driver may choose to report any time as
95      *       UINT64_MAX, indicating that measurement is not available.
96      */
97     virtual ExecutionResult<std::pair<std::vector<OutputShape>, Timing>> compute(
98             const OptionalTimePoint& deadline) const = 0;
99 
100     /**
101      * Launch a fenced asynchronous computation on a reusabled execution.
102      *
103      * At most one computation may occur on a execution object at any given time, either by means of
104      * IExecution::compute or IExecution::computeFenced.
105      *
106      * The computation is performed asynchronously with respect to the caller.
107      * IExecution::computeFenced must verify its inputs are correct, and the usages of memory
108      * pools allocated by IDevice::allocate are valid. If there is an error,
109      * IExecution::computeFenced must immediately return {@link ErrorStatus::INVALID_ARGUMENT}
110      * as a GeneralError. If the inputs to the function are valid and there is no error,
111      * IExecution::computeFenced must dispatch an asynchronous task to perform the computation in
112      * the background, and immediately return with a sync fence that will be signaled once the
113      * computation is completed and a callback that can be used by the client to query the duration
114      * and runtime error status. If the task has finished before the call returns, an empty handle
115      * may be returned for syncFence. The computation must wait for all the sync fences (if any) in
116      * waitFor to be signaled before starting the actual computation.
117      *
118      * When the asynchronous task has finished its computation, it must immediately signal the
119      * syncFence returned from the IExecution::computeFenced call. After the syncFence is
120      * signaled, the task must not modify the content of any data object referenced by request
121      * (described by the {@link DataLocation} of a {@link Request::Argument}).
122      *
123      * IExecution::computeFenced may be called with an optional deadline and an optional
124      * duration. If the computation is not able to be completed before the provided deadline or
125      * within the timeout duration (measured from when all sync fences in waitFor are signaled),
126      * whichever comes earlier, the computation may be aborted, and either {@link
127      * ErrorStatus::MISSED_DEADLINE_TRANSIENT} or {@link ErrorStatus::MISSED_DEADLINE_PERSISTENT}
128      * may be returned as an GeneralError. The error due to an abort must be sent the same way as
129      * other errors, described above.
130      *
131      * If any of the sync fences in waitFor changes to error status after the
132      * IExecution::computeFenced call succeeds, or the computation is aborted because it cannot
133      * finish before the deadline has been reached or the duration has elapsed, the driver must
134      * immediately set the returned syncFence to error status.
135      *
136      * @param waitFor A vector of sync fence file descriptors. The computation must wait for all
137      *     sync fence to be signaled before starting the task.
138      * @param deadline The time by which computation is expected to complete. If the computation
139      *     cannot be finished by the deadline, the computation may be aborted.
140      * @param timeoutDurationAfterFence The timeout duration within which the computation is
141      *     expected to complete after all sync fences in waitFor are signaled.
142      * @return A pair consisting of:
143      *     - A syncFence that will be triggered when the task is completed. The syncFence will be
144      *       set to error if critical error occurs when doing actual evaluation.
145      *     - A callback can be used to query information like duration and detailed runtime error
146      *       status when the task is completed.
147      */
148     virtual GeneralResult<std::pair<SyncFence, ExecuteFencedInfoCallback>> computeFenced(
149             const std::vector<SyncFence>& waitFor, const OptionalTimePoint& deadline,
150             const OptionalDuration& timeoutDurationAfterFence) const = 0;
151 
152     // Public virtual destructor to allow objects to be stored (and destroyed) as smart pointers.
153     // E.g., std::unique_ptr<IExecution>.
154     virtual ~IExecution() = default;
155 
156    protected:
157     // Protect the non-destructor special member functions to prevent object slicing.
158     IExecution() = default;
159     IExecution(const IExecution&) = default;
160     IExecution(IExecution&&) noexcept = default;
161     IExecution& operator=(const IExecution&) = default;
162     IExecution& operator=(IExecution&&) noexcept = default;
163 };
164 
165 }  // namespace android::nn
166 
167 #endif  // ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_NNAPI_IEXECUTION_H
168