/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "libtimeinstate"

#include "cputimeinstate.h"
#include <bpf_timeinstate.h>

#include <dirent.h>
#include <errno.h>
#include <inttypes.h>
#include <sys/sysinfo.h>

#include <algorithm>
#include <functional>
#include <mutex>
#include <numeric>
#include <optional>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>

#include <android-base/file.h>
#include <android-base/parseint.h>
#include <android-base/stringprintf.h>
#include <android-base/strings.h>
#include <android-base/unique_fd.h>
#include <bpf/BpfMap.h>
#include <libbpf.h>
#include <log/log.h>

using android::base::StringPrintf;
using android::base::unique_fd;

namespace android {
namespace bpf {

static std::mutex gInitializedMutex;
static bool gInitialized = false;
static std::mutex gTrackingMutex;
static bool gTracking = false;
static uint32_t gNPolicies = 0;
static uint32_t gNCpus = 0;
static std::vector<std::vector<uint32_t>> gPolicyFreqs;
static std::vector<std::vector<uint32_t>> gPolicyCpus;
static std::set<uint32_t> gAllFreqs;
static unique_fd gTisTotalMapFd;
static unique_fd gTisMapFd;
static unique_fd gConcurrentMapFd;
static unique_fd gUidLastUpdateMapFd;
static unique_fd gPidTisMapFd;

static std::optional<std::vector<uint32_t>> readNumbersFromFile(const std::string &path) {
    std::string data;

    if (!android::base::ReadFileToString(path, &data)) return {};

    auto strings = android::base::Split(data, " \n");
    std::vector<uint32_t> ret;
    for (const auto &s : strings) {
        if (s.empty()) continue;
        uint32_t n;
        if (!android::base::ParseUint(s, &n)) return {};
        ret.emplace_back(n);
    }
    return ret;
}

static int isPolicyFile(const struct dirent *d) {
    return android::base::StartsWith(d->d_name, "policy");
}

static int comparePolicyFiles(const struct dirent **d1, const struct dirent **d2) {
    uint32_t policyN1, policyN2;
    if (sscanf((*d1)->d_name, "policy%" SCNu32 "", &policyN1) != 1 ||
        sscanf((*d2)->d_name, "policy%" SCNu32 "", &policyN2) != 1)
        return 0;
    return policyN1 - policyN2;
}

static bool initGlobals() {
    std::lock_guard<std::mutex> guard(gInitializedMutex);
    if (gInitialized) return true;

    gNCpus = get_nprocs_conf();

    struct dirent **dirlist;
    const char basepath[] = "/sys/devices/system/cpu/cpufreq";
    int ret = scandir(basepath, &dirlist, isPolicyFile, comparePolicyFiles);
    if (ret == -1 || ret == 0) return false;
    gNPolicies = ret;

    std::vector<std::string> policyFileNames;
    for (uint32_t i = 0; i < gNPolicies; ++i) {
        policyFileNames.emplace_back(dirlist[i]->d_name);
        free(dirlist[i]);
    }
    free(dirlist);

    for (const auto &policy : policyFileNames) {
        std::vector<uint32_t> freqs;
        for (const auto &name : {"available", "boost"}) {
            std::string path =
                    StringPrintf("%s/%s/scaling_%s_frequencies", basepath, policy.c_str(), name);
            auto nums = readNumbersFromFile(path);
            if (!nums) continue;
            freqs.insert(freqs.end(), nums->begin(), nums->end());
        }
        if (freqs.empty()) return false;
        std::sort(freqs.begin(), freqs.end());
        gPolicyFreqs.emplace_back(freqs);

        for (auto freq : freqs) gAllFreqs.insert(freq);

        std::string path = StringPrintf("%s/%s/%s", basepath, policy.c_str(), "related_cpus");
        auto cpus = readNumbersFromFile(path);
        if (!cpus) return false;
        gPolicyCpus.emplace_back(*cpus);
    }

    gTisTotalMapFd =
            unique_fd{bpf_obj_get(BPF_FS_PATH "map_time_in_state_total_time_in_state_map")};
    if (gTisTotalMapFd < 0) return false;

    gTisMapFd = unique_fd{bpf_obj_get(BPF_FS_PATH "map_time_in_state_uid_time_in_state_map")};
    if (gTisMapFd < 0) return false;

    gConcurrentMapFd =
            unique_fd{bpf_obj_get(BPF_FS_PATH "map_time_in_state_uid_concurrent_times_map")};
    if (gConcurrentMapFd < 0) return false;

    gUidLastUpdateMapFd =
            unique_fd{bpf_obj_get(BPF_FS_PATH "map_time_in_state_uid_last_update_map")};
    if (gUidLastUpdateMapFd < 0) return false;

    gPidTisMapFd = unique_fd{mapRetrieveRO(BPF_FS_PATH "map_time_in_state_pid_time_in_state_map")};
    if (gPidTisMapFd < 0) return false;

    unique_fd trackedPidMapFd(mapRetrieveWO(BPF_FS_PATH "map_time_in_state_pid_tracked_map"));
    if (trackedPidMapFd < 0) return false;

    gInitialized = true;
    return true;
}

static int retrieveProgramFd(const std::string &eventType, const std::string &eventName) {
    std::string path = StringPrintf(BPF_FS_PATH "prog_time_in_state_tracepoint_%s_%s",
                                    eventType.c_str(), eventName.c_str());
    return retrieveProgram(path.c_str());
}

static bool attachTracepointProgram(const std::string &eventType, const std::string &eventName) {
    int prog_fd = retrieveProgramFd(eventType, eventName);
    if (prog_fd < 0) return false;
    return bpf_attach_tracepoint(prog_fd, eventType.c_str(), eventName.c_str()) >= 0;
}

static std::optional<uint32_t> getPolicyFreqIdx(uint32_t policy) {
    auto path = StringPrintf("/sys/devices/system/cpu/cpufreq/policy%u/scaling_cur_freq",
                             gPolicyCpus[policy][0]);
    auto freqVec = readNumbersFromFile(path);
    if (!freqVec.has_value() || freqVec->size() != 1) return {};
    for (uint32_t idx = 0; idx < gPolicyFreqs[policy].size(); ++idx) {
        if ((*freqVec)[0] == gPolicyFreqs[policy][idx]) return idx + 1;
    }
    return {};
}

// Check if tracking is expected to work without activating it.
bool isTrackingUidTimesSupported() {
    auto freqs = getCpuFreqs();
    if (!freqs || freqs->empty()) return false;
    if (gTracking) return true;
    if (retrieveProgramFd("sched", "sched_switch") < 0) return false;
    if (retrieveProgramFd("power", "cpu_frequency") < 0) return false;
    if (retrieveProgramFd("sched", "sched_process_free") < 0) return false;
    return true;
}

// Start tracking and aggregating data to be reported by getUidCpuFreqTimes and getUidsCpuFreqTimes.
// Returns true on success, false otherwise.
// Tracking is active only once a live process has successfully called this function; if the calling
// process dies then it must be called again to resume tracking.
// This function should *not* be called while tracking is already active; doing so is unnecessary
// and can lead to accounting errors.
bool startTrackingUidTimes() {
    std::lock_guard<std::mutex> guard(gTrackingMutex);
    if (!initGlobals()) return false;
    if (gTracking) return true;

    unique_fd cpuPolicyFd(mapRetrieveWO(BPF_FS_PATH "map_time_in_state_cpu_policy_map"));
    if (cpuPolicyFd < 0) return false;

    for (uint32_t i = 0; i < gPolicyCpus.size(); ++i) {
        for (auto &cpu : gPolicyCpus[i]) {
            if (writeToMapEntry(cpuPolicyFd, &cpu, &i, BPF_ANY)) return false;
        }
    }

    unique_fd freqToIdxFd(mapRetrieveWO(BPF_FS_PATH "map_time_in_state_freq_to_idx_map"));
    if (freqToIdxFd < 0) return false;
    freq_idx_key_t key;
    for (uint32_t i = 0; i < gNPolicies; ++i) {
        key.policy = i;
        for (uint32_t j = 0; j < gPolicyFreqs[i].size(); ++j) {
            key.freq = gPolicyFreqs[i][j];
            // Start indexes at 1 so that uninitialized state is distinguishable from lowest freq.
            // The uid_times map still uses 0-based indexes, and the sched_switch program handles
            // conversion between them, so this does not affect our map reading code.
            uint32_t idx = j + 1;
            if (writeToMapEntry(freqToIdxFd, &key, &idx, BPF_ANY)) return false;
        }
    }

    unique_fd cpuLastUpdateFd(mapRetrieveWO(BPF_FS_PATH "map_time_in_state_cpu_last_update_map"));
    if (cpuLastUpdateFd < 0) return false;
    std::vector<uint64_t> zeros(get_nprocs_conf(), 0);
    uint32_t zero = 0;
    if (writeToMapEntry(cpuLastUpdateFd, &zero, zeros.data(), BPF_ANY)) return false;

    unique_fd nrActiveFd(mapRetrieveWO(BPF_FS_PATH "map_time_in_state_nr_active_map"));
    if (nrActiveFd < 0) return false;
    if (writeToMapEntry(nrActiveFd, &zero, &zero, BPF_ANY)) return false;

    unique_fd policyNrActiveFd(mapRetrieveWO(BPF_FS_PATH "map_time_in_state_policy_nr_active_map"));
    if (policyNrActiveFd < 0) return false;
    for (uint32_t i = 0; i < gNPolicies; ++i) {
        if (writeToMapEntry(policyNrActiveFd, &i, &zero, BPF_ANY)) return false;
    }

    unique_fd policyFreqIdxFd(mapRetrieveWO(BPF_FS_PATH "map_time_in_state_policy_freq_idx_map"));
    if (policyFreqIdxFd < 0) return false;
    for (uint32_t i = 0; i < gNPolicies; ++i) {
        auto freqIdx = getPolicyFreqIdx(i);
        if (!freqIdx.has_value()) return false;
        if (writeToMapEntry(policyFreqIdxFd, &i, &(*freqIdx), BPF_ANY)) return false;
    }

    gTracking = attachTracepointProgram("sched", "sched_switch") &&
            attachTracepointProgram("power", "cpu_frequency") &&
            attachTracepointProgram("sched", "sched_process_free");
    return gTracking;
}

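// Illustrative call sequence (sketch only, not part of this library's API): a caller that owns
// time-in-state accounting would typically probe support once and then enable tracking:
//
//     if (android::bpf::isTrackingUidTimesSupported() && android::bpf::startTrackingUidTimes()) {
//         // Tracking is active; getUidCpuFreqTimes() and related readers can be polled.
//     }
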
std::optional<std::vector<std::vector<uint32_t>>> getCpuFreqs() {
    if (!gInitialized && !initGlobals()) return {};
    return gPolicyFreqs;
}

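// Retrieve the system-wide total time-in-state data.
// Return contains no value on error, otherwise it contains a vector of vectors where ti_j is the
// total ns recorded for the ith cluster at its jth lowest freq, summed across that cluster's CPUs.
// Only the first MAX_FREQS_FOR_TOTAL frequencies of each cluster are populated.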
std::optional<std::vector<std::vector<uint64_t>>> getTotalCpuFreqTimes() {
    if (!gInitialized && !initGlobals()) return {};

    std::vector<std::vector<uint64_t>> out;
    uint32_t maxFreqCount = 0;
    for (const auto &freqList : gPolicyFreqs) {
        if (freqList.size() > maxFreqCount) maxFreqCount = freqList.size();
        out.emplace_back(freqList.size(), 0);
    }

    std::vector<uint64_t> vals(gNCpus);
    const uint32_t freqCount = maxFreqCount <= MAX_FREQS_FOR_TOTAL ? maxFreqCount :
            MAX_FREQS_FOR_TOTAL;
    for (uint32_t freqIdx = 0; freqIdx < freqCount; ++freqIdx) {
        if (findMapEntry(gTisTotalMapFd, &freqIdx, vals.data())) return {};
        for (uint32_t policyIdx = 0; policyIdx < gNPolicies; ++policyIdx) {
            if (freqIdx >= gPolicyFreqs[policyIdx].size()) continue;
            for (const auto &cpu : gPolicyCpus[policyIdx]) {
                out[policyIdx][freqIdx] += vals[cpu];
            }
        }
    }

    return out;
}

// Retrieve the times in ns that uid spent running at each CPU frequency.
// Return contains no value on error, otherwise it contains a vector of vectors using the format:
// [[t0_0, t0_1, ...],
//  [t1_0, t1_1, ...], ...]
// where ti_j is the ns that uid spent running on the ith cluster at that cluster's jth lowest freq.
std::optional<std::vector<std::vector<uint64_t>>> getUidCpuFreqTimes(uint32_t uid) {
    if (!gInitialized && !initGlobals()) return {};

    std::vector<std::vector<uint64_t>> out;
    uint32_t maxFreqCount = 0;
    for (const auto &freqList : gPolicyFreqs) {
        if (freqList.size() > maxFreqCount) maxFreqCount = freqList.size();
        out.emplace_back(freqList.size(), 0);
    }

    std::vector<tis_val_t> vals(gNCpus);
    time_key_t key = {.uid = uid};
    for (uint32_t i = 0; i <= (maxFreqCount - 1) / FREQS_PER_ENTRY; ++i) {
        key.bucket = i;
        if (findMapEntry(gTisMapFd, &key, vals.data())) {
            if (errno != ENOENT || getFirstMapKey(gTisMapFd, &key)) return {};
            continue;
        }

        auto offset = i * FREQS_PER_ENTRY;
        auto nextOffset = (i + 1) * FREQS_PER_ENTRY;
        for (uint32_t j = 0; j < gNPolicies; ++j) {
            if (offset >= gPolicyFreqs[j].size()) continue;
            auto begin = out[j].begin() + offset;
            auto end = nextOffset < gPolicyFreqs[j].size() ? begin + FREQS_PER_ENTRY : out[j].end();

            for (const auto &cpu : gPolicyCpus[j]) {
                std::transform(begin, end, std::begin(vals[cpu].ar), begin, std::plus<uint64_t>());
            }
        }
    }

    return out;
}

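// Illustrative read (sketch only): the result shares its [cluster][freq index] layout with
// getCpuFreqs(), so the two can be zipped to label each duration with its frequency. The uid
// below is a caller-chosen placeholder.
//
//     uint32_t uid = ...;  // uid of interest
//     auto freqs = android::bpf::getCpuFreqs();
//     auto times = android::bpf::getUidCpuFreqTimes(uid);
//     if (freqs && times) {
//         for (size_t c = 0; c < times->size(); ++c)
//             for (size_t f = 0; f < (*times)[c].size(); ++f)
//                 ALOGD("cluster %zu freq %u: %" PRIu64 " ns", c, (*freqs)[c][f], (*times)[c][f]);
//     }
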
static std::optional<bool> uidUpdatedSince(uint32_t uid, uint64_t lastUpdate,
                                           uint64_t *newLastUpdate) {
    uint64_t uidLastUpdate;
    if (findMapEntry(gUidLastUpdateMapFd, &uid, &uidLastUpdate)) return {};
    // Updates that occurred during the previous read may have been missed. To mitigate
    // this, don't ignore entries updated up to 1s before *lastUpdate
    constexpr uint64_t NSEC_PER_SEC = 1000000000;
    if (uidLastUpdate + NSEC_PER_SEC < lastUpdate) return false;
    if (uidLastUpdate > *newLastUpdate) *newLastUpdate = uidLastUpdate;
    return true;
}

// Retrieve the times in ns that each uid spent running at each CPU freq.
// Return contains no value on error, otherwise it contains a map from uids to vectors of vectors
// using the format:
// { uid0 -> [[t0_0_0, t0_0_1, ...], [t0_1_0, t0_1_1, ...], ...],
//   uid1 -> [[t1_0_0, t1_0_1, ...], [t1_1_0, t1_1_1, ...], ...], ... }
// where ti_j_k is the ns uid i spent running on the jth cluster at the cluster's kth lowest freq.
std::optional<std::unordered_map<uint32_t, std::vector<std::vector<uint64_t>>>>
getUidsCpuFreqTimes() {
    return getUidsUpdatedCpuFreqTimes(nullptr);
}

// Retrieve the times in ns that each uid spent running at each CPU freq, excluding UIDs that have
// not run since before lastUpdate.
// Return format is the same as getUidsCpuFreqTimes()
std::optional<std::unordered_map<uint32_t, std::vector<std::vector<uint64_t>>>>
getUidsUpdatedCpuFreqTimes(uint64_t *lastUpdate) {
    if (!gInitialized && !initGlobals()) return {};
    time_key_t key, prevKey;
    std::unordered_map<uint32_t, std::vector<std::vector<uint64_t>>> map;
    if (getFirstMapKey(gTisMapFd, &key)) {
        if (errno == ENOENT) return map;
        return std::nullopt;
    }

    std::vector<std::vector<uint64_t>> mapFormat;
    for (const auto &freqList : gPolicyFreqs) mapFormat.emplace_back(freqList.size(), 0);

    uint64_t newLastUpdate = lastUpdate ? *lastUpdate : 0;
    std::vector<tis_val_t> vals(gNCpus);
    do {
        if (lastUpdate) {
            auto uidUpdated = uidUpdatedSince(key.uid, *lastUpdate, &newLastUpdate);
            if (!uidUpdated.has_value()) return {};
            if (!*uidUpdated) continue;
        }
        if (findMapEntry(gTisMapFd, &key, vals.data())) return {};
        if (map.find(key.uid) == map.end()) map.emplace(key.uid, mapFormat);

        auto offset = key.bucket * FREQS_PER_ENTRY;
        auto nextOffset = (key.bucket + 1) * FREQS_PER_ENTRY;
        for (uint32_t i = 0; i < gNPolicies; ++i) {
            if (offset >= gPolicyFreqs[i].size()) continue;
            auto begin = map[key.uid][i].begin() + offset;
            auto end = nextOffset < gPolicyFreqs[i].size() ? begin + FREQS_PER_ENTRY :
                map[key.uid][i].end();
            for (const auto &cpu : gPolicyCpus[i]) {
                std::transform(begin, end, std::begin(vals[cpu].ar), begin, std::plus<uint64_t>());
            }
        }
        prevKey = key;
    } while (prevKey = key, !getNextMapKey(gTisMapFd, &prevKey, &key));
    if (errno != ENOENT) return {};
    if (lastUpdate && newLastUpdate > *lastUpdate) *lastUpdate = newLastUpdate;
    return map;
}

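// Illustrative polling pattern (sketch only): a periodic reader can feed the lastUpdate value
// returned by one call into the next so that only UIDs that ran in between are reported.
// "collecting" is a caller-defined loop condition, not part of this library.
//
//     uint64_t lastUpdate = 0;
//     while (collecting) {
//         auto delta = android::bpf::getUidsUpdatedCpuFreqTimes(&lastUpdate);
//         if (delta) { /* merge *delta into caller-side totals */ }
//         // ... wait for the next collection interval ...
//     }
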
static bool verifyConcurrentTimes(const concurrent_time_t &ct) {
    uint64_t activeSum = std::accumulate(ct.active.begin(), ct.active.end(), (uint64_t)0);
    uint64_t policySum = 0;
    for (const auto &vec : ct.policy) {
        policySum += std::accumulate(vec.begin(), vec.end(), (uint64_t)0);
    }
    return activeSum == policySum;
}

// Retrieve the times in ns that uid spent running concurrently with each possible number of other
// tasks on each cluster (policy times) and overall (active times).
// Return contains no value on error, otherwise it contains a concurrent_time_t with the format:
// {.active = [a0, a1, ...], .policy = [[p0_0, p0_1, ...], [p1_0, p1_1, ...], ...]}
// where ai is the ns spent running concurrently with tasks on i other cpus and pi_j is the ns spent
// running on the ith cluster, concurrently with tasks on j other cpus in the same cluster.
std::optional<concurrent_time_t> getUidConcurrentTimes(uint32_t uid, bool retry) {
    if (!gInitialized && !initGlobals()) return {};
    concurrent_time_t ret = {.active = std::vector<uint64_t>(gNCpus, 0)};
    for (const auto &cpuList : gPolicyCpus) ret.policy.emplace_back(cpuList.size(), 0);
    std::vector<concurrent_val_t> vals(gNCpus);
    time_key_t key = {.uid = uid};
    for (key.bucket = 0; key.bucket <= (gNCpus - 1) / CPUS_PER_ENTRY; ++key.bucket) {
        if (findMapEntry(gConcurrentMapFd, &key, vals.data())) {
            if (errno != ENOENT || getFirstMapKey(gConcurrentMapFd, &key)) return {};
            continue;
        }
        auto offset = key.bucket * CPUS_PER_ENTRY;
        auto nextOffset = (key.bucket + 1) * CPUS_PER_ENTRY;

        auto activeBegin = ret.active.begin() + offset;
        auto activeEnd = nextOffset < gNCpus ? activeBegin + CPUS_PER_ENTRY : ret.active.end();

        for (uint32_t cpu = 0; cpu < gNCpus; ++cpu) {
            std::transform(activeBegin, activeEnd, std::begin(vals[cpu].active), activeBegin,
                           std::plus<uint64_t>());
        }

        for (uint32_t policy = 0; policy < gNPolicies; ++policy) {
            if (offset >= gPolicyCpus[policy].size()) continue;
            auto policyBegin = ret.policy[policy].begin() + offset;
            auto policyEnd = nextOffset < gPolicyCpus[policy].size() ? policyBegin + CPUS_PER_ENTRY
                                                                     : ret.policy[policy].end();

            for (const auto &cpu : gPolicyCpus[policy]) {
                std::transform(policyBegin, policyEnd, std::begin(vals[cpu].policy), policyBegin,
                               std::plus<uint64_t>());
            }
        }
    }
    if (!verifyConcurrentTimes(ret) && retry) return getUidConcurrentTimes(uid, false);
    return ret;
}

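// Illustrative use (sketch only): the active and policy views account for the same runtime, so a
// caller can total either one; verifyConcurrentTimes() above relies on the two sums matching.
//
//     auto ct = android::bpf::getUidConcurrentTimes(uid);
//     if (ct) {
//         uint64_t totalNs = std::accumulate(ct->active.begin(), ct->active.end(), uint64_t{0});
//         // totalNs equals the sum over all ct->policy entries as well.
//     }
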
// Retrieve the times in ns that each uid spent running concurrently with each possible number of
// other tasks on each cluster (policy times) and overall (active times).
// Return contains no value on error, otherwise it contains a map from uids to concurrent_time_t's
// using the format:
// { uid0 -> {.active = [a0, a1, ...], .policy = [[p0_0, p0_1, ...], [p1_0, p1_1, ...], ...] }, ...}
// where ai is the ns spent running concurrently with tasks on i other cpus and pi_j is the ns spent
// running on the ith cluster, concurrently with tasks on j other cpus in the same cluster.
std::optional<std::unordered_map<uint32_t, concurrent_time_t>> getUidsConcurrentTimes() {
    return getUidsUpdatedConcurrentTimes(nullptr);
}

// Retrieve the times in ns that each uid spent running concurrently with each possible number of
// other tasks on each cluster (policy times) and overall (active times), excluding UIDs that have
// not run since before lastUpdate.
// Return format is the same as getUidsConcurrentTimes()
std::optional<std::unordered_map<uint32_t, concurrent_time_t>> getUidsUpdatedConcurrentTimes(
        uint64_t *lastUpdate) {
    if (!gInitialized && !initGlobals()) return {};
    time_key_t key, prevKey;
    std::unordered_map<uint32_t, concurrent_time_t> ret;
    if (getFirstMapKey(gConcurrentMapFd, &key)) {
        if (errno == ENOENT) return ret;
        return {};
    }

    concurrent_time_t retFormat = {.active = std::vector<uint64_t>(gNCpus, 0)};
    for (const auto &cpuList : gPolicyCpus) retFormat.policy.emplace_back(cpuList.size(), 0);

    std::vector<concurrent_val_t> vals(gNCpus);
    std::vector<uint64_t>::iterator activeBegin, activeEnd, policyBegin, policyEnd;

    uint64_t newLastUpdate = lastUpdate ? *lastUpdate : 0;
    do {
        if (key.bucket > (gNCpus - 1) / CPUS_PER_ENTRY) return {};
        if (lastUpdate) {
            auto uidUpdated = uidUpdatedSince(key.uid, *lastUpdate, &newLastUpdate);
            if (!uidUpdated.has_value()) return {};
            if (!*uidUpdated) continue;
        }
        if (findMapEntry(gConcurrentMapFd, &key, vals.data())) return {};
        if (ret.find(key.uid) == ret.end()) ret.emplace(key.uid, retFormat);

        auto offset = key.bucket * CPUS_PER_ENTRY;
        auto nextOffset = (key.bucket + 1) * CPUS_PER_ENTRY;

        // Offset into the active vector by the bucket position, mirroring getUidConcurrentTimes();
        // otherwise buckets past the first would be accumulated onto the wrong entries.
        activeBegin = ret[key.uid].active.begin() + offset;
        activeEnd = nextOffset < gNCpus ? activeBegin + CPUS_PER_ENTRY : ret[key.uid].active.end();

        for (uint32_t cpu = 0; cpu < gNCpus; ++cpu) {
            std::transform(activeBegin, activeEnd, std::begin(vals[cpu].active), activeBegin,
                           std::plus<uint64_t>());
        }

        for (uint32_t policy = 0; policy < gNPolicies; ++policy) {
            if (offset >= gPolicyCpus[policy].size()) continue;
            policyBegin = ret[key.uid].policy[policy].begin() + offset;
            policyEnd = nextOffset < gPolicyCpus[policy].size() ? policyBegin + CPUS_PER_ENTRY
                                                                : ret[key.uid].policy[policy].end();

            for (const auto &cpu : gPolicyCpus[policy]) {
                std::transform(policyBegin, policyEnd, std::begin(vals[cpu].policy), policyBegin,
                               std::plus<uint64_t>());
            }
        }
    } while (prevKey = key, !getNextMapKey(gConcurrentMapFd, &prevKey, &key));
    if (errno != ENOENT) return {};
    for (const auto &[key, value] : ret) {
        if (!verifyConcurrentTimes(value)) {
            auto val = getUidConcurrentTimes(key, false);
            if (val.has_value()) ret[key] = val.value();
        }
    }
    if (lastUpdate && newLastUpdate > *lastUpdate) *lastUpdate = newLastUpdate;
    return ret;
}

// Clear all time in state data for a given uid. Returns false on error, true otherwise.
// This is only suitable for clearing data when an app is uninstalled; if called on a UID with
// running tasks it will cause time in state vs. concurrent time totals to be inconsistent for that
// UID.
bool clearUidTimes(uint32_t uid) {
    if (!gInitialized && !initGlobals()) return false;

    time_key_t key = {.uid = uid};

    uint32_t maxFreqCount = 0;
    for (const auto &freqList : gPolicyFreqs) {
        if (freqList.size() > maxFreqCount) maxFreqCount = freqList.size();
    }

    tis_val_t zeros = {0};
    std::vector<tis_val_t> vals(gNCpus, zeros);
    for (key.bucket = 0; key.bucket <= (maxFreqCount - 1) / FREQS_PER_ENTRY; ++key.bucket) {
        if (writeToMapEntry(gTisMapFd, &key, vals.data(), BPF_EXIST) && errno != ENOENT)
            return false;
        if (deleteMapEntry(gTisMapFd, &key) && errno != ENOENT) return false;
    }

    concurrent_val_t czeros = { .active = {0}, .policy = {0}, };
    std::vector<concurrent_val_t> cvals(gNCpus, czeros);
    for (key.bucket = 0; key.bucket <= (gNCpus - 1) / CPUS_PER_ENTRY; ++key.bucket) {
        if (writeToMapEntry(gConcurrentMapFd, &key, cvals.data(), BPF_EXIST) && errno != ENOENT)
            return false;
        if (deleteMapEntry(gConcurrentMapFd, &key) && errno != ENOENT) return false;
    }

    if (deleteMapEntry(gUidLastUpdateMapFd, &uid) && errno != ENOENT) return false;
    return true;
}

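// Marks the process identified by pid for per-task CPU time-in-state tracking by claiming the
// first free slot (of MAX_TRACKED_PIDS) in the tracked-pid maps.
// Returns true on success, false if no slot is free or a map access fails.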
bool startTrackingProcessCpuTimes(pid_t pid) {
    if (!gInitialized && !initGlobals()) return false;

    unique_fd trackedPidHashMapFd(
            mapRetrieveWO(BPF_FS_PATH "map_time_in_state_pid_tracked_hash_map"));
    if (trackedPidHashMapFd < 0) return false;

    unique_fd trackedPidMapFd(mapRetrieveWO(BPF_FS_PATH "map_time_in_state_pid_tracked_map"));
    if (trackedPidMapFd < 0) return false;

    for (uint32_t index = 0; index < MAX_TRACKED_PIDS; index++) {
        // Find first available [index, pid] entry in the pid_tracked_hash_map map
        if (writeToMapEntry(trackedPidHashMapFd, &index, &pid, BPF_NOEXIST) != 0) {
            if (errno != EEXIST) {
                return false;
            }
            continue; // This index is already taken
        }

        tracked_pid_t tracked_pid = {.pid = pid, .state = TRACKED_PID_STATE_ACTIVE};
        if (writeToMapEntry(trackedPidMapFd, &index, &tracked_pid, BPF_ANY) != 0) {
            return false;
        }
        return true;
    }
    return false;
}

// Marks the specified task identified by its PID (aka TID) for CPU time-in-state tracking
// aggregated with other tasks sharing the same TGID and aggregation key.
bool startAggregatingTaskCpuTimes(pid_t pid, uint16_t aggregationKey) {
    if (!gInitialized && !initGlobals()) return false;

    unique_fd taskAggregationMapFd(
            mapRetrieveWO(BPF_FS_PATH "map_time_in_state_pid_task_aggregation_map"));
    if (taskAggregationMapFd < 0) return false;

    return writeToMapEntry(taskAggregationMapFd, &pid, &aggregationKey, BPF_ANY) == 0;
}

// Retrieves the times in ns that each thread spent running at each CPU freq, aggregated by
// aggregation key.
// Return contains no value on error, otherwise it contains a map from aggregation keys
// to vectors of vectors using the format:
// { aggKey0 -> [[t0_0_0, t0_0_1, ...], [t0_1_0, t0_1_1, ...], ...],
//   aggKey1 -> [[t1_0_0, t1_0_1, ...], [t1_1_0, t1_1_1, ...], ...], ... }
// where ti_j_k is the ns tid i spent running on the jth cluster at the cluster's kth lowest freq.
std::optional<std::unordered_map<uint16_t, std::vector<std::vector<uint64_t>>>>
getAggregatedTaskCpuFreqTimes(pid_t tgid, const std::vector<uint16_t> &aggregationKeys) {
    if (!gInitialized && !initGlobals()) return {};

    uint32_t maxFreqCount = 0;
    std::vector<std::vector<uint64_t>> mapFormat;
    for (const auto &freqList : gPolicyFreqs) {
        if (freqList.size() > maxFreqCount) maxFreqCount = freqList.size();
        mapFormat.emplace_back(freqList.size(), 0);
    }

    bool dataCollected = false;
    std::unordered_map<uint16_t, std::vector<std::vector<uint64_t>>> map;
    std::vector<tis_val_t> vals(gNCpus);
    for (uint16_t aggregationKey : aggregationKeys) {
        map.emplace(aggregationKey, mapFormat);

        aggregated_task_tis_key_t key{.tgid = tgid, .aggregation_key = aggregationKey};
        for (key.bucket = 0; key.bucket <= (maxFreqCount - 1) / FREQS_PER_ENTRY; ++key.bucket) {
            if (findMapEntry(gPidTisMapFd, &key, vals.data()) != 0) {
                if (errno != ENOENT) {
                    return {};
                }
                continue;
            } else {
                dataCollected = true;
            }

            // Combine data by aggregating time-in-state data grouped by CPU cluster aka policy.
            uint32_t offset = key.bucket * FREQS_PER_ENTRY;
            uint32_t nextOffset = offset + FREQS_PER_ENTRY;
            for (uint32_t j = 0; j < gNPolicies; ++j) {
                if (offset >= gPolicyFreqs[j].size()) continue;
                auto begin = map[key.aggregation_key][j].begin() + offset;
                auto end = nextOffset < gPolicyFreqs[j].size() ? begin + FREQS_PER_ENTRY
                                                               : map[key.aggregation_key][j].end();
                for (const auto &cpu : gPolicyCpus[j]) {
                    std::transform(begin, end, std::begin(vals[cpu].ar), begin,
                                   std::plus<uint64_t>());
                }
            }
        }
    }

    if (!dataCollected) {
        // Check if eBPF is supported on this device. If it is, gTisMap should not be empty.
        time_key_t key;
        if (getFirstMapKey(gTisMapFd, &key) != 0) {
            return {};
        }
    }
    return map;
}

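// Illustrative per-task aggregation flow (sketch only): a process registers itself, tags each
// thread of interest with an aggregation key, and later queries the aggregated totals for the
// keys it used. The key value 1 below is an arbitrary caller-chosen example.
//
//     android::bpf::startTrackingProcessCpuTimes(getpid());
//     android::bpf::startAggregatingTaskCpuTimes(gettid(), /*aggregationKey=*/1);
//     // ... let the tagged threads run ...
//     auto times = android::bpf::getAggregatedTaskCpuFreqTimes(getpid(), {1});
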
} // namespace bpf
} // namespace android