1 /*
2  * Copyright (c) 2020, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "carwatchdogd"
18 #define DEBUG false  // STOPSHIP if true.
19 
20 #include "UidProcStatsCollector.h"
21 
22 #include <android-base/file.h>
23 #include <android-base/parseint.h>
24 #include <android-base/stringprintf.h>
25 #include <android-base/strings.h>
26 #include <log/log.h>
27 
28 #include <dirent.h>
29 
30 #include <string>
31 #include <unordered_map>
32 #include <vector>
33 
34 namespace android {
35 namespace automotive {
36 namespace watchdog {
37 
38 using ::android::base::EndsWith;
39 using ::android::base::Error;
40 using ::android::base::ParseInt;
41 using ::android::base::ParseUint;
42 using ::android::base::ReadFileToString;
43 using ::android::base::Result;
44 using ::android::base::Split;
45 using ::android::base::StringAppendF;
46 using ::android::base::Trim;
47 
48 namespace {
49 
// Error codes attached to the Result errors produced by the file readers in this file.
enum ReadError {
    // The file was read but its contents could not be parsed.
    ERR_INVALID_FILE,
    // The file could not be opened or read.
    ERR_FILE_OPEN_READ,
    // Number of error codes defined above.
    NUM_ERRORS,
};
55 
// Per-pid/tid stats filled in by parsePidStatLine from a single stat line.
struct PidStat {
    // Command name with the enclosing brackets removed.
    std::string comm;
    // State field that follows the command name.
    std::string state;
    // Start time field of the stat line.
    uint64_t startTime{0};
    // Major page faults field of the stat line.
    uint64_t majorFaults{0};
};
63 
64 /**
65  * /proc/PID/stat or /proc/PID/task/TID/stat format:
66  * <pid> <comm> <state> <ppid> <pgrp ID> <session ID> <tty_nr> <tpgid> <flags> <minor faults>
67  * <children minor faults> <major faults> <children major faults> <user mode time>
68  * <system mode time> <children user mode time> <children kernel mode time> <priority> <nice value>
69  * <num threads> <start time since boot> <virtual memory size> <resident set size> <rss soft limit>
70  * <start code addr> <end code addr> <start stack addr> <ESP value> <EIP> <bitmap of pending sigs>
71  * <bitmap of blocked sigs> <bitmap of ignored sigs> <waiting channel> <num pages swapped>
72  * <cumulative pages swapped> <exit signal> <processor #> <real-time prio> <agg block I/O delays>
73  * <guest time> <children guest time> <start data addr> <end data addr> <start break addr>
74  * <cmd line args start addr> <amd line args end addr> <env start addr> <env end addr> <exit code>
75  * Example line: 1 (init) S 0 0 0 0 0 0 0 0 220 0 0 0 0 0 0 0 2 0 0 ...etc...
76  */
parsePidStatLine(const std::string & line,PidStat * pidStat)77 bool parsePidStatLine(const std::string& line, PidStat* pidStat) {
78     std::vector<std::string> fields = Split(line, " ");
79 
80     /* Note: Regex parsing for the below logic increased the time taken to run the
81      * UidProcStatsCollectorTest#TestProcPidStatContentsFromDevice from 151.7ms to 1.3 seconds.
82      *
83      * Comm string is enclosed with ( ) brackets and may contain space(s). Thus calculate the
84      * commEndOffset based on the field that contains the closing bracket.
85      */
86     size_t commEndOffset = 0;
87     for (size_t i = 1; i < fields.size(); ++i) {
88         pidStat->comm += fields[i];
89         if (EndsWith(fields[i], ")")) {
90             commEndOffset = i - 1;
91             break;
92         }
93         pidStat->comm += " ";
94     }
95 
96     if (pidStat->comm.front() != '(' || pidStat->comm.back() != ')') {
97         ALOGD("Comm string `%s` not enclosed in brackets", pidStat->comm.c_str());
98         return false;
99     }
100     pidStat->comm.erase(pidStat->comm.begin());
101     pidStat->comm.erase(pidStat->comm.end() - 1);
102 
103     if (fields.size() < 22 + commEndOffset ||
104         !ParseUint(fields[11 + commEndOffset], &pidStat->majorFaults) ||
105         !ParseUint(fields[21 + commEndOffset], &pidStat->startTime)) {
106         ALOGD("Invalid proc pid stat contents: \"%s\"", line.c_str());
107         return false;
108     }
109     pidStat->state = fields[2 + commEndOffset];
110     return true;
111 }
112 
readPidStatFile(const std::string & path,PidStat * pidStat)113 Result<void> readPidStatFile(const std::string& path, PidStat* pidStat) {
114     std::string buffer;
115     if (!ReadFileToString(path, &buffer)) {
116         return Error(ERR_FILE_OPEN_READ) << "ReadFileToString failed for " << path;
117     }
118     std::vector<std::string> lines = Split(std::move(buffer), "\n");
119     if (lines.size() != 1 && (lines.size() != 2 || !lines[1].empty())) {
120         return Error(ERR_INVALID_FILE) << path << " contains " << lines.size() << " lines != 1";
121     }
122     if (!parsePidStatLine(std::move(lines[0]), pidStat)) {
123         return Error(ERR_INVALID_FILE) << "Failed to parse the contents of " << path;
124     }
125     return {};
126 }
127 
readKeyValueFile(const std::string & path,const std::string & delimiter)128 Result<std::unordered_map<std::string, std::string>> readKeyValueFile(
129         const std::string& path, const std::string& delimiter) {
130     std::string buffer;
131     if (!ReadFileToString(path, &buffer)) {
132         return Error(ERR_FILE_OPEN_READ) << "ReadFileToString failed for " << path;
133     }
134     std::unordered_map<std::string, std::string> contents;
135     std::vector<std::string> lines = Split(std::move(buffer), "\n");
136     for (size_t i = 0; i < lines.size(); ++i) {
137         if (lines[i].empty()) {
138             continue;
139         }
140         std::vector<std::string> elements = Split(lines[i], delimiter);
141         if (elements.size() < 2) {
142             return Error(ERR_INVALID_FILE)
143                     << "Line \"" << lines[i] << "\" doesn't contain the delimiter \"" << delimiter
144                     << "\" in file " << path;
145         }
146         std::string key = elements[0];
147         std::string value = Trim(lines[i].substr(key.length() + delimiter.length()));
148         if (contents.find(key) != contents.end()) {
149             return Error(ERR_INVALID_FILE)
150                     << "Duplicate " << key << " line: \"" << lines[i] << "\" in file " << path;
151         }
152         contents[key] = value;
153     }
154     return contents;
155 }
156 
157 /**
158  * /proc/PID/status file format:
159  * Tgid:    <Thread group ID of the process>
160  * Uid:     <Read UID>   <Effective UID>   <Saved set UID>   <Filesystem UID>
161  *
162  * Note: Included only the fields that are parsed from the file.
163  */
readPidStatusFile(const std::string & path)164 Result<std::tuple<uid_t, pid_t>> readPidStatusFile(const std::string& path) {
165     auto result = readKeyValueFile(path, ":\t");
166     if (!result.ok()) {
167         return Error(result.error().code()) << result.error();
168     }
169     auto contents = result.value();
170     if (contents.empty()) {
171         return Error(ERR_INVALID_FILE) << "Empty file " << path;
172     }
173     int64_t uid = 0;
174     int64_t tgid = 0;
175     if (contents.find("Uid") == contents.end() ||
176         !ParseInt(Split(contents["Uid"], "\t")[0], &uid)) {
177         return Error(ERR_INVALID_FILE) << "Failed to read 'UID' from file " << path;
178     }
179     if (contents.find("Tgid") == contents.end() || !ParseInt(contents["Tgid"], &tgid)) {
180         return Error(ERR_INVALID_FILE) << "Failed to read 'Tgid' from file " << path;
181     }
182     return std::make_tuple(uid, tgid);
183 }
184 
185 }  // namespace
186 
toString() const187 std::string ProcessStats::toString() const {
188     return StringPrintf("{comm: %s, startTime: %" PRIu64 ", totalMajorFaults: %" PRIu64
189                         ", totalTasksCount: %d, ioBlockedTasksCount: %d}",
190                         comm.c_str(), startTime, totalMajorFaults, totalTasksCount,
191                         ioBlockedTasksCount);
192 }
193 
toString() const194 std::string UidProcStats::toString() const {
195     std::string buffer;
196     StringAppendF(&buffer,
197                   "UidProcStats{totalMajorFaults: %" PRIu64 ", totalTasksCount: %d,"
198                   "ioBlockedTasksCount: %d, processStatsByPid: {",
199                   totalMajorFaults, totalTasksCount, ioBlockedTasksCount);
200     for (const auto& [pid, processStats] : processStatsByPid) {
201         StringAppendF(&buffer, "{pid: %" PRIi32 ", processStats: %s},", pid,
202                       processStats.toString().c_str());
203     }
204     StringAppendF(&buffer, "}");
205     return buffer;
206 }
207 
collect()208 Result<void> UidProcStatsCollector::collect() {
209     if (!mEnabled) {
210         return Error() << "Can not access PID stat files under " << kProcDirPath;
211     }
212 
213     Mutex::Autolock lock(mMutex);
214     auto uidProcStatsByUid = readUidProcStatsLocked();
215     if (!uidProcStatsByUid.ok()) {
216         return Error() << uidProcStatsByUid.error();
217     }
218 
219     mDeltaStats.clear();
220     for (const auto& [uid, currStats] : *uidProcStatsByUid) {
221         if (const auto& it = mLatestStats.find(uid); it == mLatestStats.end()) {
222             mDeltaStats[uid] = currStats;
223             continue;
224         }
225         const auto& prevStats = mLatestStats[uid];
226         UidProcStats deltaStats = {
227                 .totalTasksCount = currStats.totalTasksCount,
228                 .ioBlockedTasksCount = currStats.ioBlockedTasksCount,
229         };
230         for (const auto& [pid, processStats] : currStats.processStatsByPid) {
231             ProcessStats deltaProcessStats = processStats;
232             if (const auto& it = prevStats.processStatsByPid.find(pid);
233                 it != prevStats.processStatsByPid.end() &&
234                 it->second.startTime == processStats.startTime &&
235                 it->second.totalMajorFaults <= deltaProcessStats.totalMajorFaults) {
236                 deltaProcessStats.totalMajorFaults =
237                         deltaProcessStats.totalMajorFaults - it->second.totalMajorFaults;
238             }
239             deltaStats.totalMajorFaults += deltaProcessStats.totalMajorFaults;
240             deltaStats.processStatsByPid[pid] = deltaProcessStats;
241         }
242         mDeltaStats[uid] = std::move(deltaStats);
243     }
244     mLatestStats = std::move(*uidProcStatsByUid);
245     return {};
246 }
247 
readUidProcStatsLocked() const248 Result<std::unordered_map<uid_t, UidProcStats>> UidProcStatsCollector::readUidProcStatsLocked()
249         const {
250     std::unordered_map<uid_t, UidProcStats> uidProcStatsByUid;
251     auto procDirp = std::unique_ptr<DIR, int (*)(DIR*)>(opendir(mPath.c_str()), closedir);
252     if (!procDirp) {
253         return Error() << "Failed to open " << mPath << " directory";
254     }
255     for (dirent* pidDir = nullptr; (pidDir = readdir(procDirp.get())) != nullptr;) {
256         pid_t pid = 0;
257         if (pidDir->d_type != DT_DIR || !ParseInt(pidDir->d_name, &pid)) {
258             continue;
259         }
260         auto result = readProcessStatsLocked(pid);
261         if (!result.ok()) {
262             if (result.error().code() != ERR_FILE_OPEN_READ) {
263                 return Error() << result.error();
264             }
265             /* |ERR_FILE_OPEN_READ| is a soft-error because PID may disappear between scanning and
266              * reading directory/files.
267              */
268             if (DEBUG) {
269                 ALOGD("%s", result.error().message().c_str());
270             }
271             continue;
272         }
273         uid_t uid = std::get<0>(*result);
274         ProcessStats processStats = std::get<ProcessStats>(*result);
275         if (uidProcStatsByUid.find(uid) == uidProcStatsByUid.end()) {
276             uidProcStatsByUid[uid] = {};
277         }
278         UidProcStats* uidProcStats = &uidProcStatsByUid[uid];
279         uidProcStats->totalMajorFaults += processStats.totalMajorFaults;
280         uidProcStats->totalTasksCount += processStats.totalTasksCount;
281         uidProcStats->ioBlockedTasksCount += processStats.ioBlockedTasksCount;
282         uidProcStats->processStatsByPid[pid] = std::move(processStats);
283     }
284     return uidProcStatsByUid;
285 }
286 
readProcessStatsLocked(pid_t pid) const287 Result<std::tuple<uid_t, ProcessStats>> UidProcStatsCollector::readProcessStatsLocked(
288         pid_t pid) const {
289     // 1. Read top-level pid stats.
290     PidStat pidStat = {};
291     std::string path = StringPrintf((mPath + kStatFileFormat).c_str(), pid);
292     if (auto result = readPidStatFile(path, &pidStat); !result.ok()) {
293         return Error(result.error().code())
294                 << "Failed to read top-level per-process stat file '%s': %s"
295                 << result.error().message().c_str();
296     }
297 
298     // 2. Read aggregated process status.
299     pid_t tgid = -1;
300     uid_t uid = -1;
301     path = StringPrintf((mPath + kStatusFileFormat).c_str(), pid);
302     if (auto result = readPidStatusFile(path); !result.ok()) {
303         if (result.error().code() != ERR_FILE_OPEN_READ) {
304             return Error() << "Failed to read pid status for pid " << pid << ": "
305                            << result.error().message().c_str();
306         }
307         for (const auto& [curUid, uidProcStats] : mLatestStats) {
308             if (const auto it = uidProcStats.processStatsByPid.find(pid);
309                 it != uidProcStats.processStatsByPid.end() &&
310                 it->second.startTime == pidStat.startTime) {
311                 tgid = pid;
312                 uid = curUid;
313                 break;
314             }
315         }
316     } else {
317         uid = std::get<0>(*result);
318         tgid = std::get<1>(*result);
319     }
320 
321     if (uid == -1 || tgid != pid) {
322         return Error(ERR_FILE_OPEN_READ)
323                 << "Skipping PID '" << pid << "' because either Tgid != PID or invalid UID";
324     }
325 
326     ProcessStats processStats = {
327             .comm = std::move(pidStat.comm),
328             .startTime = pidStat.startTime,
329             .totalTasksCount = 1,
330             /* Top-level process stats has the aggregated major page faults count and this should be
331              * persistent across thread creation/termination. Thus use the value from this field.
332              */
333             .totalMajorFaults = pidStat.majorFaults,
334             .ioBlockedTasksCount = pidStat.state == "D" ? 1 : 0,
335     };
336 
337     // 3. Read per-thread stats.
338     std::string taskDir = StringPrintf((mPath + kTaskDirFormat).c_str(), pid);
339     bool didReadMainThread = false;
340     auto taskDirp = std::unique_ptr<DIR, int (*)(DIR*)>(opendir(taskDir.c_str()), closedir);
341     for (dirent* tidDir = nullptr;
342          taskDirp != nullptr && (tidDir = readdir(taskDirp.get())) != nullptr;) {
343         pid_t tid = 0;
344         if (tidDir->d_type != DT_DIR || !ParseInt(tidDir->d_name, &tid) || tid == pid) {
345             continue;
346         }
347 
348         PidStat tidStat = {};
349         path = StringPrintf((taskDir + kStatFileFormat).c_str(), tid);
350         if (const auto& result = readPidStatFile(path, &tidStat); !result.ok()) {
351             if (result.error().code() != ERR_FILE_OPEN_READ) {
352                 return Error() << "Failed to read per-thread stat file: "
353                                << result.error().message().c_str();
354             }
355             /* Maybe the thread terminated before reading the file so skip this thread and
356              * continue with scanning the next thread's stat.
357              */
358             continue;
359         }
360         processStats.ioBlockedTasksCount += tidStat.state == "D" ? 1 : 0;
361         processStats.totalTasksCount += 1;
362     }
363     return std::make_tuple(uid, processStats);
364 }
365 
366 }  // namespace watchdog
367 }  // namespace automotive
368 }  // namespace android
369