1 /*
2 * Copyright (c) 2020, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "carwatchdogd"
18 #define DEBUG false // STOPSHIP if true.
19
20 #include "UidProcStatsCollector.h"
21
22 #include <android-base/file.h>
23 #include <android-base/parseint.h>
24 #include <android-base/stringprintf.h>
25 #include <android-base/strings.h>
26 #include <log/log.h>
27
28 #include <dirent.h>
29
30 #include <string>
31 #include <unordered_map>
32 #include <vector>
33
34 namespace android {
35 namespace automotive {
36 namespace watchdog {
37
38 using ::android::base::EndsWith;
39 using ::android::base::Error;
40 using ::android::base::ParseInt;
41 using ::android::base::ParseUint;
42 using ::android::base::ReadFileToString;
43 using ::android::base::Result;
44 using ::android::base::Split;
45 using ::android::base::StringAppendF;
46 using ::android::base::Trim;
47
48 namespace {
49
// Error codes attached to the Result errors returned by the file readers in this file.
enum ReadError {
    // (0) The file was read but its contents are not in the expected format.
    ERR_INVALID_FILE = 0,
    // (1) The file could not be opened or read.
    ERR_FILE_OPEN_READ,
    // (2) Number of error codes defined above.
    NUM_ERRORS,
};
55
// Subset of the per-pid/tid stat file fields that is parsed by parsePidStatLine.
struct PidStat {
    // Command name with the enclosing brackets stripped.
    std::string comm;
    // Contents of the state field; compared against "D" by the users of this struct.
    std::string state;
    // Value of the <start time since boot> field.
    uint64_t startTime{0};
    // Value of the <major faults> field.
    uint64_t majorFaults{0};
};
63
64 /**
65 * /proc/PID/stat or /proc/PID/task/TID/stat format:
66 * <pid> <comm> <state> <ppid> <pgrp ID> <session ID> <tty_nr> <tpgid> <flags> <minor faults>
67 * <children minor faults> <major faults> <children major faults> <user mode time>
68 * <system mode time> <children user mode time> <children kernel mode time> <priority> <nice value>
69 * <num threads> <start time since boot> <virtual memory size> <resident set size> <rss soft limit>
70 * <start code addr> <end code addr> <start stack addr> <ESP value> <EIP> <bitmap of pending sigs>
71 * <bitmap of blocked sigs> <bitmap of ignored sigs> <waiting channel> <num pages swapped>
72 * <cumulative pages swapped> <exit signal> <processor #> <real-time prio> <agg block I/O delays>
73 * <guest time> <children guest time> <start data addr> <end data addr> <start break addr>
74 * <cmd line args start addr> <amd line args end addr> <env start addr> <env end addr> <exit code>
75 * Example line: 1 (init) S 0 0 0 0 0 0 0 0 220 0 0 0 0 0 0 0 2 0 0 ...etc...
76 */
parsePidStatLine(const std::string & line,PidStat * pidStat)77 bool parsePidStatLine(const std::string& line, PidStat* pidStat) {
78 std::vector<std::string> fields = Split(line, " ");
79
80 /* Note: Regex parsing for the below logic increased the time taken to run the
81 * UidProcStatsCollectorTest#TestProcPidStatContentsFromDevice from 151.7ms to 1.3 seconds.
82 *
83 * Comm string is enclosed with ( ) brackets and may contain space(s). Thus calculate the
84 * commEndOffset based on the field that contains the closing bracket.
85 */
86 size_t commEndOffset = 0;
87 for (size_t i = 1; i < fields.size(); ++i) {
88 pidStat->comm += fields[i];
89 if (EndsWith(fields[i], ")")) {
90 commEndOffset = i - 1;
91 break;
92 }
93 pidStat->comm += " ";
94 }
95
96 if (pidStat->comm.front() != '(' || pidStat->comm.back() != ')') {
97 ALOGD("Comm string `%s` not enclosed in brackets", pidStat->comm.c_str());
98 return false;
99 }
100 pidStat->comm.erase(pidStat->comm.begin());
101 pidStat->comm.erase(pidStat->comm.end() - 1);
102
103 if (fields.size() < 22 + commEndOffset ||
104 !ParseUint(fields[11 + commEndOffset], &pidStat->majorFaults) ||
105 !ParseUint(fields[21 + commEndOffset], &pidStat->startTime)) {
106 ALOGD("Invalid proc pid stat contents: \"%s\"", line.c_str());
107 return false;
108 }
109 pidStat->state = fields[2 + commEndOffset];
110 return true;
111 }
112
readPidStatFile(const std::string & path,PidStat * pidStat)113 Result<void> readPidStatFile(const std::string& path, PidStat* pidStat) {
114 std::string buffer;
115 if (!ReadFileToString(path, &buffer)) {
116 return Error(ERR_FILE_OPEN_READ) << "ReadFileToString failed for " << path;
117 }
118 std::vector<std::string> lines = Split(std::move(buffer), "\n");
119 if (lines.size() != 1 && (lines.size() != 2 || !lines[1].empty())) {
120 return Error(ERR_INVALID_FILE) << path << " contains " << lines.size() << " lines != 1";
121 }
122 if (!parsePidStatLine(std::move(lines[0]), pidStat)) {
123 return Error(ERR_INVALID_FILE) << "Failed to parse the contents of " << path;
124 }
125 return {};
126 }
127
readKeyValueFile(const std::string & path,const std::string & delimiter)128 Result<std::unordered_map<std::string, std::string>> readKeyValueFile(
129 const std::string& path, const std::string& delimiter) {
130 std::string buffer;
131 if (!ReadFileToString(path, &buffer)) {
132 return Error(ERR_FILE_OPEN_READ) << "ReadFileToString failed for " << path;
133 }
134 std::unordered_map<std::string, std::string> contents;
135 std::vector<std::string> lines = Split(std::move(buffer), "\n");
136 for (size_t i = 0; i < lines.size(); ++i) {
137 if (lines[i].empty()) {
138 continue;
139 }
140 std::vector<std::string> elements = Split(lines[i], delimiter);
141 if (elements.size() < 2) {
142 return Error(ERR_INVALID_FILE)
143 << "Line \"" << lines[i] << "\" doesn't contain the delimiter \"" << delimiter
144 << "\" in file " << path;
145 }
146 std::string key = elements[0];
147 std::string value = Trim(lines[i].substr(key.length() + delimiter.length()));
148 if (contents.find(key) != contents.end()) {
149 return Error(ERR_INVALID_FILE)
150 << "Duplicate " << key << " line: \"" << lines[i] << "\" in file " << path;
151 }
152 contents[key] = value;
153 }
154 return contents;
155 }
156
157 /**
158 * /proc/PID/status file format:
159 * Tgid: <Thread group ID of the process>
160 * Uid: <Read UID> <Effective UID> <Saved set UID> <Filesystem UID>
161 *
162 * Note: Included only the fields that are parsed from the file.
163 */
readPidStatusFile(const std::string & path)164 Result<std::tuple<uid_t, pid_t>> readPidStatusFile(const std::string& path) {
165 auto result = readKeyValueFile(path, ":\t");
166 if (!result.ok()) {
167 return Error(result.error().code()) << result.error();
168 }
169 auto contents = result.value();
170 if (contents.empty()) {
171 return Error(ERR_INVALID_FILE) << "Empty file " << path;
172 }
173 int64_t uid = 0;
174 int64_t tgid = 0;
175 if (contents.find("Uid") == contents.end() ||
176 !ParseInt(Split(contents["Uid"], "\t")[0], &uid)) {
177 return Error(ERR_INVALID_FILE) << "Failed to read 'UID' from file " << path;
178 }
179 if (contents.find("Tgid") == contents.end() || !ParseInt(contents["Tgid"], &tgid)) {
180 return Error(ERR_INVALID_FILE) << "Failed to read 'Tgid' from file " << path;
181 }
182 return std::make_tuple(uid, tgid);
183 }
184
185 } // namespace
186
toString() const187 std::string ProcessStats::toString() const {
188 return StringPrintf("{comm: %s, startTime: %" PRIu64 ", totalMajorFaults: %" PRIu64
189 ", totalTasksCount: %d, ioBlockedTasksCount: %d}",
190 comm.c_str(), startTime, totalMajorFaults, totalTasksCount,
191 ioBlockedTasksCount);
192 }
193
toString() const194 std::string UidProcStats::toString() const {
195 std::string buffer;
196 StringAppendF(&buffer,
197 "UidProcStats{totalMajorFaults: %" PRIu64 ", totalTasksCount: %d,"
198 "ioBlockedTasksCount: %d, processStatsByPid: {",
199 totalMajorFaults, totalTasksCount, ioBlockedTasksCount);
200 for (const auto& [pid, processStats] : processStatsByPid) {
201 StringAppendF(&buffer, "{pid: %" PRIi32 ", processStats: %s},", pid,
202 processStats.toString().c_str());
203 }
204 StringAppendF(&buffer, "}");
205 return buffer;
206 }
207
collect()208 Result<void> UidProcStatsCollector::collect() {
209 if (!mEnabled) {
210 return Error() << "Can not access PID stat files under " << kProcDirPath;
211 }
212
213 Mutex::Autolock lock(mMutex);
214 auto uidProcStatsByUid = readUidProcStatsLocked();
215 if (!uidProcStatsByUid.ok()) {
216 return Error() << uidProcStatsByUid.error();
217 }
218
219 mDeltaStats.clear();
220 for (const auto& [uid, currStats] : *uidProcStatsByUid) {
221 if (const auto& it = mLatestStats.find(uid); it == mLatestStats.end()) {
222 mDeltaStats[uid] = currStats;
223 continue;
224 }
225 const auto& prevStats = mLatestStats[uid];
226 UidProcStats deltaStats = {
227 .totalTasksCount = currStats.totalTasksCount,
228 .ioBlockedTasksCount = currStats.ioBlockedTasksCount,
229 };
230 for (const auto& [pid, processStats] : currStats.processStatsByPid) {
231 ProcessStats deltaProcessStats = processStats;
232 if (const auto& it = prevStats.processStatsByPid.find(pid);
233 it != prevStats.processStatsByPid.end() &&
234 it->second.startTime == processStats.startTime &&
235 it->second.totalMajorFaults <= deltaProcessStats.totalMajorFaults) {
236 deltaProcessStats.totalMajorFaults =
237 deltaProcessStats.totalMajorFaults - it->second.totalMajorFaults;
238 }
239 deltaStats.totalMajorFaults += deltaProcessStats.totalMajorFaults;
240 deltaStats.processStatsByPid[pid] = deltaProcessStats;
241 }
242 mDeltaStats[uid] = std::move(deltaStats);
243 }
244 mLatestStats = std::move(*uidProcStatsByUid);
245 return {};
246 }
247
readUidProcStatsLocked() const248 Result<std::unordered_map<uid_t, UidProcStats>> UidProcStatsCollector::readUidProcStatsLocked()
249 const {
250 std::unordered_map<uid_t, UidProcStats> uidProcStatsByUid;
251 auto procDirp = std::unique_ptr<DIR, int (*)(DIR*)>(opendir(mPath.c_str()), closedir);
252 if (!procDirp) {
253 return Error() << "Failed to open " << mPath << " directory";
254 }
255 for (dirent* pidDir = nullptr; (pidDir = readdir(procDirp.get())) != nullptr;) {
256 pid_t pid = 0;
257 if (pidDir->d_type != DT_DIR || !ParseInt(pidDir->d_name, &pid)) {
258 continue;
259 }
260 auto result = readProcessStatsLocked(pid);
261 if (!result.ok()) {
262 if (result.error().code() != ERR_FILE_OPEN_READ) {
263 return Error() << result.error();
264 }
265 /* |ERR_FILE_OPEN_READ| is a soft-error because PID may disappear between scanning and
266 * reading directory/files.
267 */
268 if (DEBUG) {
269 ALOGD("%s", result.error().message().c_str());
270 }
271 continue;
272 }
273 uid_t uid = std::get<0>(*result);
274 ProcessStats processStats = std::get<ProcessStats>(*result);
275 if (uidProcStatsByUid.find(uid) == uidProcStatsByUid.end()) {
276 uidProcStatsByUid[uid] = {};
277 }
278 UidProcStats* uidProcStats = &uidProcStatsByUid[uid];
279 uidProcStats->totalMajorFaults += processStats.totalMajorFaults;
280 uidProcStats->totalTasksCount += processStats.totalTasksCount;
281 uidProcStats->ioBlockedTasksCount += processStats.ioBlockedTasksCount;
282 uidProcStats->processStatsByPid[pid] = std::move(processStats);
283 }
284 return uidProcStatsByUid;
285 }
286
readProcessStatsLocked(pid_t pid) const287 Result<std::tuple<uid_t, ProcessStats>> UidProcStatsCollector::readProcessStatsLocked(
288 pid_t pid) const {
289 // 1. Read top-level pid stats.
290 PidStat pidStat = {};
291 std::string path = StringPrintf((mPath + kStatFileFormat).c_str(), pid);
292 if (auto result = readPidStatFile(path, &pidStat); !result.ok()) {
293 return Error(result.error().code())
294 << "Failed to read top-level per-process stat file '%s': %s"
295 << result.error().message().c_str();
296 }
297
298 // 2. Read aggregated process status.
299 pid_t tgid = -1;
300 uid_t uid = -1;
301 path = StringPrintf((mPath + kStatusFileFormat).c_str(), pid);
302 if (auto result = readPidStatusFile(path); !result.ok()) {
303 if (result.error().code() != ERR_FILE_OPEN_READ) {
304 return Error() << "Failed to read pid status for pid " << pid << ": "
305 << result.error().message().c_str();
306 }
307 for (const auto& [curUid, uidProcStats] : mLatestStats) {
308 if (const auto it = uidProcStats.processStatsByPid.find(pid);
309 it != uidProcStats.processStatsByPid.end() &&
310 it->second.startTime == pidStat.startTime) {
311 tgid = pid;
312 uid = curUid;
313 break;
314 }
315 }
316 } else {
317 uid = std::get<0>(*result);
318 tgid = std::get<1>(*result);
319 }
320
321 if (uid == -1 || tgid != pid) {
322 return Error(ERR_FILE_OPEN_READ)
323 << "Skipping PID '" << pid << "' because either Tgid != PID or invalid UID";
324 }
325
326 ProcessStats processStats = {
327 .comm = std::move(pidStat.comm),
328 .startTime = pidStat.startTime,
329 .totalTasksCount = 1,
330 /* Top-level process stats has the aggregated major page faults count and this should be
331 * persistent across thread creation/termination. Thus use the value from this field.
332 */
333 .totalMajorFaults = pidStat.majorFaults,
334 .ioBlockedTasksCount = pidStat.state == "D" ? 1 : 0,
335 };
336
337 // 3. Read per-thread stats.
338 std::string taskDir = StringPrintf((mPath + kTaskDirFormat).c_str(), pid);
339 bool didReadMainThread = false;
340 auto taskDirp = std::unique_ptr<DIR, int (*)(DIR*)>(opendir(taskDir.c_str()), closedir);
341 for (dirent* tidDir = nullptr;
342 taskDirp != nullptr && (tidDir = readdir(taskDirp.get())) != nullptr;) {
343 pid_t tid = 0;
344 if (tidDir->d_type != DT_DIR || !ParseInt(tidDir->d_name, &tid) || tid == pid) {
345 continue;
346 }
347
348 PidStat tidStat = {};
349 path = StringPrintf((taskDir + kStatFileFormat).c_str(), tid);
350 if (const auto& result = readPidStatFile(path, &tidStat); !result.ok()) {
351 if (result.error().code() != ERR_FILE_OPEN_READ) {
352 return Error() << "Failed to read per-thread stat file: "
353 << result.error().message().c_str();
354 }
355 /* Maybe the thread terminated before reading the file so skip this thread and
356 * continue with scanning the next thread's stat.
357 */
358 continue;
359 }
360 processStats.ioBlockedTasksCount += tidStat.state == "D" ? 1 : 0;
361 processStats.totalTasksCount += 1;
362 }
363 return std::make_tuple(uid, processStats);
364 }
365
366 } // namespace watchdog
367 } // namespace automotive
368 } // namespace android
369