1 /*
2 * Copyright (c) 2020, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "carwatchdogd"
18 #define DEBUG false // STOPSHIP if true.
19
20 #include "WatchdogPerfService.h"
21
22 #include <WatchdogProperties.sysprop.h>
23 #include <android-base/file.h>
24 #include <android-base/parseint.h>
25 #include <android-base/stringprintf.h>
26 #include <android-base/strings.h>
27 #include <log/log.h>
28 #include <processgroup/sched_policy.h>
29
30 #include <pthread.h>
31
32 #include <iterator>
33 #include <vector>
34
35 namespace android {
36 namespace automotive {
37 namespace watchdog {
38
39 using ::android::sp;
40 using ::android::String16;
41 using ::android::String8;
42 using ::android::automotive::watchdog::internal::PowerCycle;
43 using ::android::base::Error;
44 using ::android::base::Join;
45 using ::android::base::ParseUint;
46 using ::android::base::Result;
47 using ::android::base::Split;
48 using ::android::base::StringAppendF;
49 using ::android::base::StringPrintf;
50 using ::android::base::WriteStringToFd;
51
52 namespace {
53
// Minimum required collection interval between subsequent collections.
const std::chrono::nanoseconds kMinEventInterval = 1s;
// Fallback intervals used by start() when the corresponding system property has no value.
const std::chrono::seconds kDefaultBoottimeCollectionInterval = 1s;
const std::chrono::seconds kDefaultPeriodicCollectionInterval = 20s;
const std::chrono::seconds kDefaultPeriodicMonitorInterval = 5s;
// Defaults used by onCustomCollection() when the interval / max duration flags are not provided.
const std::chrono::nanoseconds kCustomCollectionInterval = 10s;
const std::chrono::nanoseconds kCustomCollectionDuration = 30min;

// Name of this service as it appears in log messages, error messages, and dump output.
constexpr const char* kServiceName = "WatchdogPerfService";
// Line of 100 dashes followed by a newline, written between major sections of the dump output.
static const std::string kDumpMajorDelimiter = std::string(100, '-') + "\n";  // NOLINT
/*
 * printf-style template for the help text. dumpHelpText() supplies the arguments in this order:
 * service name, start flag, interval flag, default interval in seconds (%lld), max duration flag,
 * default max duration in minutes (%ld), filter packages flag, and end flag.
 */
constexpr const char* kHelpText =
        "\n%s dump options:\n"
        "%s: Starts custom performance data collection. Customize the collection behavior with "
        "the following optional arguments:\n"
        "\t%s <seconds>: Modifies the collection interval. Default behavior is to collect once "
        "every %lld seconds.\n"
        "\t%s <seconds>: Modifies the maximum collection duration. Default behavior is to collect "
        "until %ld minutes before automatically stopping the custom collection and discarding "
        "the collected data.\n"
        "\t%s <package name>,<package name>,...: Comma-separated value containing package names. "
        "When provided, the results are filtered only to the provided package names. Default "
        "behavior is to list the results for the top N packages.\n"
        "%s: Stops custom performance data collection and generates a dump of "
        "the collection report.\n\n"
        "When no options are specified, the carwatchdog report contains the performance data "
        "collected during boot-time and over the last few minutes before the report generation.\n";
80
parseSecondsFlag(const Vector<String16> & args,size_t pos)81 Result<std::chrono::seconds> parseSecondsFlag(const Vector<String16>& args, size_t pos) {
82 if (args.size() <= pos) {
83 return Error() << "Value not provided";
84 }
85 uint64_t value;
86 if (std::string strValue = std::string(String8(args[pos]).string());
87 !ParseUint(strValue, &value)) {
88 return Error() << "Invalid value " << strValue << ", must be an integer";
89 }
90 return std::chrono::seconds(value);
91 }
92
toString(std::variant<EventType,SwitchMessage> what)93 constexpr const char* toString(std::variant<EventType, SwitchMessage> what) {
94 return std::visit(
95 [&](const auto& v) -> const char* {
96 switch (static_cast<int>(v)) {
97 case EventType::INIT:
98 return "INIT";
99 case EventType::TERMINATED:
100 return "TERMINATED";
101 case EventType::BOOT_TIME_COLLECTION:
102 return "BOOT_TIME_COLLECTION";
103 case EventType::PERIODIC_COLLECTION:
104 return "PERIODIC_COLLECTION";
105 case EventType::CUSTOM_COLLECTION:
106 return "CUSTOM_COLLECTION";
107 case EventType::PERIODIC_MONITOR:
108 return "PERIODIC_MONITOR";
109 case EventType::LAST_EVENT:
110 return "LAST_EVENT";
111 case SwitchMessage::END_BOOTTIME_COLLECTION:
112 return "END_BOOTTIME_COLLECTION";
113 case SwitchMessage::END_CUSTOM_COLLECTION:
114 return "END_CUSTOM_COLLECTION";
115 default:
116 return "INVALID_EVENT_OR_SWITCH_MESSAGE";
117 }
118 },
119 what);
120 }
121
toString(SystemState systemState)122 constexpr const char* toString(SystemState systemState) {
123 switch (systemState) {
124 case SystemState::NORMAL_MODE:
125 return "NORMAL_MODE";
126 case SystemState::GARAGE_MODE:
127 return "GARAGE_MODE";
128 default:
129 return "UNKNOWN MODE";
130 }
131 }
132
133 } // namespace
134
toString() const135 std::string WatchdogPerfService::EventMetadata::toString() const {
136 std::string buffer;
137 const auto intervalInSecs = std::chrono::duration_cast<std::chrono::seconds>(interval).count();
138 StringAppendF(&buffer, "Event interval: %lld second%s\n", intervalInSecs,
139 ((intervalInSecs > 1) ? "s" : ""));
140 if (!filterPackages.empty()) {
141 std::vector<std::string> packages(filterPackages.begin(), filterPackages.end());
142 StringAppendF(&buffer, "Filtered results to packages: %s\n", Join(packages, ", ").c_str());
143 }
144 return buffer;
145 }
146
registerDataProcessor(sp<IDataProcessorInterface> processor)147 Result<void> WatchdogPerfService::registerDataProcessor(sp<IDataProcessorInterface> processor) {
148 if (processor == nullptr) {
149 return Error() << "Must provide a valid data processor";
150 }
151 if (const auto result = processor->init(); !result.ok()) {
152 return Error() << "Failed to initialize " << processor->name().c_str() << ": "
153 << result.error().message();
154 }
155 Mutex::Autolock lock(mMutex);
156 mDataProcessors.push_back(processor);
157 if (DEBUG) {
158 ALOGD("Successfully registered %s to %s", processor->name().c_str(), kServiceName);
159 }
160 return {};
161 }
162
start()163 Result<void> WatchdogPerfService::start() {
164 {
165 Mutex::Autolock lock(mMutex);
166 if (mCurrCollectionEvent != EventType::INIT || mCollectionThread.joinable()) {
167 return Error(INVALID_OPERATION) << "Cannot start " << kServiceName << " more than once";
168 }
169 std::chrono::nanoseconds boottimeCollectionInterval =
170 std::chrono::duration_cast<std::chrono::nanoseconds>(
171 std::chrono::seconds(sysprop::boottimeCollectionInterval().value_or(
172 kDefaultBoottimeCollectionInterval.count())));
173 std::chrono::nanoseconds periodicCollectionInterval =
174 std::chrono::duration_cast<std::chrono::nanoseconds>(
175 std::chrono::seconds(sysprop::periodicCollectionInterval().value_or(
176 kDefaultPeriodicCollectionInterval.count())));
177 std::chrono::nanoseconds periodicMonitorInterval =
178 std::chrono::duration_cast<std::chrono::nanoseconds>(
179 std::chrono::seconds(sysprop::periodicMonitorInterval().value_or(
180 kDefaultPeriodicMonitorInterval.count())));
181 mBoottimeCollection = {
182 .eventType = EventType::BOOT_TIME_COLLECTION,
183 .interval = boottimeCollectionInterval,
184 .lastUptime = 0,
185 };
186 mPeriodicCollection = {
187 .eventType = EventType::PERIODIC_COLLECTION,
188 .interval = periodicCollectionInterval,
189 .lastUptime = 0,
190 };
191 mPeriodicMonitor = {
192 .eventType = EventType::PERIODIC_MONITOR,
193 .interval = periodicMonitorInterval,
194 .lastUptime = 0,
195 };
196 if (mDataProcessors.empty()) {
197 ALOGE("Terminating %s: No data processor is registered", kServiceName);
198 mCurrCollectionEvent = EventType::TERMINATED;
199 return Error() << "No data processor is registered";
200 }
201 }
202
203 mCollectionThread = std::thread([&]() {
204 {
205 Mutex::Autolock lock(mMutex);
206 if (EventType expected = EventType::INIT; mCurrCollectionEvent != expected) {
207 ALOGE("Skipping performance data collection as the current collection event "
208 "%s != %s",
209 toString(mCurrCollectionEvent), toString(expected));
210 return;
211 }
212 mCurrCollectionEvent = EventType::BOOT_TIME_COLLECTION;
213 mBoottimeCollection.lastUptime = mHandlerLooper->now();
214 mHandlerLooper->setLooper(Looper::prepare(/*opts=*/0));
215 mHandlerLooper->sendMessage(this, EventType::BOOT_TIME_COLLECTION);
216 }
217 if (set_sched_policy(0, SP_BACKGROUND) != 0) {
218 ALOGW("Failed to set background scheduling priority to %s thread", kServiceName);
219 }
220 if (int result = pthread_setname_np(pthread_self(), "WatchdogPerfSvc"); result != 0) {
221 ALOGE("Failed to set %s thread name: %d", kServiceName, result);
222 }
223 ALOGI("Starting %s performance data collection", toString(mCurrCollectionEvent));
224 bool isCollectionActive = true;
225 /*
226 * Loop until the collection is not active -- performance collection runs on this thread in
227 * a handler.
228 */
229 while (isCollectionActive) {
230 mHandlerLooper->pollAll(/*timeoutMillis=*/-1);
231 Mutex::Autolock lock(mMutex);
232 isCollectionActive = mCurrCollectionEvent != EventType::TERMINATED;
233 }
234 });
235 return {};
236 }
237
terminate()238 void WatchdogPerfService::terminate() {
239 {
240 Mutex::Autolock lock(mMutex);
241 if (mCurrCollectionEvent == EventType::TERMINATED) {
242 ALOGE("%s was terminated already", kServiceName);
243 return;
244 }
245 ALOGE("Terminating %s as carwatchdog is terminating", kServiceName);
246 if (mCurrCollectionEvent != EventType::INIT) {
247 /*
248 * Looper runs only after EventType::TNIT has completed so remove looper messages
249 * and wake the looper only when the current collection has changed from INIT.
250 */
251 mHandlerLooper->removeMessages(this);
252 mHandlerLooper->wake();
253 }
254 for (const auto& processor : mDataProcessors) {
255 processor->terminate();
256 }
257 mCurrCollectionEvent = EventType::TERMINATED;
258 }
259 if (mCollectionThread.joinable()) {
260 mCollectionThread.join();
261 if (DEBUG) {
262 ALOGD("%s collection thread terminated", kServiceName);
263 }
264 }
265 }
266
setSystemState(SystemState systemState)267 void WatchdogPerfService::setSystemState(SystemState systemState) {
268 Mutex::Autolock lock(mMutex);
269 if (mSystemState != systemState) {
270 ALOGI("%s switching from %s to %s", kServiceName, toString(mSystemState),
271 toString(systemState));
272 }
273 mSystemState = systemState;
274 }
275
onBootFinished()276 Result<void> WatchdogPerfService::onBootFinished() {
277 Mutex::Autolock lock(mMutex);
278 if (EventType expected = EventType::BOOT_TIME_COLLECTION; mCurrCollectionEvent != expected) {
279 /*
280 * This case happens when either the WatchdogPerfService has prematurely terminated before
281 * boot complete notification is received or multiple boot complete notifications are
282 * received. In either case don't return error as this will lead to runtime exception and
283 * cause system to boot loop.
284 */
285 ALOGE("Current performance data collection event %s != %s", toString(mCurrCollectionEvent),
286 toString(expected));
287 return {};
288 }
289 mBoottimeCollection.lastUptime = mHandlerLooper->now();
290 mHandlerLooper->removeMessages(this);
291 mHandlerLooper->sendMessage(this, SwitchMessage::END_BOOTTIME_COLLECTION);
292 if (DEBUG) {
293 ALOGD("Boot-time event finished");
294 }
295 return {};
296 }
297
onCustomCollection(int fd,const Vector<String16> & args)298 Result<void> WatchdogPerfService::onCustomCollection(int fd, const Vector<String16>& args) {
299 if (args.empty()) {
300 return Error(BAD_VALUE) << "No custom collection dump arguments";
301 }
302
303 if (args[0] == String16(kStartCustomCollectionFlag)) {
304 if (args.size() > 7) {
305 return Error(BAD_VALUE) << "Number of arguments to start custom performance data "
306 << "collection cannot exceed 7";
307 }
308 std::chrono::nanoseconds interval = kCustomCollectionInterval;
309 std::chrono::nanoseconds maxDuration = kCustomCollectionDuration;
310 std::unordered_set<std::string> filterPackages;
311 for (size_t i = 1; i < args.size(); ++i) {
312 if (args[i] == String16(kIntervalFlag)) {
313 const auto& result = parseSecondsFlag(args, i + 1);
314 if (!result.ok()) {
315 return Error(BAD_VALUE)
316 << "Failed to parse " << kIntervalFlag << ": " << result.error();
317 }
318 interval = std::chrono::duration_cast<std::chrono::nanoseconds>(*result);
319 ++i;
320 continue;
321 }
322 if (args[i] == String16(kMaxDurationFlag)) {
323 const auto& result = parseSecondsFlag(args, i + 1);
324 if (!result.ok()) {
325 return Error(BAD_VALUE)
326 << "Failed to parse " << kMaxDurationFlag << ": " << result.error();
327 }
328 maxDuration = std::chrono::duration_cast<std::chrono::nanoseconds>(*result);
329 ++i;
330 continue;
331 }
332 if (args[i] == String16(kFilterPackagesFlag)) {
333 if (args.size() < i + 1) {
334 return Error(BAD_VALUE)
335 << "Must provide value for '" << kFilterPackagesFlag << "' flag";
336 }
337 std::vector<std::string> packages =
338 Split(std::string(String8(args[i + 1]).string()), ",");
339 std::copy(packages.begin(), packages.end(),
340 std::inserter(filterPackages, filterPackages.end()));
341 ++i;
342 continue;
343 }
344 ALOGW("Unknown flag %s provided to start custom performance data collection",
345 String8(args[i]).string());
346 return Error(BAD_VALUE) << "Unknown flag " << String8(args[i]).string()
347 << " provided to start custom performance data collection";
348 }
349 if (const auto& result = startCustomCollection(interval, maxDuration, filterPackages);
350 !result.ok()) {
351 WriteStringToFd(result.error().message(), fd);
352 return result;
353 }
354 return {};
355 }
356
357 if (args[0] == String16(kEndCustomCollectionFlag)) {
358 if (args.size() != 1) {
359 ALOGW("Number of arguments to stop custom performance data collection cannot exceed 1. "
360 "Stopping the data collection.");
361 WriteStringToFd("Number of arguments to stop custom performance data collection "
362 "cannot exceed 1. Stopping the data collection.",
363 fd);
364 }
365 return endCustomCollection(fd);
366 }
367
368 return Error(BAD_VALUE) << "Custom perf collection dump arguments start neither with "
369 << kStartCustomCollectionFlag << " nor with "
370 << kEndCustomCollectionFlag << " flags";
371 }
372
onDump(int fd) const373 Result<void> WatchdogPerfService::onDump(int fd) const {
374 Mutex::Autolock lock(mMutex);
375 if (mCurrCollectionEvent == EventType::TERMINATED) {
376 ALOGW("%s not active. Dumping cached data", kServiceName);
377 if (!WriteStringToFd(StringPrintf("%s not active. Dumping cached data.", kServiceName),
378 fd)) {
379 return Error(FAILED_TRANSACTION) << "Failed to write " << kServiceName << " status";
380 }
381 }
382
383 if (const auto& result = dumpCollectorsStatusLocked(fd); !result.ok()) {
384 return Error(FAILED_TRANSACTION) << result.error();
385 }
386
387 if (!WriteStringToFd(StringPrintf("\n%s%s report:\n%sBoot-time collection information:\n%s\n",
388 kDumpMajorDelimiter.c_str(), kServiceName,
389 kDumpMajorDelimiter.c_str(), std::string(33, '=').c_str()),
390 fd) ||
391 !WriteStringToFd(mBoottimeCollection.toString(), fd) ||
392 !WriteStringToFd(StringPrintf("\nPeriodic collection information:\n%s\n",
393 std::string(32, '=').c_str()),
394 fd) ||
395 !WriteStringToFd(mPeriodicCollection.toString(), fd)) {
396 return Error(FAILED_TRANSACTION)
397 << "Failed to dump the boot-time and periodic collection reports.";
398 }
399
400 for (const auto& processor : mDataProcessors) {
401 if (const auto result = processor->onDump(fd); !result.ok()) {
402 return result;
403 }
404 }
405
406 WriteStringToFd(kDumpMajorDelimiter, fd);
407 return {};
408 }
409
dumpHelpText(int fd) const410 bool WatchdogPerfService::dumpHelpText(int fd) const {
411 return WriteStringToFd(StringPrintf(kHelpText, kServiceName, kStartCustomCollectionFlag,
412 kIntervalFlag,
413 std::chrono::duration_cast<std::chrono::seconds>(
414 kCustomCollectionInterval)
415 .count(),
416 kMaxDurationFlag,
417 std::chrono::duration_cast<std::chrono::minutes>(
418 kCustomCollectionDuration)
419 .count(),
420 kFilterPackagesFlag, kEndCustomCollectionFlag),
421 fd);
422 }
423
dumpCollectorsStatusLocked(int fd) const424 Result<void> WatchdogPerfService::dumpCollectorsStatusLocked(int fd) const {
425 if (!mUidStatsCollector->enabled() &&
426 !WriteStringToFd(StringPrintf("UidStatsCollector failed to access proc and I/O files"),
427 fd)) {
428 return Error() << "Failed to write UidStatsCollector status";
429 }
430 if (!mProcStat->enabled() &&
431 !WriteStringToFd(StringPrintf("ProcStat collector failed to access the file %s",
432 mProcStat->filePath().c_str()),
433 fd)) {
434 return Error() << "Failed to write ProcStat collector status";
435 }
436 return {};
437 }
438
startCustomCollection(std::chrono::nanoseconds interval,std::chrono::nanoseconds maxDuration,const std::unordered_set<std::string> & filterPackages)439 Result<void> WatchdogPerfService::startCustomCollection(
440 std::chrono::nanoseconds interval, std::chrono::nanoseconds maxDuration,
441 const std::unordered_set<std::string>& filterPackages) {
442 if (interval < kMinEventInterval || maxDuration < kMinEventInterval) {
443 return Error(INVALID_OPERATION)
444 << "Collection interval and maximum duration must be >= "
445 << std::chrono::duration_cast<std::chrono::milliseconds>(kMinEventInterval).count()
446 << " milliseconds.";
447 }
448 Mutex::Autolock lock(mMutex);
449 if (EventType expected = EventType::PERIODIC_COLLECTION; mCurrCollectionEvent != expected) {
450 return Error(INVALID_OPERATION)
451 << "Cannot start a custom collection when the current collection event "
452 << toString(mCurrCollectionEvent) << " != " << toString(expected)
453 << " collection event";
454 }
455
456 mCustomCollection = {
457 .eventType = EventType::CUSTOM_COLLECTION,
458 .interval = interval,
459 .lastUptime = mHandlerLooper->now(),
460 .filterPackages = filterPackages,
461 };
462
463 mHandlerLooper->removeMessages(this);
464 nsecs_t uptime = mHandlerLooper->now() + maxDuration.count();
465 mHandlerLooper->sendMessageAtTime(uptime, this, SwitchMessage::END_CUSTOM_COLLECTION);
466 mCurrCollectionEvent = EventType::CUSTOM_COLLECTION;
467 mHandlerLooper->sendMessage(this, EventType::CUSTOM_COLLECTION);
468 ALOGI("Starting %s performance data collection", toString(mCurrCollectionEvent));
469 return {};
470 }
471
endCustomCollection(int fd)472 Result<void> WatchdogPerfService::endCustomCollection(int fd) {
473 Mutex::Autolock lock(mMutex);
474 if (mCurrCollectionEvent != EventType::CUSTOM_COLLECTION) {
475 return Error(INVALID_OPERATION) << "No custom collection is running";
476 }
477
478 mHandlerLooper->removeMessages(this);
479 mHandlerLooper->sendMessage(this, SwitchMessage::END_CUSTOM_COLLECTION);
480
481 if (const auto result = dumpCollectorsStatusLocked(fd); !result.ok()) {
482 return Error(FAILED_TRANSACTION) << result.error();
483 }
484
485 if (!WriteStringToFd(StringPrintf("%sPerformance data report for custom collection:\n%s",
486 kDumpMajorDelimiter.c_str(), kDumpMajorDelimiter.c_str()),
487 fd) ||
488 !WriteStringToFd(mCustomCollection.toString(), fd)) {
489 return Error(FAILED_TRANSACTION) << "Failed to write custom collection report.";
490 }
491
492 for (const auto& processor : mDataProcessors) {
493 if (const auto result = processor->onCustomCollectionDump(fd); !result.ok()) {
494 return Error() << processor->name() << " failed on " << toString(mCurrCollectionEvent)
495 << " collection: " << result.error();
496 }
497 }
498
499 if (DEBUG) {
500 ALOGD("Custom event finished");
501 }
502 WriteStringToFd(kDumpMajorDelimiter, fd);
503 return {};
504 }
505
handleMessage(const Message & message)506 void WatchdogPerfService::handleMessage(const Message& message) {
507 Result<void> result;
508
509 auto switchToPeriodicLocked = [&](bool startNow) {
510 mHandlerLooper->removeMessages(this);
511 mCurrCollectionEvent = EventType::PERIODIC_COLLECTION;
512 mPeriodicCollection.lastUptime = mHandlerLooper->now();
513 if (startNow) {
514 mHandlerLooper->sendMessage(this, EventType::PERIODIC_COLLECTION);
515 } else {
516 mPeriodicCollection.lastUptime += mPeriodicCollection.interval.count();
517 mHandlerLooper->sendMessageAtTime(mPeriodicCollection.lastUptime, this,
518 EventType::PERIODIC_COLLECTION);
519 }
520 mPeriodicMonitor.lastUptime = mHandlerLooper->now() + mPeriodicMonitor.interval.count();
521 mHandlerLooper->sendMessageAtTime(mPeriodicMonitor.lastUptime, this,
522 EventType::PERIODIC_MONITOR);
523 ALOGI("Switching to %s and %s", toString(mCurrCollectionEvent),
524 toString(EventType::PERIODIC_MONITOR));
525 };
526
527 switch (message.what) {
528 case static_cast<int>(EventType::BOOT_TIME_COLLECTION):
529 result = processCollectionEvent(&mBoottimeCollection);
530 break;
531 case static_cast<int>(SwitchMessage::END_BOOTTIME_COLLECTION):
532 if (result = processCollectionEvent(&mBoottimeCollection); result.ok()) {
533 Mutex::Autolock lock(mMutex);
534 switchToPeriodicLocked(/*startNow=*/false);
535 }
536 break;
537 case static_cast<int>(EventType::PERIODIC_COLLECTION):
538 result = processCollectionEvent(&mPeriodicCollection);
539 break;
540 case static_cast<int>(EventType::CUSTOM_COLLECTION):
541 result = processCollectionEvent(&mCustomCollection);
542 break;
543 case static_cast<int>(EventType::PERIODIC_MONITOR):
544 result = processMonitorEvent(&mPeriodicMonitor);
545 break;
546 case static_cast<int>(SwitchMessage::END_CUSTOM_COLLECTION): {
547 Mutex::Autolock lock(mMutex);
548 if (EventType expected = EventType::CUSTOM_COLLECTION;
549 mCurrCollectionEvent != expected) {
550 ALOGW("Skipping END_CUSTOM_COLLECTION message as the current collection %s != %s",
551 toString(mCurrCollectionEvent), toString(expected));
552 return;
553 }
554 mCustomCollection = {};
555 for (const auto& processor : mDataProcessors) {
556 /*
557 * Clear custom collection cache on the data processors when the custom collection
558 * ends.
559 */
560 processor->onCustomCollectionDump(-1);
561 }
562 switchToPeriodicLocked(/*startNow=*/true);
563 return;
564 }
565 default:
566 result = Error() << "Unknown message: " << message.what;
567 }
568
569 if (!result.ok()) {
570 Mutex::Autolock lock(mMutex);
571 ALOGE("Terminating %s: %s", kServiceName, result.error().message().c_str());
572 /*
573 * DO NOT CALL terminate() as it tries to join the collection thread but this code is
574 * executed on the collection thread. Thus it will result in a deadlock.
575 */
576 mCurrCollectionEvent = EventType::TERMINATED;
577 mHandlerLooper->removeMessages(this);
578 mHandlerLooper->wake();
579 }
580 }
581
processCollectionEvent(WatchdogPerfService::EventMetadata * metadata)582 Result<void> WatchdogPerfService::processCollectionEvent(
583 WatchdogPerfService::EventMetadata* metadata) {
584 Mutex::Autolock lock(mMutex);
585 /*
586 * Messages sent to the looper are intrinsically racy such that a message from the previous
587 * collection event may land in the looper after the current collection has already begun. Thus
588 * verify the current collection event before starting the collection.
589 */
590 if (mCurrCollectionEvent != metadata->eventType) {
591 ALOGW("Skipping %s event on collection event %s", toString(metadata->eventType),
592 toString(mCurrCollectionEvent));
593 return {};
594 }
595 if (DEBUG) {
596 ALOGD("Processing %s collection event", toString(metadata->eventType));
597 }
598 if (metadata->interval < kMinEventInterval) {
599 return Error()
600 << "Collection interval of "
601 << std::chrono::duration_cast<std::chrono::seconds>(metadata->interval).count()
602 << " seconds for " << toString(metadata->eventType)
603 << " collection cannot be less than "
604 << std::chrono::duration_cast<std::chrono::seconds>(kMinEventInterval).count()
605 << " seconds";
606 }
607 if (const auto result = collectLocked(metadata); !result.ok()) {
608 return Error() << toString(metadata->eventType) << " collection failed: " << result.error();
609 }
610 metadata->lastUptime += metadata->interval.count();
611 mHandlerLooper->sendMessageAtTime(metadata->lastUptime, this, metadata->eventType);
612 return {};
613 }
614
collectLocked(WatchdogPerfService::EventMetadata * metadata)615 Result<void> WatchdogPerfService::collectLocked(WatchdogPerfService::EventMetadata* metadata) {
616 if (!mUidStatsCollector->enabled() && !mProcStat->enabled()) {
617 return Error() << "No collectors enabled";
618 }
619
620 time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
621
622 if (mUidStatsCollector->enabled()) {
623 if (const auto result = mUidStatsCollector->collect(); !result.ok()) {
624 return Error() << "Failed to collect per-uid proc and I/O stats: " << result.error();
625 }
626 }
627
628 if (mProcStat->enabled()) {
629 if (const auto result = mProcStat->collect(); !result.ok()) {
630 return Error() << "Failed to collect proc stats: " << result.error();
631 }
632 }
633
634 for (const auto& processor : mDataProcessors) {
635 Result<void> result;
636 switch (mCurrCollectionEvent) {
637 case EventType::BOOT_TIME_COLLECTION:
638 result = processor->onBoottimeCollection(now, mUidStatsCollector, mProcStat);
639 break;
640 case EventType::PERIODIC_COLLECTION:
641 result = processor->onPeriodicCollection(now, mSystemState, mUidStatsCollector,
642 mProcStat);
643 break;
644 case EventType::CUSTOM_COLLECTION:
645 result = processor->onCustomCollection(now, mSystemState, metadata->filterPackages,
646 mUidStatsCollector, mProcStat);
647 break;
648 default:
649 result = Error() << "Invalid collection event " << toString(mCurrCollectionEvent);
650 }
651 if (!result.ok()) {
652 return Error() << processor->name() << " failed on " << toString(mCurrCollectionEvent)
653 << " collection: " << result.error();
654 }
655 }
656
657 return {};
658 }
659
processMonitorEvent(WatchdogPerfService::EventMetadata * metadata)660 Result<void> WatchdogPerfService::processMonitorEvent(
661 WatchdogPerfService::EventMetadata* metadata) {
662 if (metadata->eventType != static_cast<int>(EventType::PERIODIC_MONITOR)) {
663 return Error() << "Invalid monitor event " << toString(metadata->eventType);
664 }
665 if (DEBUG) {
666 ALOGD("Processing %s monitor event", toString(metadata->eventType));
667 }
668 if (metadata->interval < kMinEventInterval) {
669 return Error()
670 << "Monitor interval of "
671 << std::chrono::duration_cast<std::chrono::seconds>(metadata->interval).count()
672 << " seconds for " << toString(metadata->eventType) << " event cannot be less than "
673 << std::chrono::duration_cast<std::chrono::seconds>(kMinEventInterval).count()
674 << " seconds";
675 }
676 Mutex::Autolock lock(mMutex);
677 if (!mProcDiskStats->enabled()) {
678 return Error() << "Cannot access proc disk stats for monitoring";
679 }
680 time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
681 if (const auto result = mProcDiskStats->collect(); !result.ok()) {
682 return Error() << "Failed to collect disk stats: " << result.error();
683 }
684 auto* currCollectionMetadata = currCollectionMetadataLocked();
685 if (currCollectionMetadata == nullptr) {
686 return Error() << "No metadata available for current collection event: "
687 << toString(mCurrCollectionEvent);
688 }
689 bool requestedCollection = false;
690 const auto requestCollection = [&]() mutable {
691 if (requestedCollection) {
692 return;
693 }
694 const nsecs_t prevUptime =
695 currCollectionMetadata->lastUptime - currCollectionMetadata->interval.count();
696 nsecs_t uptime = mHandlerLooper->now();
697 if (const auto delta = std::abs(uptime - prevUptime); delta < kMinEventInterval.count()) {
698 return;
699 }
700 currCollectionMetadata->lastUptime = uptime;
701 mHandlerLooper->removeMessages(this, currCollectionMetadata->eventType);
702 mHandlerLooper->sendMessage(this, currCollectionMetadata->eventType);
703 requestedCollection = true;
704 };
705 for (const auto& processor : mDataProcessors) {
706 if (const auto result =
707 processor->onPeriodicMonitor(now, mProcDiskStats, requestCollection);
708 !result.ok()) {
709 return Error() << processor->name() << " failed on " << toString(metadata->eventType)
710 << ": " << result.error();
711 }
712 }
713 metadata->lastUptime += metadata->interval.count();
714 if (metadata->lastUptime == currCollectionMetadata->lastUptime) {
715 /*
716 * If the |PERIODIC_MONITOR| and *_COLLECTION events overlap, skip the |PERIODIC_MONITOR|
717 * event.
718 */
719 metadata->lastUptime += metadata->interval.count();
720 }
721 mHandlerLooper->sendMessageAtTime(metadata->lastUptime, this, metadata->eventType);
722 return {};
723 }
724
currCollectionMetadataLocked()725 WatchdogPerfService::EventMetadata* WatchdogPerfService::currCollectionMetadataLocked() {
726 switch (mCurrCollectionEvent) {
727 case EventType::BOOT_TIME_COLLECTION:
728 return &mBoottimeCollection;
729 case EventType::PERIODIC_COLLECTION:
730 return &mPeriodicCollection;
731 case EventType::CUSTOM_COLLECTION:
732 return &mCustomCollection;
733 default:
734 return nullptr;
735 }
736 }
737
738 } // namespace watchdog
739 } // namespace automotive
740 } // namespace android
741