1 /*
2  * Copyright (c) 2020, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "carwatchdogd"
18 #define DEBUG false  // STOPSHIP if true.
19 
20 #include "WatchdogPerfService.h"
21 
22 #include <WatchdogProperties.sysprop.h>
23 #include <android-base/file.h>
24 #include <android-base/parseint.h>
25 #include <android-base/stringprintf.h>
26 #include <android-base/strings.h>
27 #include <log/log.h>
28 #include <processgroup/sched_policy.h>
29 
30 #include <pthread.h>
31 
32 #include <iterator>
33 #include <vector>
34 
35 namespace android {
36 namespace automotive {
37 namespace watchdog {
38 
39 using ::android::sp;
40 using ::android::String16;
41 using ::android::String8;
42 using ::android::automotive::watchdog::internal::PowerCycle;
43 using ::android::base::Error;
44 using ::android::base::Join;
45 using ::android::base::ParseUint;
46 using ::android::base::Result;
47 using ::android::base::Split;
48 using ::android::base::StringAppendF;
49 using ::android::base::StringPrintf;
50 using ::android::base::WriteStringToFd;
51 
52 namespace {
53 
// Minimum required collection interval between subsequent collections.
const std::chrono::nanoseconds kMinEventInterval = 1s;
// Fallback intervals used by start() when the corresponding system property has no value.
const std::chrono::seconds kDefaultBoottimeCollectionInterval = 1s;
const std::chrono::seconds kDefaultPeriodicCollectionInterval = 20s;
const std::chrono::seconds kDefaultPeriodicMonitorInterval = 5s;
// Defaults used by onCustomCollection() when the interval / max duration flags are not provided.
const std::chrono::nanoseconds kCustomCollectionInterval = 10s;
const std::chrono::nanoseconds kCustomCollectionDuration = 30min;

// Service name used in log, error, and dump messages.
constexpr const char* kServiceName = "WatchdogPerfService";
// Separator line (100 dashes followed by a newline) written around major dump sections.
static const std::string kDumpMajorDelimiter = std::string(100, '-') + "\n";  // NOLINT
/*
 * printf-style template for the help text emitted by dumpHelpText(). The conversion specifiers
 * are filled, in order, with: service name, start flag, interval flag, default interval in
 * seconds (%lld), max duration flag, default max duration in minutes (%ld), filter packages flag,
 * and end flag. Keep the argument list in dumpHelpText() in sync when editing this text.
 */
constexpr const char* kHelpText =
        "\n%s dump options:\n"
        "%s: Starts custom performance data collection. Customize the collection behavior with "
        "the following optional arguments:\n"
        "\t%s <seconds>: Modifies the collection interval. Default behavior is to collect once "
        "every %lld seconds.\n"
        "\t%s <seconds>: Modifies the maximum collection duration. Default behavior is to collect "
        "until %ld minutes before automatically stopping the custom collection and discarding "
        "the collected data.\n"
        "\t%s <package name>,<package name>,...: Comma-separated value containing package names. "
        "When provided, the results are filtered only to the provided package names. Default "
        "behavior is to list the results for the top N packages.\n"
        "%s: Stops custom performance data collection and generates a dump of "
        "the collection report.\n\n"
        "When no options are specified, the carwatchdog report contains the performance data "
        "collected during boot-time and over the last few minutes before the report generation.\n";
80 
parseSecondsFlag(const Vector<String16> & args,size_t pos)81 Result<std::chrono::seconds> parseSecondsFlag(const Vector<String16>& args, size_t pos) {
82     if (args.size() <= pos) {
83         return Error() << "Value not provided";
84     }
85     uint64_t value;
86     if (std::string strValue = std::string(String8(args[pos]).string());
87         !ParseUint(strValue, &value)) {
88         return Error() << "Invalid value " << strValue << ", must be an integer";
89     }
90     return std::chrono::seconds(value);
91 }
92 
toString(std::variant<EventType,SwitchMessage> what)93 constexpr const char* toString(std::variant<EventType, SwitchMessage> what) {
94     return std::visit(
95             [&](const auto& v) -> const char* {
96                 switch (static_cast<int>(v)) {
97                     case EventType::INIT:
98                         return "INIT";
99                     case EventType::TERMINATED:
100                         return "TERMINATED";
101                     case EventType::BOOT_TIME_COLLECTION:
102                         return "BOOT_TIME_COLLECTION";
103                     case EventType::PERIODIC_COLLECTION:
104                         return "PERIODIC_COLLECTION";
105                     case EventType::CUSTOM_COLLECTION:
106                         return "CUSTOM_COLLECTION";
107                     case EventType::PERIODIC_MONITOR:
108                         return "PERIODIC_MONITOR";
109                     case EventType::LAST_EVENT:
110                         return "LAST_EVENT";
111                     case SwitchMessage::END_BOOTTIME_COLLECTION:
112                         return "END_BOOTTIME_COLLECTION";
113                     case SwitchMessage::END_CUSTOM_COLLECTION:
114                         return "END_CUSTOM_COLLECTION";
115                     default:
116                         return "INVALID_EVENT_OR_SWITCH_MESSAGE";
117                 }
118             },
119             what);
120 }
121 
toString(SystemState systemState)122 constexpr const char* toString(SystemState systemState) {
123     switch (systemState) {
124         case SystemState::NORMAL_MODE:
125             return "NORMAL_MODE";
126         case SystemState::GARAGE_MODE:
127             return "GARAGE_MODE";
128         default:
129             return "UNKNOWN MODE";
130     }
131 }
132 
133 }  // namespace
134 
toString() const135 std::string WatchdogPerfService::EventMetadata::toString() const {
136     std::string buffer;
137     const auto intervalInSecs = std::chrono::duration_cast<std::chrono::seconds>(interval).count();
138     StringAppendF(&buffer, "Event interval: %lld second%s\n", intervalInSecs,
139                   ((intervalInSecs > 1) ? "s" : ""));
140     if (!filterPackages.empty()) {
141         std::vector<std::string> packages(filterPackages.begin(), filterPackages.end());
142         StringAppendF(&buffer, "Filtered results to packages: %s\n", Join(packages, ", ").c_str());
143     }
144     return buffer;
145 }
146 
registerDataProcessor(sp<IDataProcessorInterface> processor)147 Result<void> WatchdogPerfService::registerDataProcessor(sp<IDataProcessorInterface> processor) {
148     if (processor == nullptr) {
149         return Error() << "Must provide a valid data processor";
150     }
151     if (const auto result = processor->init(); !result.ok()) {
152         return Error() << "Failed to initialize " << processor->name().c_str() << ": "
153                        << result.error().message();
154     }
155     Mutex::Autolock lock(mMutex);
156     mDataProcessors.push_back(processor);
157     if (DEBUG) {
158         ALOGD("Successfully registered %s to %s", processor->name().c_str(), kServiceName);
159     }
160     return {};
161 }
162 
start()163 Result<void> WatchdogPerfService::start() {
164     {
165         Mutex::Autolock lock(mMutex);
166         if (mCurrCollectionEvent != EventType::INIT || mCollectionThread.joinable()) {
167             return Error(INVALID_OPERATION) << "Cannot start " << kServiceName << " more than once";
168         }
169         std::chrono::nanoseconds boottimeCollectionInterval =
170                 std::chrono::duration_cast<std::chrono::nanoseconds>(
171                         std::chrono::seconds(sysprop::boottimeCollectionInterval().value_or(
172                                 kDefaultBoottimeCollectionInterval.count())));
173         std::chrono::nanoseconds periodicCollectionInterval =
174                 std::chrono::duration_cast<std::chrono::nanoseconds>(
175                         std::chrono::seconds(sysprop::periodicCollectionInterval().value_or(
176                                 kDefaultPeriodicCollectionInterval.count())));
177         std::chrono::nanoseconds periodicMonitorInterval =
178                 std::chrono::duration_cast<std::chrono::nanoseconds>(
179                         std::chrono::seconds(sysprop::periodicMonitorInterval().value_or(
180                                 kDefaultPeriodicMonitorInterval.count())));
181         mBoottimeCollection = {
182                 .eventType = EventType::BOOT_TIME_COLLECTION,
183                 .interval = boottimeCollectionInterval,
184                 .lastUptime = 0,
185         };
186         mPeriodicCollection = {
187                 .eventType = EventType::PERIODIC_COLLECTION,
188                 .interval = periodicCollectionInterval,
189                 .lastUptime = 0,
190         };
191         mPeriodicMonitor = {
192                 .eventType = EventType::PERIODIC_MONITOR,
193                 .interval = periodicMonitorInterval,
194                 .lastUptime = 0,
195         };
196         if (mDataProcessors.empty()) {
197             ALOGE("Terminating %s: No data processor is registered", kServiceName);
198             mCurrCollectionEvent = EventType::TERMINATED;
199             return Error() << "No data processor is registered";
200         }
201     }
202 
203     mCollectionThread = std::thread([&]() {
204         {
205             Mutex::Autolock lock(mMutex);
206             if (EventType expected = EventType::INIT; mCurrCollectionEvent != expected) {
207                 ALOGE("Skipping performance data collection as the current collection event "
208                       "%s != %s",
209                       toString(mCurrCollectionEvent), toString(expected));
210                 return;
211             }
212             mCurrCollectionEvent = EventType::BOOT_TIME_COLLECTION;
213             mBoottimeCollection.lastUptime = mHandlerLooper->now();
214             mHandlerLooper->setLooper(Looper::prepare(/*opts=*/0));
215             mHandlerLooper->sendMessage(this, EventType::BOOT_TIME_COLLECTION);
216         }
217         if (set_sched_policy(0, SP_BACKGROUND) != 0) {
218             ALOGW("Failed to set background scheduling priority to %s thread", kServiceName);
219         }
220         if (int result = pthread_setname_np(pthread_self(), "WatchdogPerfSvc"); result != 0) {
221             ALOGE("Failed to set %s thread name: %d", kServiceName, result);
222         }
223         ALOGI("Starting %s performance data collection", toString(mCurrCollectionEvent));
224         bool isCollectionActive = true;
225         /*
226          * Loop until the collection is not active -- performance collection runs on this thread in
227          * a handler.
228          */
229         while (isCollectionActive) {
230             mHandlerLooper->pollAll(/*timeoutMillis=*/-1);
231             Mutex::Autolock lock(mMutex);
232             isCollectionActive = mCurrCollectionEvent != EventType::TERMINATED;
233         }
234     });
235     return {};
236 }
237 
terminate()238 void WatchdogPerfService::terminate() {
239     {
240         Mutex::Autolock lock(mMutex);
241         if (mCurrCollectionEvent == EventType::TERMINATED) {
242             ALOGE("%s was terminated already", kServiceName);
243             return;
244         }
245         ALOGE("Terminating %s as carwatchdog is terminating", kServiceName);
246         if (mCurrCollectionEvent != EventType::INIT) {
247             /*
248              * Looper runs only after EventType::TNIT has completed so remove looper messages
249              * and wake the looper only when the current collection has changed from INIT.
250              */
251             mHandlerLooper->removeMessages(this);
252             mHandlerLooper->wake();
253         }
254         for (const auto& processor : mDataProcessors) {
255             processor->terminate();
256         }
257         mCurrCollectionEvent = EventType::TERMINATED;
258     }
259     if (mCollectionThread.joinable()) {
260         mCollectionThread.join();
261         if (DEBUG) {
262             ALOGD("%s collection thread terminated", kServiceName);
263         }
264     }
265 }
266 
setSystemState(SystemState systemState)267 void WatchdogPerfService::setSystemState(SystemState systemState) {
268     Mutex::Autolock lock(mMutex);
269     if (mSystemState != systemState) {
270         ALOGI("%s switching from %s to %s", kServiceName, toString(mSystemState),
271               toString(systemState));
272     }
273     mSystemState = systemState;
274 }
275 
onBootFinished()276 Result<void> WatchdogPerfService::onBootFinished() {
277     Mutex::Autolock lock(mMutex);
278     if (EventType expected = EventType::BOOT_TIME_COLLECTION; mCurrCollectionEvent != expected) {
279         /*
280          * This case happens when either the WatchdogPerfService has prematurely terminated before
281          * boot complete notification is received or multiple boot complete notifications are
282          * received. In either case don't return error as this will lead to runtime exception and
283          * cause system to boot loop.
284          */
285         ALOGE("Current performance data collection event %s != %s", toString(mCurrCollectionEvent),
286               toString(expected));
287         return {};
288     }
289     mBoottimeCollection.lastUptime = mHandlerLooper->now();
290     mHandlerLooper->removeMessages(this);
291     mHandlerLooper->sendMessage(this, SwitchMessage::END_BOOTTIME_COLLECTION);
292     if (DEBUG) {
293         ALOGD("Boot-time event finished");
294     }
295     return {};
296 }
297 
onCustomCollection(int fd,const Vector<String16> & args)298 Result<void> WatchdogPerfService::onCustomCollection(int fd, const Vector<String16>& args) {
299     if (args.empty()) {
300         return Error(BAD_VALUE) << "No custom collection dump arguments";
301     }
302 
303     if (args[0] == String16(kStartCustomCollectionFlag)) {
304         if (args.size() > 7) {
305             return Error(BAD_VALUE) << "Number of arguments to start custom performance data "
306                                     << "collection cannot exceed 7";
307         }
308         std::chrono::nanoseconds interval = kCustomCollectionInterval;
309         std::chrono::nanoseconds maxDuration = kCustomCollectionDuration;
310         std::unordered_set<std::string> filterPackages;
311         for (size_t i = 1; i < args.size(); ++i) {
312             if (args[i] == String16(kIntervalFlag)) {
313                 const auto& result = parseSecondsFlag(args, i + 1);
314                 if (!result.ok()) {
315                     return Error(BAD_VALUE)
316                             << "Failed to parse " << kIntervalFlag << ": " << result.error();
317                 }
318                 interval = std::chrono::duration_cast<std::chrono::nanoseconds>(*result);
319                 ++i;
320                 continue;
321             }
322             if (args[i] == String16(kMaxDurationFlag)) {
323                 const auto& result = parseSecondsFlag(args, i + 1);
324                 if (!result.ok()) {
325                     return Error(BAD_VALUE)
326                             << "Failed to parse " << kMaxDurationFlag << ": " << result.error();
327                 }
328                 maxDuration = std::chrono::duration_cast<std::chrono::nanoseconds>(*result);
329                 ++i;
330                 continue;
331             }
332             if (args[i] == String16(kFilterPackagesFlag)) {
333                 if (args.size() < i + 1) {
334                     return Error(BAD_VALUE)
335                             << "Must provide value for '" << kFilterPackagesFlag << "' flag";
336                 }
337                 std::vector<std::string> packages =
338                         Split(std::string(String8(args[i + 1]).string()), ",");
339                 std::copy(packages.begin(), packages.end(),
340                           std::inserter(filterPackages, filterPackages.end()));
341                 ++i;
342                 continue;
343             }
344             ALOGW("Unknown flag %s provided to start custom performance data collection",
345                   String8(args[i]).string());
346             return Error(BAD_VALUE) << "Unknown flag " << String8(args[i]).string()
347                                     << " provided to start custom performance data collection";
348         }
349         if (const auto& result = startCustomCollection(interval, maxDuration, filterPackages);
350             !result.ok()) {
351             WriteStringToFd(result.error().message(), fd);
352             return result;
353         }
354         return {};
355     }
356 
357     if (args[0] == String16(kEndCustomCollectionFlag)) {
358         if (args.size() != 1) {
359             ALOGW("Number of arguments to stop custom performance data collection cannot exceed 1. "
360                   "Stopping the data collection.");
361             WriteStringToFd("Number of arguments to stop custom performance data collection "
362                             "cannot exceed 1. Stopping the data collection.",
363                             fd);
364         }
365         return endCustomCollection(fd);
366     }
367 
368     return Error(BAD_VALUE) << "Custom perf collection dump arguments start neither with "
369                             << kStartCustomCollectionFlag << " nor with "
370                             << kEndCustomCollectionFlag << " flags";
371 }
372 
onDump(int fd) const373 Result<void> WatchdogPerfService::onDump(int fd) const {
374     Mutex::Autolock lock(mMutex);
375     if (mCurrCollectionEvent == EventType::TERMINATED) {
376         ALOGW("%s not active. Dumping cached data", kServiceName);
377         if (!WriteStringToFd(StringPrintf("%s not active. Dumping cached data.", kServiceName),
378                              fd)) {
379             return Error(FAILED_TRANSACTION) << "Failed to write " << kServiceName << " status";
380         }
381     }
382 
383     if (const auto& result = dumpCollectorsStatusLocked(fd); !result.ok()) {
384         return Error(FAILED_TRANSACTION) << result.error();
385     }
386 
387     if (!WriteStringToFd(StringPrintf("\n%s%s report:\n%sBoot-time collection information:\n%s\n",
388                                       kDumpMajorDelimiter.c_str(), kServiceName,
389                                       kDumpMajorDelimiter.c_str(), std::string(33, '=').c_str()),
390                          fd) ||
391         !WriteStringToFd(mBoottimeCollection.toString(), fd) ||
392         !WriteStringToFd(StringPrintf("\nPeriodic collection information:\n%s\n",
393                                       std::string(32, '=').c_str()),
394                          fd) ||
395         !WriteStringToFd(mPeriodicCollection.toString(), fd)) {
396         return Error(FAILED_TRANSACTION)
397                 << "Failed to dump the boot-time and periodic collection reports.";
398     }
399 
400     for (const auto& processor : mDataProcessors) {
401         if (const auto result = processor->onDump(fd); !result.ok()) {
402             return result;
403         }
404     }
405 
406     WriteStringToFd(kDumpMajorDelimiter, fd);
407     return {};
408 }
409 
dumpHelpText(int fd) const410 bool WatchdogPerfService::dumpHelpText(int fd) const {
411     return WriteStringToFd(StringPrintf(kHelpText, kServiceName, kStartCustomCollectionFlag,
412                                         kIntervalFlag,
413                                         std::chrono::duration_cast<std::chrono::seconds>(
414                                                 kCustomCollectionInterval)
415                                                 .count(),
416                                         kMaxDurationFlag,
417                                         std::chrono::duration_cast<std::chrono::minutes>(
418                                                 kCustomCollectionDuration)
419                                                 .count(),
420                                         kFilterPackagesFlag, kEndCustomCollectionFlag),
421                            fd);
422 }
423 
dumpCollectorsStatusLocked(int fd) const424 Result<void> WatchdogPerfService::dumpCollectorsStatusLocked(int fd) const {
425     if (!mUidStatsCollector->enabled() &&
426         !WriteStringToFd(StringPrintf("UidStatsCollector failed to access proc and I/O files"),
427                          fd)) {
428         return Error() << "Failed to write UidStatsCollector status";
429     }
430     if (!mProcStat->enabled() &&
431         !WriteStringToFd(StringPrintf("ProcStat collector failed to access the file %s",
432                                       mProcStat->filePath().c_str()),
433                          fd)) {
434         return Error() << "Failed to write ProcStat collector status";
435     }
436     return {};
437 }
438 
startCustomCollection(std::chrono::nanoseconds interval,std::chrono::nanoseconds maxDuration,const std::unordered_set<std::string> & filterPackages)439 Result<void> WatchdogPerfService::startCustomCollection(
440         std::chrono::nanoseconds interval, std::chrono::nanoseconds maxDuration,
441         const std::unordered_set<std::string>& filterPackages) {
442     if (interval < kMinEventInterval || maxDuration < kMinEventInterval) {
443         return Error(INVALID_OPERATION)
444                 << "Collection interval and maximum duration must be >= "
445                 << std::chrono::duration_cast<std::chrono::milliseconds>(kMinEventInterval).count()
446                 << " milliseconds.";
447     }
448     Mutex::Autolock lock(mMutex);
449     if (EventType expected = EventType::PERIODIC_COLLECTION; mCurrCollectionEvent != expected) {
450         return Error(INVALID_OPERATION)
451                 << "Cannot start a custom collection when the current collection event "
452                 << toString(mCurrCollectionEvent) << " != " << toString(expected)
453                 << " collection event";
454     }
455 
456     mCustomCollection = {
457             .eventType = EventType::CUSTOM_COLLECTION,
458             .interval = interval,
459             .lastUptime = mHandlerLooper->now(),
460             .filterPackages = filterPackages,
461     };
462 
463     mHandlerLooper->removeMessages(this);
464     nsecs_t uptime = mHandlerLooper->now() + maxDuration.count();
465     mHandlerLooper->sendMessageAtTime(uptime, this, SwitchMessage::END_CUSTOM_COLLECTION);
466     mCurrCollectionEvent = EventType::CUSTOM_COLLECTION;
467     mHandlerLooper->sendMessage(this, EventType::CUSTOM_COLLECTION);
468     ALOGI("Starting %s performance data collection", toString(mCurrCollectionEvent));
469     return {};
470 }
471 
endCustomCollection(int fd)472 Result<void> WatchdogPerfService::endCustomCollection(int fd) {
473     Mutex::Autolock lock(mMutex);
474     if (mCurrCollectionEvent != EventType::CUSTOM_COLLECTION) {
475         return Error(INVALID_OPERATION) << "No custom collection is running";
476     }
477 
478     mHandlerLooper->removeMessages(this);
479     mHandlerLooper->sendMessage(this, SwitchMessage::END_CUSTOM_COLLECTION);
480 
481     if (const auto result = dumpCollectorsStatusLocked(fd); !result.ok()) {
482         return Error(FAILED_TRANSACTION) << result.error();
483     }
484 
485     if (!WriteStringToFd(StringPrintf("%sPerformance data report for custom collection:\n%s",
486                                       kDumpMajorDelimiter.c_str(), kDumpMajorDelimiter.c_str()),
487                          fd) ||
488         !WriteStringToFd(mCustomCollection.toString(), fd)) {
489         return Error(FAILED_TRANSACTION) << "Failed to write custom collection report.";
490     }
491 
492     for (const auto& processor : mDataProcessors) {
493         if (const auto result = processor->onCustomCollectionDump(fd); !result.ok()) {
494             return Error() << processor->name() << " failed on " << toString(mCurrCollectionEvent)
495                            << " collection: " << result.error();
496         }
497     }
498 
499     if (DEBUG) {
500         ALOGD("Custom event finished");
501     }
502     WriteStringToFd(kDumpMajorDelimiter, fd);
503     return {};
504 }
505 
handleMessage(const Message & message)506 void WatchdogPerfService::handleMessage(const Message& message) {
507     Result<void> result;
508 
509     auto switchToPeriodicLocked = [&](bool startNow) {
510         mHandlerLooper->removeMessages(this);
511         mCurrCollectionEvent = EventType::PERIODIC_COLLECTION;
512         mPeriodicCollection.lastUptime = mHandlerLooper->now();
513         if (startNow) {
514             mHandlerLooper->sendMessage(this, EventType::PERIODIC_COLLECTION);
515         } else {
516             mPeriodicCollection.lastUptime += mPeriodicCollection.interval.count();
517             mHandlerLooper->sendMessageAtTime(mPeriodicCollection.lastUptime, this,
518                                               EventType::PERIODIC_COLLECTION);
519         }
520         mPeriodicMonitor.lastUptime = mHandlerLooper->now() + mPeriodicMonitor.interval.count();
521         mHandlerLooper->sendMessageAtTime(mPeriodicMonitor.lastUptime, this,
522                                           EventType::PERIODIC_MONITOR);
523         ALOGI("Switching to %s and %s", toString(mCurrCollectionEvent),
524               toString(EventType::PERIODIC_MONITOR));
525     };
526 
527     switch (message.what) {
528         case static_cast<int>(EventType::BOOT_TIME_COLLECTION):
529             result = processCollectionEvent(&mBoottimeCollection);
530             break;
531         case static_cast<int>(SwitchMessage::END_BOOTTIME_COLLECTION):
532             if (result = processCollectionEvent(&mBoottimeCollection); result.ok()) {
533                 Mutex::Autolock lock(mMutex);
534                 switchToPeriodicLocked(/*startNow=*/false);
535             }
536             break;
537         case static_cast<int>(EventType::PERIODIC_COLLECTION):
538             result = processCollectionEvent(&mPeriodicCollection);
539             break;
540         case static_cast<int>(EventType::CUSTOM_COLLECTION):
541             result = processCollectionEvent(&mCustomCollection);
542             break;
543         case static_cast<int>(EventType::PERIODIC_MONITOR):
544             result = processMonitorEvent(&mPeriodicMonitor);
545             break;
546         case static_cast<int>(SwitchMessage::END_CUSTOM_COLLECTION): {
547             Mutex::Autolock lock(mMutex);
548             if (EventType expected = EventType::CUSTOM_COLLECTION;
549                 mCurrCollectionEvent != expected) {
550                 ALOGW("Skipping END_CUSTOM_COLLECTION message as the current collection %s != %s",
551                       toString(mCurrCollectionEvent), toString(expected));
552                 return;
553             }
554             mCustomCollection = {};
555             for (const auto& processor : mDataProcessors) {
556                 /*
557                  * Clear custom collection cache on the data processors when the custom collection
558                  * ends.
559                  */
560                 processor->onCustomCollectionDump(-1);
561             }
562             switchToPeriodicLocked(/*startNow=*/true);
563             return;
564         }
565         default:
566             result = Error() << "Unknown message: " << message.what;
567     }
568 
569     if (!result.ok()) {
570         Mutex::Autolock lock(mMutex);
571         ALOGE("Terminating %s: %s", kServiceName, result.error().message().c_str());
572         /*
573          * DO NOT CALL terminate() as it tries to join the collection thread but this code is
574          * executed on the collection thread. Thus it will result in a deadlock.
575          */
576         mCurrCollectionEvent = EventType::TERMINATED;
577         mHandlerLooper->removeMessages(this);
578         mHandlerLooper->wake();
579     }
580 }
581 
processCollectionEvent(WatchdogPerfService::EventMetadata * metadata)582 Result<void> WatchdogPerfService::processCollectionEvent(
583         WatchdogPerfService::EventMetadata* metadata) {
584     Mutex::Autolock lock(mMutex);
585     /*
586      * Messages sent to the looper are intrinsically racy such that a message from the previous
587      * collection event may land in the looper after the current collection has already begun. Thus
588      * verify the current collection event before starting the collection.
589      */
590     if (mCurrCollectionEvent != metadata->eventType) {
591         ALOGW("Skipping %s event on collection event %s", toString(metadata->eventType),
592               toString(mCurrCollectionEvent));
593         return {};
594     }
595     if (DEBUG) {
596         ALOGD("Processing %s collection event", toString(metadata->eventType));
597     }
598     if (metadata->interval < kMinEventInterval) {
599         return Error()
600                 << "Collection interval of "
601                 << std::chrono::duration_cast<std::chrono::seconds>(metadata->interval).count()
602                 << " seconds for " << toString(metadata->eventType)
603                 << " collection cannot be less than "
604                 << std::chrono::duration_cast<std::chrono::seconds>(kMinEventInterval).count()
605                 << " seconds";
606     }
607     if (const auto result = collectLocked(metadata); !result.ok()) {
608         return Error() << toString(metadata->eventType) << " collection failed: " << result.error();
609     }
610     metadata->lastUptime += metadata->interval.count();
611     mHandlerLooper->sendMessageAtTime(metadata->lastUptime, this, metadata->eventType);
612     return {};
613 }
614 
collectLocked(WatchdogPerfService::EventMetadata * metadata)615 Result<void> WatchdogPerfService::collectLocked(WatchdogPerfService::EventMetadata* metadata) {
616     if (!mUidStatsCollector->enabled() && !mProcStat->enabled()) {
617         return Error() << "No collectors enabled";
618     }
619 
620     time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
621 
622     if (mUidStatsCollector->enabled()) {
623         if (const auto result = mUidStatsCollector->collect(); !result.ok()) {
624             return Error() << "Failed to collect per-uid proc and I/O stats: " << result.error();
625         }
626     }
627 
628     if (mProcStat->enabled()) {
629         if (const auto result = mProcStat->collect(); !result.ok()) {
630             return Error() << "Failed to collect proc stats: " << result.error();
631         }
632     }
633 
634     for (const auto& processor : mDataProcessors) {
635         Result<void> result;
636         switch (mCurrCollectionEvent) {
637             case EventType::BOOT_TIME_COLLECTION:
638                 result = processor->onBoottimeCollection(now, mUidStatsCollector, mProcStat);
639                 break;
640             case EventType::PERIODIC_COLLECTION:
641                 result = processor->onPeriodicCollection(now, mSystemState, mUidStatsCollector,
642                                                          mProcStat);
643                 break;
644             case EventType::CUSTOM_COLLECTION:
645                 result = processor->onCustomCollection(now, mSystemState, metadata->filterPackages,
646                                                        mUidStatsCollector, mProcStat);
647                 break;
648             default:
649                 result = Error() << "Invalid collection event " << toString(mCurrCollectionEvent);
650         }
651         if (!result.ok()) {
652             return Error() << processor->name() << " failed on " << toString(mCurrCollectionEvent)
653                            << " collection: " << result.error();
654         }
655     }
656 
657     return {};
658 }
659 
processMonitorEvent(WatchdogPerfService::EventMetadata * metadata)660 Result<void> WatchdogPerfService::processMonitorEvent(
661         WatchdogPerfService::EventMetadata* metadata) {
662     if (metadata->eventType != static_cast<int>(EventType::PERIODIC_MONITOR)) {
663         return Error() << "Invalid monitor event " << toString(metadata->eventType);
664     }
665     if (DEBUG) {
666         ALOGD("Processing %s monitor event", toString(metadata->eventType));
667     }
668     if (metadata->interval < kMinEventInterval) {
669         return Error()
670                 << "Monitor interval of "
671                 << std::chrono::duration_cast<std::chrono::seconds>(metadata->interval).count()
672                 << " seconds for " << toString(metadata->eventType) << " event cannot be less than "
673                 << std::chrono::duration_cast<std::chrono::seconds>(kMinEventInterval).count()
674                 << " seconds";
675     }
676     Mutex::Autolock lock(mMutex);
677     if (!mProcDiskStats->enabled()) {
678         return Error() << "Cannot access proc disk stats for monitoring";
679     }
680     time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
681     if (const auto result = mProcDiskStats->collect(); !result.ok()) {
682         return Error() << "Failed to collect disk stats: " << result.error();
683     }
684     auto* currCollectionMetadata = currCollectionMetadataLocked();
685     if (currCollectionMetadata == nullptr) {
686         return Error() << "No metadata available for current collection event: "
687                        << toString(mCurrCollectionEvent);
688     }
689     bool requestedCollection = false;
690     const auto requestCollection = [&]() mutable {
691         if (requestedCollection) {
692             return;
693         }
694         const nsecs_t prevUptime =
695                 currCollectionMetadata->lastUptime - currCollectionMetadata->interval.count();
696         nsecs_t uptime = mHandlerLooper->now();
697         if (const auto delta = std::abs(uptime - prevUptime); delta < kMinEventInterval.count()) {
698             return;
699         }
700         currCollectionMetadata->lastUptime = uptime;
701         mHandlerLooper->removeMessages(this, currCollectionMetadata->eventType);
702         mHandlerLooper->sendMessage(this, currCollectionMetadata->eventType);
703         requestedCollection = true;
704     };
705     for (const auto& processor : mDataProcessors) {
706         if (const auto result =
707                     processor->onPeriodicMonitor(now, mProcDiskStats, requestCollection);
708             !result.ok()) {
709             return Error() << processor->name() << " failed on " << toString(metadata->eventType)
710                            << ": " << result.error();
711         }
712     }
713     metadata->lastUptime += metadata->interval.count();
714     if (metadata->lastUptime == currCollectionMetadata->lastUptime) {
715         /*
716          * If the |PERIODIC_MONITOR| and  *_COLLECTION events overlap, skip the |PERIODIC_MONITOR|
717          * event.
718          */
719         metadata->lastUptime += metadata->interval.count();
720     }
721     mHandlerLooper->sendMessageAtTime(metadata->lastUptime, this, metadata->eventType);
722     return {};
723 }
724 
currCollectionMetadataLocked()725 WatchdogPerfService::EventMetadata* WatchdogPerfService::currCollectionMetadataLocked() {
726     switch (mCurrCollectionEvent) {
727         case EventType::BOOT_TIME_COLLECTION:
728             return &mBoottimeCollection;
729         case EventType::PERIODIC_COLLECTION:
730             return &mPeriodicCollection;
731         case EventType::CUSTOM_COLLECTION:
732             return &mCustomCollection;
733         default:
734             return nullptr;
735     }
736 }
737 
738 }  // namespace watchdog
739 }  // namespace automotive
740 }  // namespace android
741