1 /**
2  * Copyright (c) 2020, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "carwatchdogd"
18 #define DEBUG false  // STOPSHIP if true.
19 
20 #include "WatchdogProcessService.h"
21 
22 #include "WatchdogServiceHelper.h"
23 
24 #include <android-base/file.h>
25 #include <android-base/macros.h>
26 #include <android-base/properties.h>
27 #include <android-base/stringprintf.h>
28 #include <android-base/strings.h>
29 #include <android/automotive/watchdog/BnCarWatchdogClient.h>
30 #include <android/automotive/watchdog/internal/BnCarWatchdogMonitor.h>
31 #include <android/automotive/watchdog/internal/BnCarWatchdogServiceForSystem.h>
32 #include <android/hardware/automotive/vehicle/2.0/types.h>
33 #include <android/hidl/manager/1.0/IServiceManager.h>
34 #include <binder/IPCThreadState.h>
35 #include <hidl/HidlTransportSupport.h>
36 #include <utils/SystemClock.h>
37 
38 #include <utility>
39 
40 namespace android {
41 namespace automotive {
42 namespace watchdog {
43 
44 namespace aawi = ::android::automotive::watchdog::internal;
45 
46 using aawi::BnCarWatchdogServiceForSystem;
47 using aawi::ICarWatchdogServiceForSystem;
48 using ::android::IBinder;
49 using ::android::sp;
50 using ::android::String16;
51 using ::android::base::Error;
52 using ::android::base::GetIntProperty;
53 using ::android::base::GetProperty;
54 using ::android::base::ReadFileToString;
55 using ::android::base::Result;
56 using ::android::base::StringAppendF;
57 using ::android::base::StringPrintf;
58 using ::android::base::Trim;
59 using ::android::base::WriteStringToFd;
60 using ::android::binder::Status;
61 using ::android::hardware::hidl_vec;
62 using ::android::hardware::interfacesEqual;
63 using ::android::hardware::Return;
64 using ::android::hardware::automotive::vehicle::V2_0::IVehicle;
65 using ::android::hardware::automotive::vehicle::V2_0::ProcessTerminationReason;
66 using ::android::hardware::automotive::vehicle::V2_0::StatusCode;
67 using ::android::hardware::automotive::vehicle::V2_0::SubscribeFlags;
68 using ::android::hardware::automotive::vehicle::V2_0::SubscribeOptions;
69 using ::android::hardware::automotive::vehicle::V2_0::VehiclePropConfig;
70 using ::android::hardware::automotive::vehicle::V2_0::VehicleProperty;
71 using ::android::hardware::automotive::vehicle::V2_0::VehiclePropertyStatus;
72 using ::android::hardware::automotive::vehicle::V2_0::VehiclePropValue;
73 using ::android::hidl::base::V1_0::IBase;
74 
75 namespace {
76 
77 const std::vector<TimeoutLength> kTimeouts = {TimeoutLength::TIMEOUT_CRITICAL,
78                                               TimeoutLength::TIMEOUT_MODERATE,
79                                               TimeoutLength::TIMEOUT_NORMAL};
80 
81 // TimeoutLength is also used as a message ID. Other message IDs should start next to
82 // TimeoutLength::TIMEOUT_NORMAL.
83 const int32_t MSG_VHAL_WATCHDOG_ALIVE = static_cast<int>(TimeoutLength::TIMEOUT_NORMAL) + 1;
84 const int32_t MSG_VHAL_HEALTH_CHECK = MSG_VHAL_WATCHDOG_ALIVE + 1;
85 
86 // VHAL is supposed to send heart beat every 3s. Car watchdog checks if there is the latest heart
87 // beat from VHAL within 3s, allowing 1s marginal time.
88 // If {@code ro.carwatchdog.vhal_healthcheck.interval} is set, car watchdog checks VHAL health at
89 // the given interval. The lower bound of the interval is 3s.
90 constexpr int32_t kDefaultVhalCheckIntervalSec = 3;
91 constexpr std::chrono::milliseconds kHealthCheckDelayMs = 1s;
92 
93 constexpr const char kPropertyVhalCheckInterval[] = "ro.carwatchdog.vhal_healthcheck.interval";
94 constexpr const char kServiceName[] = "WatchdogProcessService";
95 constexpr const char kVhalInterfaceName[] = "android.hardware.automotive.vehicle@2.0::IVehicle";
96 
timeoutToDurationNs(const TimeoutLength & timeout)97 std::chrono::nanoseconds timeoutToDurationNs(const TimeoutLength& timeout) {
98     switch (timeout) {
99         case TimeoutLength::TIMEOUT_CRITICAL:
100             return 3s;  // 3s and no buffer time.
101         case TimeoutLength::TIMEOUT_MODERATE:
102             return 6s;  // 5s + 1s as buffer time.
103         case TimeoutLength::TIMEOUT_NORMAL:
104             return 12s;  // 10s + 2s as buffer time.
105     }
106 }
107 
// Formats a list of process IDs as a human readable, comma separated string.
//
// @param pids Process IDs to format.
// @return The IDs joined with ", " (e.g. "12, 34, 56"), or an empty string when pids is empty.
std::string pidArrayToString(const std::vector<int32_t>& pids) {
    std::string buffer;
    // size_t index: the original compared a signed int against the unsigned container size.
    for (size_t i = 0; i < pids.size(); ++i) {
        if (i != 0) {
            buffer += ", ";
        }
        buffer += std::to_string(pids[i]);
    }
    return buffer;
}
121 
isSystemShuttingDown()122 bool isSystemShuttingDown() {
123     std::string sysPowerCtl;
124     std::istringstream tokenStream(GetProperty("sys.powerctl", ""));
125     std::getline(tokenStream, sysPowerCtl, ',');
126     return sysPowerCtl == "reboot" || sysPowerCtl == "shutdown";
127 }
128 
129 }  // namespace
130 
WatchdogProcessService(const sp<Looper> & handlerLooper)131 WatchdogProcessService::WatchdogProcessService(const sp<Looper>& handlerLooper) :
132       mHandlerLooper(handlerLooper),
133       mIsEnabled(true),
134       mLastSessionId(0),
135       mServiceStarted(false),
136       mVhalService(nullptr) {
137     mMessageHandler = sp<MessageHandlerImpl>::make(this);
138     mBinderDeathRecipient = sp<BinderDeathRecipient>::make(this);
139     mHidlDeathRecipient = sp<HidlDeathRecipient>::make(this);
140     mPropertyChangeListener = sp<PropertyChangeListener>::make(this);
141     for (const auto& timeout : kTimeouts) {
142         mClients.insert(std::make_pair(timeout, std::vector<ClientInfo>()));
143         mPingedClients.insert(std::make_pair(timeout, PingedClientMap()));
144     }
145     int32_t vhalHealthCheckIntervalSec =
146             GetIntProperty(kPropertyVhalCheckInterval, kDefaultVhalCheckIntervalSec);
147     vhalHealthCheckIntervalSec = std::max(vhalHealthCheckIntervalSec, kDefaultVhalCheckIntervalSec);
148     mVhalHealthCheckWindowMs = std::chrono::seconds(vhalHealthCheckIntervalSec);
149 }
registerWatchdogServiceHelper(const sp<IWatchdogServiceHelper> & helper)150 Result<void> WatchdogProcessService::registerWatchdogServiceHelper(
151         const sp<IWatchdogServiceHelper>& helper) {
152     if (helper == nullptr) {
153         return Error() << "Must provide a non-null watchdog service helper instance";
154     }
155     Mutex::Autolock lock(mMutex);
156     mWatchdogServiceHelper = helper;
157     return {};
158 }
159 
registerClient(const sp<ICarWatchdogClient> & client,TimeoutLength timeout)160 Status WatchdogProcessService::registerClient(const sp<ICarWatchdogClient>& client,
161                                               TimeoutLength timeout) {
162     pid_t callingPid = IPCThreadState::self()->getCallingPid();
163     uid_t callingUid = IPCThreadState::self()->getCallingUid();
164     ClientInfo clientInfo(client, callingPid, callingUid);
165 
166     Mutex::Autolock lock(mMutex);
167     return registerClientLocked(clientInfo, timeout);
168 }
169 
unregisterClient(const sp<ICarWatchdogClient> & client)170 Status WatchdogProcessService::unregisterClient(const sp<ICarWatchdogClient>& client) {
171     Mutex::Autolock lock(mMutex);
172     sp<IBinder> binder = BnCarWatchdogClient::asBinder(client);
173     // kTimeouts is declared as global static constant to cover all kinds of timeout (CRITICAL,
174     // MODERATE, NORMAL).
175     return unregisterClientLocked(kTimeouts, binder, ClientType::Regular);
176 }
177 
registerCarWatchdogService(const sp<IBinder> & binder)178 Status WatchdogProcessService::registerCarWatchdogService(const sp<IBinder>& binder) {
179     pid_t callingPid = IPCThreadState::self()->getCallingPid();
180     uid_t callingUid = IPCThreadState::self()->getCallingUid();
181 
182     Mutex::Autolock lock(mMutex);
183     if (mWatchdogServiceHelper == nullptr) {
184         return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE,
185                                          "Watchdog service helper instance is null");
186     }
187     ClientInfo clientInfo(mWatchdogServiceHelper, binder, callingPid, callingUid);
188     return registerClientLocked(clientInfo, TimeoutLength::TIMEOUT_CRITICAL);
189 }
190 
unregisterCarWatchdogService(const sp<IBinder> & binder)191 void WatchdogProcessService::unregisterCarWatchdogService(const sp<IBinder>& binder) {
192     Mutex::Autolock lock(mMutex);
193 
194     std::vector<TimeoutLength> timeouts = {TimeoutLength::TIMEOUT_CRITICAL};
195     unregisterClientLocked(timeouts, binder, ClientType::Service);
196 }
197 
registerMonitor(const sp<aawi::ICarWatchdogMonitor> & monitor)198 Status WatchdogProcessService::registerMonitor(const sp<aawi::ICarWatchdogMonitor>& monitor) {
199     Mutex::Autolock lock(mMutex);
200     sp<IBinder> binder = aawi::BnCarWatchdogMonitor::asBinder(monitor);
201     if (mMonitor != nullptr && binder == aawi::BnCarWatchdogMonitor::asBinder(mMonitor)) {
202         return Status::ok();
203     }
204     status_t ret = binder->linkToDeath(mBinderDeathRecipient);
205     if (ret != OK) {
206         ALOGW("Failed to register the monitor as it is dead.");
207         return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE, "The monitor is dead.");
208     }
209     mMonitor = monitor;
210     if (DEBUG) {
211         ALOGD("Car watchdog monitor is registered");
212     }
213     return Status::ok();
214 }
215 
unregisterMonitor(const sp<aawi::ICarWatchdogMonitor> & monitor)216 Status WatchdogProcessService::unregisterMonitor(const sp<aawi::ICarWatchdogMonitor>& monitor) {
217     Mutex::Autolock lock(mMutex);
218     sp<IBinder> curBinder = aawi::BnCarWatchdogMonitor::asBinder(mMonitor);
219     sp<IBinder> newBinder = aawi::BnCarWatchdogMonitor::asBinder(monitor);
220     if (curBinder != newBinder) {
221         ALOGW("Failed to unregister the monitor as it has not been registered.");
222         return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT,
223                                          "The monitor has not been registered.");
224     }
225     curBinder->unlinkToDeath(mBinderDeathRecipient);
226     mMonitor = nullptr;
227     if (DEBUG) {
228         ALOGD("Car watchdog monitor is unregistered");
229     }
230     return Status::ok();
231 }
232 
tellClientAlive(const sp<ICarWatchdogClient> & client,int32_t sessionId)233 Status WatchdogProcessService::tellClientAlive(const sp<ICarWatchdogClient>& client,
234                                                int32_t sessionId) {
235     Mutex::Autolock lock(mMutex);
236     return tellClientAliveLocked(BnCarWatchdogClient::asBinder(client), sessionId);
237 }
238 
tellCarWatchdogServiceAlive(const sp<ICarWatchdogServiceForSystem> & service,const std::vector<int32_t> & clientsNotResponding,int32_t sessionId)239 Status WatchdogProcessService::tellCarWatchdogServiceAlive(
240         const sp<ICarWatchdogServiceForSystem>& service,
241         const std::vector<int32_t>& clientsNotResponding, int32_t sessionId) {
242     Status status;
243     {
244         Mutex::Autolock lock(mMutex);
245         if (DEBUG) {
246             std::string buffer;
247             int size = clientsNotResponding.size();
248             if (size != 0) {
249                 StringAppendF(&buffer, "%d", clientsNotResponding[0]);
250                 for (int i = 1; i < clientsNotResponding.size(); i++) {
251                     StringAppendF(&buffer, ", %d", clientsNotResponding[i]);
252                 }
253                 ALOGD("CarWatchdogService(session: %d) responded with non-responding clients: %s",
254                       sessionId, buffer.c_str());
255             }
256         }
257         status = tellClientAliveLocked(BnCarWatchdogServiceForSystem::asBinder(service), sessionId);
258     }
259     if (status.isOk()) {
260         dumpAndKillAllProcesses(clientsNotResponding, true);
261     }
262     return status;
263 }
264 
tellDumpFinished(const sp<aawi::ICarWatchdogMonitor> & monitor,int32_t pid)265 Status WatchdogProcessService::tellDumpFinished(const sp<aawi::ICarWatchdogMonitor>& monitor,
266                                                 int32_t pid) {
267     Mutex::Autolock lock(mMutex);
268     if (mMonitor == nullptr || monitor == nullptr ||
269         aawi::BnCarWatchdogMonitor::asBinder(monitor) !=
270                 aawi::BnCarWatchdogMonitor::asBinder(mMonitor)) {
271         return Status::
272                 fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT,
273                                   "The monitor is not registered or an invalid monitor is given");
274     }
275     ALOGI("Process(pid: %d) has been dumped and killed", pid);
276     return Status::ok();
277 }
278 
setEnabled(bool isEnabled)279 void WatchdogProcessService::setEnabled(bool isEnabled) {
280     Mutex::Autolock lock(mMutex);
281     if (mIsEnabled != isEnabled) {
282         ALOGI("%s is %s", kServiceName, isEnabled ? "enabled" : "disabled");
283     }
284     mIsEnabled = isEnabled;
285     if (mIsEnabled) {
286         for (const auto& timeout : kTimeouts) {
287             startHealthCheckingLocked(timeout);
288         }
289     }
290 }
291 
notifyUserStateChange(userid_t userId,bool isStarted)292 void WatchdogProcessService::notifyUserStateChange(userid_t userId, bool isStarted) {
293     std::string buffer;
294     Mutex::Autolock lock(mMutex);
295     if (isStarted) {
296         mStoppedUserIds.erase(userId);
297     } else {
298         mStoppedUserIds.insert(userId);
299     }
300 }
301 
dump(int fd,const Vector<String16> &)302 Result<void> WatchdogProcessService::dump(int fd, const Vector<String16>& /*args*/) {
303     Mutex::Autolock lock(mMutex);
304     const char* indent = "  ";
305     const char* doubleIndent = "    ";
306     std::string buffer;
307     WriteStringToFd("CAR WATCHDOG PROCESS SERVICE\n", fd);
308     WriteStringToFd(StringPrintf("%s%s enabled: %s\n", indent, kServiceName,
309                                  mIsEnabled ? "true" : "false"),
310                     fd);
311     WriteStringToFd(StringPrintf("%sRegistered clients\n", indent), fd);
312     int count = 1;
313     for (const auto& timeout : kTimeouts) {
314         std::vector<ClientInfo>& clients = mClients[timeout];
315         for (auto it = clients.begin(); it != clients.end(); it++, count++) {
316             WriteStringToFd(StringPrintf("%sClient #%d: %s\n", doubleIndent, count,
317                                          it->toString().c_str()),
318                             fd);
319         }
320     }
321     WriteStringToFd(StringPrintf("%sMonitor registered: %s\n", indent,
322                                  mMonitor == nullptr ? "false" : "true"),
323                     fd);
324     WriteStringToFd(StringPrintf("%sisSystemShuttingDown: %s\n", indent,
325                                  isSystemShuttingDown() ? "true" : "false"),
326                     fd);
327     buffer = "none";
328     bool first = true;
329     for (const auto& userId : mStoppedUserIds) {
330         if (first) {
331             buffer = StringPrintf("%d", userId);
332             first = false;
333         } else {
334             StringAppendF(&buffer, ", %d", userId);
335         }
336     }
337     WriteStringToFd(StringPrintf("%sStopped users: %s\n", indent, buffer.c_str()), fd);
338     WriteStringToFd(StringPrintf("%sVHAL health check interval: %lldms\n", indent,
339                                  mVhalHealthCheckWindowMs.count()),
340                     fd);
341     return {};
342 }
343 
// Runs one health check round for the timeout class encoded in the message ID.
//
// Clients that did not answer the previous ping are dumped and killed first. Then every
// registered client whose user is not stopped gets a new session ID and is pinged. Finally the
// next round is scheduled as long as clients are registered for the timeout class.
//
// @param what Looper message ID; its value is a TimeoutLength.
void WatchdogProcessService::doHealthCheck(int what) {
    mHandlerLooper->removeMessages(mMessageHandler, what);
    if (Mutex::Autolock lock(mMutex); !mIsEnabled) {
        return;
    }
    const TimeoutLength timeout = static_cast<TimeoutLength>(what);
    dumpAndKillClientsIfNotResponding(timeout);

    /* Generates a temporary/local vector containing the clients to ping.
     * Using a local copy may send unnecessary ping messages to clients after they are unregistered.
     * Clients should be able to handle them.
     */
    std::vector<ClientInfo> clientsToPing;
    bool hasRegisteredClients = false;
    {
        Mutex::Autolock lock(mMutex);
        // Look the map entry up while holding the lock rather than before taking it.
        PingedClientMap& pingedClients = mPingedClients[timeout];
        pingedClients.clear();
        const std::vector<ClientInfo>& registeredClients = mClients[timeout];
        hasRegisteredClients = !registeredClients.empty();
        clientsToPing.reserve(registeredClients.size());
        for (const ClientInfo& registeredClient : registeredClients) {
            // Clients of stopped users get no session and must not be pinged either: pinging
            // them would use a stale session ID that is not tracked in pingedClients.
            if (mStoppedUserIds.count(registeredClient.userId) > 0) {
                continue;
            }
            ClientInfo clientInfo = registeredClient;
            int sessionId = getNewSessionId();
            clientInfo.sessionId = sessionId;
            pingedClients.insert(std::make_pair(sessionId, clientInfo));
            clientsToPing.push_back(clientInfo);
        }
    }

    for (const auto& clientInfo : clientsToPing) {
        Status status = clientInfo.checkIfAlive(timeout);
        if (!status.isOk()) {
            ALOGW("Sending a ping message to client(pid: %d) failed: %s", clientInfo.pid,
                  status.exceptionMessage().c_str());
            // A client that could not be pinged is not expected to respond, so stop tracking it
            // for this round.
            Mutex::Autolock lock(mMutex);
            mPingedClients[timeout].erase(clientInfo.sessionId);
        }
    }
    // Though the size of the pinged clients is a more specific measure, the number of registered
    // clients is used as a conservative approach: the check keeps running even when every
    // client currently belongs to a stopped user.
    if (hasRegisteredClients) {
        auto durationNs = timeoutToDurationNs(timeout);
        mHandlerLooper->sendMessageDelayed(durationNs.count(), mMessageHandler, Message(what));
    }
}
390 
start()391 Result<void> WatchdogProcessService::start() {
392     if (mServiceStarted) {
393         return Error(INVALID_OPERATION) << "Cannot start process monitoring more than once";
394     }
395     mServiceStarted = true;
396     reportWatchdogAliveToVhal();
397     return {};
398 }
399 
terminate()400 void WatchdogProcessService::terminate() {
401     Mutex::Autolock lock(mMutex);
402     for (const auto& timeout : kTimeouts) {
403         std::vector<ClientInfo>& clients = mClients[timeout];
404         for (auto it = clients.begin(); it != clients.end();) {
405             it->unlinkToDeath(mBinderDeathRecipient);
406             it = clients.erase(it);
407         }
408     }
409     mWatchdogServiceHelper.clear();
410     if (mMonitor != nullptr) {
411         sp<IBinder> binder = aawi::BnCarWatchdogMonitor::asBinder(mMonitor);
412         binder->unlinkToDeath(mBinderDeathRecipient);
413     }
414     if (mVhalService != nullptr) {
415         mVhalService->unlinkToDeath(mHidlDeathRecipient);
416     }
417     mServiceStarted = false;
418 }
419 
registerClientLocked(const ClientInfo & clientInfo,TimeoutLength timeout)420 Status WatchdogProcessService::registerClientLocked(const ClientInfo& clientInfo,
421                                                     TimeoutLength timeout) {
422     if (findClientAndProcessLocked(kTimeouts, clientInfo, nullptr)) {
423         ALOGW("Failed to register (%s) as it is already registered.",
424               clientInfo.toString().c_str());
425         return Status::ok();
426     }
427     status_t status = clientInfo.linkToDeath(mBinderDeathRecipient);
428     if (status != OK) {
429         ALOGW("Failed to register (%s) as it is dead", clientInfo.toString().c_str());
430         std::string errorStr = StringPrintf("(%s) is dead", clientInfo.toString().c_str());
431         return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE, errorStr.c_str());
432     }
433     std::vector<ClientInfo>& clients = mClients[timeout];
434     clients.emplace_back(clientInfo);
435 
436     // If the client array becomes non-empty, start health checking.
437     if (clients.size() == 1) {
438         startHealthCheckingLocked(timeout);
439     }
440     if (DEBUG) {
441         ALOGD("Car watchdog client (%s, timeout = %d) is registered", clientInfo.toString().c_str(),
442               timeout);
443     }
444     return Status::ok();
445 }
446 
unregisterClientLocked(const std::vector<TimeoutLength> & timeouts,sp<IBinder> binder,ClientType clientType)447 Status WatchdogProcessService::unregisterClientLocked(const std::vector<TimeoutLength>& timeouts,
448                                                       sp<IBinder> binder, ClientType clientType) {
449     const char* clientName = clientType == ClientType::Regular ? "client" : "watchdog service";
450     bool result = findClientAndProcessLocked(timeouts, binder,
451                                              [&](std::vector<ClientInfo>& clients,
452                                                  std::vector<ClientInfo>::const_iterator it) {
453                                                  it->unlinkToDeath(mBinderDeathRecipient);
454                                                  clients.erase(it);
455                                              });
456     if (!result) {
457         std::string errorStr = StringPrintf("The %s has not been registered", clientName);
458         const char* errorCause = errorStr.c_str();
459         ALOGW("Failed to unregister the %s: %s", clientName, errorCause);
460         return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT, errorCause);
461     }
462     if (DEBUG) {
463         ALOGD("Car watchdog %s is unregistered", clientName);
464     }
465     return Status::ok();
466 }
467 
tellClientAliveLocked(const sp<IBinder> & binder,int32_t sessionId)468 Status WatchdogProcessService::tellClientAliveLocked(const sp<IBinder>& binder, int32_t sessionId) {
469     for (const auto& timeout : kTimeouts) {
470         PingedClientMap& clients = mPingedClients[timeout];
471         PingedClientMap::const_iterator it = clients.find(sessionId);
472         if (it == clients.cend() || !it->second.matchesBinder(binder)) {
473             continue;
474         }
475         clients.erase(it);
476         return Status::ok();
477     }
478     return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT,
479                                      "The client is not registered or the session ID is not found");
480 }
481 
// Searches the given timeout classes for the first registered client equal to clientInfo and,
// when a processor is supplied, invokes it on the owning list and the position of the match.
// The search stops at the first match.
//
// @param timeouts Timeout classes to search, in order.
// @param clientInfo Client to look for.
// @param processor Optional callback run on the match; may be null.
// @return true when a matching client was found.
bool WatchdogProcessService::findClientAndProcessLocked(const std::vector<TimeoutLength> timeouts,
                                                        const ClientInfo& clientInfo,
                                                        const Processor& processor) {
    for (const auto& timeout : timeouts) {
        std::vector<ClientInfo>& candidates = mClients[timeout];
        auto match = candidates.begin();
        while (match != candidates.end() && std::as_const(*match) != clientInfo) {
            match++;
        }
        if (match == candidates.end()) {
            continue;
        }
        // The processor may erase the match, so nothing touches the iterator afterwards.
        if (processor != nullptr) {
            processor(candidates, match);
        }
        return true;
    }
    return false;
}
499 
// Searches the given timeout classes for the first registered client that matches the binder
// and, when a processor is supplied, invokes it on the owning list and the position of the
// match. The search stops at the first match.
//
// @param timeouts Timeout classes to search, in order.
// @param binder Binder identifying the client to look for.
// @param processor Optional callback run on the match; may be null.
// @return true when a matching client was found.
bool WatchdogProcessService::findClientAndProcessLocked(const std::vector<TimeoutLength> timeouts,
                                                        const sp<IBinder> binder,
                                                        const Processor& processor) {
    for (const auto& timeout : timeouts) {
        std::vector<ClientInfo>& candidates = mClients[timeout];
        auto match = candidates.begin();
        while (match != candidates.end() && !match->matchesBinder(binder)) {
            match++;
        }
        if (match == candidates.end()) {
            continue;
        }
        // The processor may erase the match, so nothing touches the iterator afterwards.
        if (processor != nullptr) {
            processor(candidates, match);
        }
        return true;
    }
    return false;
}
517 
startHealthCheckingLocked(TimeoutLength timeout)518 Result<void> WatchdogProcessService::startHealthCheckingLocked(TimeoutLength timeout) {
519     PingedClientMap& clients = mPingedClients[timeout];
520     clients.clear();
521     int what = static_cast<int>(timeout);
522     auto durationNs = timeoutToDurationNs(timeout);
523     mHandlerLooper->sendMessageDelayed(durationNs.count(), mMessageHandler, Message(what));
524     return {};
525 }
526 
dumpAndKillClientsIfNotResponding(TimeoutLength timeout)527 Result<void> WatchdogProcessService::dumpAndKillClientsIfNotResponding(TimeoutLength timeout) {
528     std::vector<int32_t> processIds;
529     std::vector<const ClientInfo*> clientsToNotify;
530     {
531         Mutex::Autolock lock(mMutex);
532         PingedClientMap& clients = mPingedClients[timeout];
533         for (PingedClientMap::const_iterator it = clients.cbegin(); it != clients.cend(); it++) {
534             pid_t pid = -1;
535             userid_t userId = -1;
536             std::vector<TimeoutLength> timeouts = {timeout};
537             findClientAndProcessLocked(timeouts, it->second,
538                                        [&](std::vector<ClientInfo>& cachedClients,
539                                            std::vector<ClientInfo>::const_iterator
540                                                    cachedClientsIt) {
541                                            pid = cachedClientsIt->pid;
542                                            userId = cachedClientsIt->userId;
543                                            cachedClients.erase(cachedClientsIt);
544                                        });
545             if (pid != -1 && mStoppedUserIds.count(userId) == 0) {
546                 clientsToNotify.emplace_back(&it->second);
547                 processIds.push_back(pid);
548             }
549         }
550     }
551     for (const ClientInfo*& clientInfo : clientsToNotify) {
552         clientInfo->prepareProcessTermination();
553     }
554     return dumpAndKillAllProcesses(processIds, true);
555 }
556 
dumpAndKillAllProcesses(const std::vector<int32_t> & processesNotResponding,bool reportToVhal)557 Result<void> WatchdogProcessService::dumpAndKillAllProcesses(
558         const std::vector<int32_t>& processesNotResponding, bool reportToVhal) {
559     size_t size = processesNotResponding.size();
560     if (size == 0) {
561         return {};
562     }
563     std::string pidString = pidArrayToString(processesNotResponding);
564     sp<aawi::ICarWatchdogMonitor> monitor;
565     {
566         Mutex::Autolock lock(mMutex);
567         if (mMonitor == nullptr) {
568             std::string errorMsg =
569                     StringPrintf("Failed to dump and kill processes(pid = %s): Monitor is not set",
570                                  pidString.c_str());
571             ALOGW("%s", errorMsg.c_str());
572             return Error() << errorMsg;
573         }
574         monitor = mMonitor;
575     }
576     if (isSystemShuttingDown()) {
577         ALOGI("Skip dumping and killing processes(%s): The system is shutting down",
578               pidString.c_str());
579         return {};
580     }
581     if (reportToVhal) {
582         reportTerminatedProcessToVhal(processesNotResponding);
583     }
584     monitor->onClientsNotResponding(processesNotResponding);
585     if (DEBUG) {
586         ALOGD("Dumping and killing processes is requested: %s", pidString.c_str());
587     }
588     return {};
589 }
590 
591 // Handle when car watchdog clients die.
handleBinderDeath(const wp<IBinder> & who)592 void WatchdogProcessService::handleBinderDeath(const wp<IBinder>& who) {
593     Mutex::Autolock lock(mMutex);
594     IBinder* binder = who.unsafe_get();
595     // Check if dead binder is monitor.
596     sp<IBinder> monitor = aawi::BnCarWatchdogMonitor::asBinder(mMonitor);
597     if (monitor == binder) {
598         mMonitor = nullptr;
599         ALOGW("The monitor has died.");
600         return;
601     }
602     findClientAndProcessLocked(kTimeouts, binder,
603                                [&](std::vector<ClientInfo>& clients,
604                                    std::vector<ClientInfo>::const_iterator it) {
605                                    ALOGW("Client(pid: %d) died", it->pid);
606                                    clients.erase(it);
607                                });
608 }
609 
610 // Handle when VHAL dies.
handleHidlDeath(const wp<IBase> & who)611 void WatchdogProcessService::handleHidlDeath(const wp<IBase>& who) {
612     Mutex::Autolock lock(mMutex);
613     if (!interfacesEqual(mVhalService, who.promote())) {
614         return;
615     }
616     ALOGW("VHAL has died.");
617     mVhalService->unlinkToDeath(mHidlDeathRecipient);
618     mVhalService = nullptr;
619 }
620 
reportWatchdogAliveToVhal()621 void WatchdogProcessService::reportWatchdogAliveToVhal() {
622     if (mNotSupportedVhalProperties.count(VehicleProperty::WATCHDOG_ALIVE) > 0) {
623         ALOGW("VHAL doesn't support WATCHDOG_ALIVE. Car watchdog will not update WATCHDOG_ALIVE.");
624         return;
625     }
626     int64_t systemUptime = uptimeMillis();
627     VehiclePropValue propValue{
628             .prop = static_cast<int32_t>(VehicleProperty::WATCHDOG_ALIVE),
629             .status = VehiclePropertyStatus::AVAILABLE,
630             .value = {.int64Values = {systemUptime}},
631     };
632     const auto& ret = updateVhal(propValue);
633     if (!ret.ok()) {
634         ALOGW("Failed to update WATCHDOG_ALIVE VHAL property. Will try again in 3s");
635     }
636     // Update VHAL with the interval of TIMEOUT_CRITICAL(3s).
637     auto durationNs = timeoutToDurationNs(TimeoutLength::TIMEOUT_CRITICAL);
638     mHandlerLooper->removeMessages(mMessageHandler, MSG_VHAL_WATCHDOG_ALIVE);
639     mHandlerLooper->sendMessageDelayed(durationNs.count(), mMessageHandler,
640                                        Message(MSG_VHAL_WATCHDOG_ALIVE));
641 }
642 
reportTerminatedProcessToVhal(const std::vector<int32_t> & processesNotResponding)643 void WatchdogProcessService::reportTerminatedProcessToVhal(
644         const std::vector<int32_t>& processesNotResponding) {
645     if (mNotSupportedVhalProperties.count(VehicleProperty::WATCHDOG_TERMINATED_PROCESS) > 0) {
646         ALOGW("VHAL doesn't support WATCHDOG_TERMINATED_PROCESS. Terminated process is not "
647               "reported to VHAL.");
648         return;
649     }
650     for (auto&& pid : processesNotResponding) {
651         const auto& retCmdLine = readProcCmdLine(pid);
652         if (!retCmdLine.ok()) {
653             ALOGW("Failed to get process command line for pid(%d): %s", pid,
654                   retCmdLine.error().message().c_str());
655             continue;
656         }
657         std::string procCmdLine = retCmdLine.value();
658         VehiclePropValue propValue{
659                 .prop = static_cast<int32_t>(VehicleProperty::WATCHDOG_TERMINATED_PROCESS),
660                 .status = VehiclePropertyStatus::AVAILABLE,
661                 .value = {
662                          .int32Values = {static_cast<int32_t>(
663                                  ProcessTerminationReason::NOT_RESPONDING)},
664                          .stringValue = procCmdLine,
665                 },
666         };
667         const auto& retUpdate = updateVhal(propValue);
668         if (!retUpdate.ok()) {
669             ALOGW("Failed to update WATCHDOG_TERMINATED_PROCESS VHAL property(command line: %s)",
670                   procCmdLine.c_str());
671         }
672     }
673 }
674 
updateVhal(const VehiclePropValue & value)675 Result<void> WatchdogProcessService::updateVhal(const VehiclePropValue& value) {
676     Mutex::Autolock lock(mMutex);
677     const auto& connectRet = connectToVhalLocked();
678     if (!connectRet.ok()) {
679         std::string errorMsg = "VHAL is not connected: " + connectRet.error().message();
680         ALOGW("%s", errorMsg.c_str());
681         return Error() << errorMsg;
682     }
683     if (mNotSupportedVhalProperties.count(static_cast<VehicleProperty>(value.prop)) > 0) {
684         std::string errorMsg = StringPrintf("VHAL doesn't support property(id: %d)", value.prop);
685         ALOGW("%s", errorMsg.c_str());
686         return Error() << errorMsg;
687     }
688     const auto& updateRet = mVhalService->set(value);
689     if (updateRet.isOk() && updateRet == StatusCode::OK) {
690         return {};
691     }
692     return Error() << "Failed to set propValue(" << value.prop << ") to VHAL";
693 }
694 
readProcCmdLine(int32_t pid)695 Result<std::string> WatchdogProcessService::readProcCmdLine(int32_t pid) {
696     std::string cmdLinePath = StringPrintf("/proc/%d/cmdline", pid);
697     std::string procCmdLine;
698     if (ReadFileToString(cmdLinePath, &procCmdLine)) {
699         std::replace(procCmdLine.begin(), procCmdLine.end(), '\0', ' ');
700         procCmdLine = Trim(procCmdLine);
701         return procCmdLine;
702     }
703     return Error() << "Failed to read " << cmdLinePath;
704 }
705 
connectToVhalLocked()706 Result<void> WatchdogProcessService::connectToVhalLocked() {
707     if (mVhalService.get() != nullptr) {
708         return {};
709     }
710     mVhalService = IVehicle::tryGetService();
711     if (mVhalService.get() == nullptr) {
712         return Error() << "Failed to connect to VHAL.";
713     }
714     mVhalService->linkToDeath(mHidlDeathRecipient, /*cookie=*/0);
715     queryVhalPropertiesLocked();
716     subscribeToVhalHeartBeatLocked();
717     ALOGI("Successfully connected to VHAL.");
718     return {};
719 }
720 
queryVhalPropertiesLocked()721 void WatchdogProcessService::queryVhalPropertiesLocked() {
722     mNotSupportedVhalProperties.clear();
723     std::vector<VehicleProperty> propIds = {VehicleProperty::WATCHDOG_ALIVE,
724                                             VehicleProperty::WATCHDOG_TERMINATED_PROCESS,
725                                             VehicleProperty::VHAL_HEARTBEAT};
726     for (const auto& propId : propIds) {
727         if (!isVhalPropertySupportedLocked(propId)) {
728             mNotSupportedVhalProperties.insert(propId);
729         }
730     }
731 }
732 
isVhalPropertySupportedLocked(VehicleProperty propId)733 bool WatchdogProcessService::isVhalPropertySupportedLocked(VehicleProperty propId) {
734     StatusCode status;
735     hidl_vec<int32_t> props = {static_cast<int32_t>(propId)};
736     mVhalService->getPropConfigs(props,
737                                  [&status](StatusCode s,
738                                            hidl_vec<VehiclePropConfig> /*propConfigs*/) {
739                                      status = s;
740                                  });
741     return status == StatusCode::OK;
742 }
743 
subscribeToVhalHeartBeatLocked()744 void WatchdogProcessService::subscribeToVhalHeartBeatLocked() {
745     if (mNotSupportedVhalProperties.count(VehicleProperty::VHAL_HEARTBEAT) > 0) {
746         ALOGW("VHAL doesn't support VHAL_HEARTBEAT. Checking VHAL health is disabled.");
747         return;
748     }
749 
750     mVhalHeartBeat = {
751             .eventTime = 0,
752             .value = 0,
753     };
754 
755     SubscribeOptions reqVhalProperties[] = {
756             {.propId = static_cast<int32_t>(VehicleProperty::VHAL_HEARTBEAT),
757              .flags = SubscribeFlags::EVENTS_FROM_CAR},
758     };
759     hidl_vec<SubscribeOptions> options;
760     options.setToExternal(reqVhalProperties, arraysize(reqVhalProperties));
761     StatusCode status = mVhalService->subscribe(mPropertyChangeListener, options);
762     if (status != StatusCode::OK) {
763         ALOGW("Failed to subscribe to VHAL_HEARTBEAT. Checking VHAL health is disabled.");
764         return;
765     }
766     std::chrono::nanoseconds intervalNs = mVhalHealthCheckWindowMs + kHealthCheckDelayMs;
767     mHandlerLooper->sendMessageDelayed(intervalNs.count(), mMessageHandler,
768                                        Message(MSG_VHAL_HEALTH_CHECK));
769 }
770 
getNewSessionId()771 int32_t WatchdogProcessService::getNewSessionId() {
772     // Make sure that session id is always positive number.
773     if (++mLastSessionId <= 0) {
774         mLastSessionId = 1;
775     }
776     return mLastSessionId;
777 }
778 
updateVhalHeartBeat(int64_t value)779 void WatchdogProcessService::updateVhalHeartBeat(int64_t value) {
780     bool wrongHeartBeat;
781     {
782         Mutex::Autolock lock(mMutex);
783         wrongHeartBeat = value <= mVhalHeartBeat.value;
784         mVhalHeartBeat.eventTime = uptimeMillis();
785         mVhalHeartBeat.value = value;
786     }
787     if (wrongHeartBeat) {
788         ALOGW("VHAL updated heart beat with a wrong value. Terminating VHAL...");
789         terminateVhal();
790         return;
791     }
792     std::chrono::nanoseconds intervalNs = mVhalHealthCheckWindowMs + kHealthCheckDelayMs;
793     mHandlerLooper->sendMessageDelayed(intervalNs.count(), mMessageHandler,
794                                        Message(MSG_VHAL_HEALTH_CHECK));
795 }
796 
checkVhalHealth()797 void WatchdogProcessService::checkVhalHealth() {
798     int64_t lastEventTime;
799     int64_t currentUptime = uptimeMillis();
800     {
801         Mutex::Autolock lock(mMutex);
802         lastEventTime = mVhalHeartBeat.eventTime;
803     }
804     if (currentUptime > lastEventTime + mVhalHealthCheckWindowMs.count()) {
805         ALOGW("VHAL failed to update heart beat within timeout. Terminating VHAL...");
806         terminateVhal();
807     }
808 }
809 
terminateVhal()810 void WatchdogProcessService::terminateVhal() {
811     using ::android::hidl::manager::V1_0::IServiceManager;
812 
813     std::vector<int32_t> processIds;
814     sp<IServiceManager> manager = IServiceManager::getService();
815     Return<void> ret = manager->debugDump([&](auto& hals) {
816         for (const auto& info : hals) {
817             if (info.pid == static_cast<int>(IServiceManager::PidConstant::NO_PID)) {
818                 continue;
819             }
820             if (info.interfaceName == kVhalInterfaceName) {
821                 processIds.push_back(info.pid);
822                 break;
823             }
824         }
825     });
826 
827     if (!ret.isOk()) {
828         ALOGE("Failed to terminate VHAL: could not get VHAL process id");
829         return;
830     } else if (processIds.empty()) {
831         ALOGE("Failed to terminate VHAL: VHAL is not running");
832         return;
833     }
834     dumpAndKillAllProcesses(processIds, false);
835 }
836 
toString() const837 std::string WatchdogProcessService::ClientInfo::toString() const {
838     std::string buffer;
839     StringAppendF(&buffer, "pid = %d, userId = %d, type = %s", pid, userId,
840                   type == ClientType::Regular ? "regular" : "watchdog service");
841     return buffer;
842 }
843 
getBinder() const844 sp<IBinder> WatchdogProcessService::ClientInfo::getBinder() const {
845     if (type == ClientType::Regular) {
846         return BnCarWatchdogClient::asBinder(client);
847     }
848     return watchdogServiceBinder;
849 }
850 
linkToDeath(const sp<IBinder::DeathRecipient> & recipient) const851 status_t WatchdogProcessService::ClientInfo::linkToDeath(
852         const sp<IBinder::DeathRecipient>& recipient) const {
853     if (type == ClientType::Regular) {
854         return BnCarWatchdogClient::asBinder(client)->linkToDeath(recipient);
855     }
856     // WatchdogServiceHelper is the binder death recipient for watchdog service, ergo
857     // skip this step.
858     return OK;
859 }
860 
unlinkToDeath(const wp<IBinder::DeathRecipient> & recipient) const861 status_t WatchdogProcessService::ClientInfo::unlinkToDeath(
862         const wp<IBinder::DeathRecipient>& recipient) const {
863     if (type == ClientType::Regular) {
864         return BnCarWatchdogClient::asBinder(client)->unlinkToDeath(recipient);
865     }
866     // WatchdogServiceHelper is the binder death recipient for watchdog service, ergo
867     // skip this step.
868     return OK;
869 }
870 
checkIfAlive(TimeoutLength timeout) const871 Status WatchdogProcessService::ClientInfo::checkIfAlive(TimeoutLength timeout) const {
872     if (type == ClientType::Regular) {
873         return client->checkIfAlive(sessionId, timeout);
874     }
875     return watchdogServiceHelper->checkIfAlive(watchdogServiceBinder, sessionId, timeout);
876 }
877 
prepareProcessTermination() const878 Status WatchdogProcessService::ClientInfo::prepareProcessTermination() const {
879     if (type == ClientType::Regular) {
880         return client->prepareProcessTermination();
881     }
882     return watchdogServiceHelper->prepareProcessTermination(watchdogServiceBinder);
883 }
884 
BinderDeathRecipient(const sp<WatchdogProcessService> & service)885 WatchdogProcessService::BinderDeathRecipient::BinderDeathRecipient(
886         const sp<WatchdogProcessService>& service) :
887       mService(service) {}
888 
binderDied(const wp<IBinder> & who)889 void WatchdogProcessService::BinderDeathRecipient::binderDied(const wp<IBinder>& who) {
890     mService->handleBinderDeath(who);
891 }
892 
HidlDeathRecipient(const sp<WatchdogProcessService> & service)893 WatchdogProcessService::HidlDeathRecipient::HidlDeathRecipient(
894         const sp<WatchdogProcessService>& service) :
895       mService(service) {}
896 
serviceDied(uint64_t,const wp<IBase> & who)897 void WatchdogProcessService::HidlDeathRecipient::serviceDied(uint64_t /*cookie*/,
898                                                              const wp<IBase>& who) {
899     mService->handleHidlDeath(who);
900 }
901 
PropertyChangeListener(const sp<WatchdogProcessService> & service)902 WatchdogProcessService::PropertyChangeListener::PropertyChangeListener(
903         const sp<WatchdogProcessService>& service) :
904       mService(service) {}
905 
onPropertyEvent(const hidl_vec<VehiclePropValue> & propValues)906 Return<void> WatchdogProcessService::PropertyChangeListener::onPropertyEvent(
907         const hidl_vec<VehiclePropValue>& propValues) {
908     for (const auto& value : propValues) {
909         if (value.prop == static_cast<int32_t>(VehicleProperty::VHAL_HEARTBEAT)) {
910             mService->updateVhalHeartBeat(value.value.int64Values[0]);
911             break;
912         }
913     }
914     return Return<void>();
915 }
916 
onPropertySet(const VehiclePropValue &)917 Return<void> WatchdogProcessService::PropertyChangeListener::onPropertySet(
918         const VehiclePropValue& /*propValue*/) {
919     return Return<void>();
920 }
921 
onPropertySetError(StatusCode,int32_t,int32_t)922 Return<void> WatchdogProcessService::PropertyChangeListener::onPropertySetError(
923         StatusCode /*status*/, int32_t /*propId*/, int32_t /*areaId*/) {
924     return Return<void>();
925 }
926 
MessageHandlerImpl(const sp<WatchdogProcessService> & service)927 WatchdogProcessService::MessageHandlerImpl::MessageHandlerImpl(
928         const sp<WatchdogProcessService>& service) :
929       mService(service) {}
930 
handleMessage(const Message & message)931 void WatchdogProcessService::MessageHandlerImpl::handleMessage(const Message& message) {
932     switch (message.what) {
933         case static_cast<int>(TimeoutLength::TIMEOUT_CRITICAL):
934         case static_cast<int>(TimeoutLength::TIMEOUT_MODERATE):
935         case static_cast<int>(TimeoutLength::TIMEOUT_NORMAL):
936             mService->doHealthCheck(message.what);
937             break;
938         case MSG_VHAL_WATCHDOG_ALIVE:
939             mService->reportWatchdogAliveToVhal();
940             break;
941         case MSG_VHAL_HEALTH_CHECK:
942             mService->checkVhalHealth();
943             break;
944         default:
945             ALOGW("Unknown message: %d", message.what);
946     }
947 }
948 
949 }  // namespace watchdog
950 }  // namespace automotive
951 }  // namespace android
952