/** * Copyright (c) 2020, The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #define LOG_TAG "carwatchdogd" #define DEBUG false // STOPSHIP if true. #include "WatchdogProcessService.h" #include "WatchdogServiceHelper.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace android { namespace automotive { namespace watchdog { namespace aawi = ::android::automotive::watchdog::internal; using aawi::BnCarWatchdogServiceForSystem; using aawi::ICarWatchdogServiceForSystem; using ::android::IBinder; using ::android::sp; using ::android::String16; using ::android::base::Error; using ::android::base::GetIntProperty; using ::android::base::GetProperty; using ::android::base::ReadFileToString; using ::android::base::Result; using ::android::base::StringAppendF; using ::android::base::StringPrintf; using ::android::base::Trim; using ::android::base::WriteStringToFd; using ::android::binder::Status; using ::android::hardware::hidl_vec; using ::android::hardware::interfacesEqual; using ::android::hardware::Return; using ::android::hardware::automotive::vehicle::V2_0::IVehicle; using ::android::hardware::automotive::vehicle::V2_0::ProcessTerminationReason; using ::android::hardware::automotive::vehicle::V2_0::StatusCode; using ::android::hardware::automotive::vehicle::V2_0::SubscribeFlags; using ::android::hardware::automotive::vehicle::V2_0::SubscribeOptions; using ::android::hardware::automotive::vehicle::V2_0::VehiclePropConfig; using ::android::hardware::automotive::vehicle::V2_0::VehicleProperty; using ::android::hardware::automotive::vehicle::V2_0::VehiclePropertyStatus; using ::android::hardware::automotive::vehicle::V2_0::VehiclePropValue; using ::android::hidl::base::V1_0::IBase; namespace { const std::vector kTimeouts = {TimeoutLength::TIMEOUT_CRITICAL, TimeoutLength::TIMEOUT_MODERATE, TimeoutLength::TIMEOUT_NORMAL}; // TimeoutLength is also used as a message ID. Other message IDs should start next to // TimeoutLength::TIMEOUT_NORMAL. const int32_t MSG_VHAL_WATCHDOG_ALIVE = static_cast(TimeoutLength::TIMEOUT_NORMAL) + 1; const int32_t MSG_VHAL_HEALTH_CHECK = MSG_VHAL_WATCHDOG_ALIVE + 1; // VHAL is supposed to send heart beat every 3s. Car watchdog checks if there is the latest heart // beat from VHAL within 3s, allowing 1s marginal time. // If {@code ro.carwatchdog.vhal_healthcheck.interval} is set, car watchdog checks VHAL health at // the given interval. The lower bound of the interval is 3s. constexpr int32_t kDefaultVhalCheckIntervalSec = 3; constexpr std::chrono::milliseconds kHealthCheckDelayMs = 1s; constexpr const char kPropertyVhalCheckInterval[] = "ro.carwatchdog.vhal_healthcheck.interval"; constexpr const char kServiceName[] = "WatchdogProcessService"; constexpr const char kVhalInterfaceName[] = "android.hardware.automotive.vehicle@2.0::IVehicle"; std::chrono::nanoseconds timeoutToDurationNs(const TimeoutLength& timeout) { switch (timeout) { case TimeoutLength::TIMEOUT_CRITICAL: return 3s; // 3s and no buffer time. case TimeoutLength::TIMEOUT_MODERATE: return 6s; // 5s + 1s as buffer time. case TimeoutLength::TIMEOUT_NORMAL: return 12s; // 10s + 2s as buffer time. } } std::string pidArrayToString(const std::vector& pids) { size_t size = pids.size(); if (size == 0) { return ""; } std::string buffer; StringAppendF(&buffer, "%d", pids[0]); for (int i = 1; i < size; i++) { int pid = pids[i]; StringAppendF(&buffer, ", %d", pid); } return buffer; } bool isSystemShuttingDown() { std::string sysPowerCtl; std::istringstream tokenStream(GetProperty("sys.powerctl", "")); std::getline(tokenStream, sysPowerCtl, ','); return sysPowerCtl == "reboot" || sysPowerCtl == "shutdown"; } } // namespace WatchdogProcessService::WatchdogProcessService(const sp& handlerLooper) : mHandlerLooper(handlerLooper), mIsEnabled(true), mLastSessionId(0), mServiceStarted(false), mVhalService(nullptr) { mMessageHandler = sp::make(this); mBinderDeathRecipient = sp::make(this); mHidlDeathRecipient = sp::make(this); mPropertyChangeListener = sp::make(this); for (const auto& timeout : kTimeouts) { mClients.insert(std::make_pair(timeout, std::vector())); mPingedClients.insert(std::make_pair(timeout, PingedClientMap())); } int32_t vhalHealthCheckIntervalSec = GetIntProperty(kPropertyVhalCheckInterval, kDefaultVhalCheckIntervalSec); vhalHealthCheckIntervalSec = std::max(vhalHealthCheckIntervalSec, kDefaultVhalCheckIntervalSec); mVhalHealthCheckWindowMs = std::chrono::seconds(vhalHealthCheckIntervalSec); } Result WatchdogProcessService::registerWatchdogServiceHelper( const sp& helper) { if (helper == nullptr) { return Error() << "Must provide a non-null watchdog service helper instance"; } Mutex::Autolock lock(mMutex); mWatchdogServiceHelper = helper; return {}; } Status WatchdogProcessService::registerClient(const sp& client, TimeoutLength timeout) { pid_t callingPid = IPCThreadState::self()->getCallingPid(); uid_t callingUid = IPCThreadState::self()->getCallingUid(); ClientInfo clientInfo(client, callingPid, callingUid); Mutex::Autolock lock(mMutex); return registerClientLocked(clientInfo, timeout); } Status WatchdogProcessService::unregisterClient(const sp& client) { Mutex::Autolock lock(mMutex); sp binder = BnCarWatchdogClient::asBinder(client); // kTimeouts is declared as global static constant to cover all kinds of timeout (CRITICAL, // MODERATE, NORMAL). return unregisterClientLocked(kTimeouts, binder, ClientType::Regular); } Status WatchdogProcessService::registerCarWatchdogService(const sp& binder) { pid_t callingPid = IPCThreadState::self()->getCallingPid(); uid_t callingUid = IPCThreadState::self()->getCallingUid(); Mutex::Autolock lock(mMutex); if (mWatchdogServiceHelper == nullptr) { return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE, "Watchdog service helper instance is null"); } ClientInfo clientInfo(mWatchdogServiceHelper, binder, callingPid, callingUid); return registerClientLocked(clientInfo, TimeoutLength::TIMEOUT_CRITICAL); } void WatchdogProcessService::unregisterCarWatchdogService(const sp& binder) { Mutex::Autolock lock(mMutex); std::vector timeouts = {TimeoutLength::TIMEOUT_CRITICAL}; unregisterClientLocked(timeouts, binder, ClientType::Service); } Status WatchdogProcessService::registerMonitor(const sp& monitor) { Mutex::Autolock lock(mMutex); sp binder = aawi::BnCarWatchdogMonitor::asBinder(monitor); if (mMonitor != nullptr && binder == aawi::BnCarWatchdogMonitor::asBinder(mMonitor)) { return Status::ok(); } status_t ret = binder->linkToDeath(mBinderDeathRecipient); if (ret != OK) { ALOGW("Failed to register the monitor as it is dead."); return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE, "The monitor is dead."); } mMonitor = monitor; if (DEBUG) { ALOGD("Car watchdog monitor is registered"); } return Status::ok(); } Status WatchdogProcessService::unregisterMonitor(const sp& monitor) { Mutex::Autolock lock(mMutex); sp curBinder = aawi::BnCarWatchdogMonitor::asBinder(mMonitor); sp newBinder = aawi::BnCarWatchdogMonitor::asBinder(monitor); if (curBinder != newBinder) { ALOGW("Failed to unregister the monitor as it has not been registered."); return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT, "The monitor has not been registered."); } curBinder->unlinkToDeath(mBinderDeathRecipient); mMonitor = nullptr; if (DEBUG) { ALOGD("Car watchdog monitor is unregistered"); } return Status::ok(); } Status WatchdogProcessService::tellClientAlive(const sp& client, int32_t sessionId) { Mutex::Autolock lock(mMutex); return tellClientAliveLocked(BnCarWatchdogClient::asBinder(client), sessionId); } Status WatchdogProcessService::tellCarWatchdogServiceAlive( const sp& service, const std::vector& clientsNotResponding, int32_t sessionId) { Status status; { Mutex::Autolock lock(mMutex); if (DEBUG) { std::string buffer; int size = clientsNotResponding.size(); if (size != 0) { StringAppendF(&buffer, "%d", clientsNotResponding[0]); for (int i = 1; i < clientsNotResponding.size(); i++) { StringAppendF(&buffer, ", %d", clientsNotResponding[i]); } ALOGD("CarWatchdogService(session: %d) responded with non-responding clients: %s", sessionId, buffer.c_str()); } } status = tellClientAliveLocked(BnCarWatchdogServiceForSystem::asBinder(service), sessionId); } if (status.isOk()) { dumpAndKillAllProcesses(clientsNotResponding, true); } return status; } Status WatchdogProcessService::tellDumpFinished(const sp& monitor, int32_t pid) { Mutex::Autolock lock(mMutex); if (mMonitor == nullptr || monitor == nullptr || aawi::BnCarWatchdogMonitor::asBinder(monitor) != aawi::BnCarWatchdogMonitor::asBinder(mMonitor)) { return Status:: fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT, "The monitor is not registered or an invalid monitor is given"); } ALOGI("Process(pid: %d) has been dumped and killed", pid); return Status::ok(); } void WatchdogProcessService::setEnabled(bool isEnabled) { Mutex::Autolock lock(mMutex); if (mIsEnabled != isEnabled) { ALOGI("%s is %s", kServiceName, isEnabled ? "enabled" : "disabled"); } mIsEnabled = isEnabled; if (mIsEnabled) { for (const auto& timeout : kTimeouts) { startHealthCheckingLocked(timeout); } } } void WatchdogProcessService::notifyUserStateChange(userid_t userId, bool isStarted) { std::string buffer; Mutex::Autolock lock(mMutex); if (isStarted) { mStoppedUserIds.erase(userId); } else { mStoppedUserIds.insert(userId); } } Result WatchdogProcessService::dump(int fd, const Vector& /*args*/) { Mutex::Autolock lock(mMutex); const char* indent = " "; const char* doubleIndent = " "; std::string buffer; WriteStringToFd("CAR WATCHDOG PROCESS SERVICE\n", fd); WriteStringToFd(StringPrintf("%s%s enabled: %s\n", indent, kServiceName, mIsEnabled ? "true" : "false"), fd); WriteStringToFd(StringPrintf("%sRegistered clients\n", indent), fd); int count = 1; for (const auto& timeout : kTimeouts) { std::vector& clients = mClients[timeout]; for (auto it = clients.begin(); it != clients.end(); it++, count++) { WriteStringToFd(StringPrintf("%sClient #%d: %s\n", doubleIndent, count, it->toString().c_str()), fd); } } WriteStringToFd(StringPrintf("%sMonitor registered: %s\n", indent, mMonitor == nullptr ? "false" : "true"), fd); WriteStringToFd(StringPrintf("%sisSystemShuttingDown: %s\n", indent, isSystemShuttingDown() ? "true" : "false"), fd); buffer = "none"; bool first = true; for (const auto& userId : mStoppedUserIds) { if (first) { buffer = StringPrintf("%d", userId); first = false; } else { StringAppendF(&buffer, ", %d", userId); } } WriteStringToFd(StringPrintf("%sStopped users: %s\n", indent, buffer.c_str()), fd); WriteStringToFd(StringPrintf("%sVHAL health check interval: %lldms\n", indent, mVhalHealthCheckWindowMs.count()), fd); return {}; } void WatchdogProcessService::doHealthCheck(int what) { mHandlerLooper->removeMessages(mMessageHandler, what); if (Mutex::Autolock lock(mMutex); !mIsEnabled) { return; } const TimeoutLength timeout = static_cast(what); dumpAndKillClientsIfNotResponding(timeout); /* Generates a temporary/local vector containing clients. * Using a local copy may send unnecessary ping messages to clients after they are unregistered. * Clients should be able to handle them. */ std::vector clientsToCheck; PingedClientMap& pingedClients = mPingedClients[timeout]; { Mutex::Autolock lock(mMutex); pingedClients.clear(); clientsToCheck = mClients[timeout]; for (auto& clientInfo : clientsToCheck) { if (mStoppedUserIds.count(clientInfo.userId) > 0) { continue; } int sessionId = getNewSessionId(); clientInfo.sessionId = sessionId; pingedClients.insert(std::make_pair(sessionId, clientInfo)); } } for (const auto& clientInfo : clientsToCheck) { Status status = clientInfo.checkIfAlive(timeout); if (!status.isOk()) { ALOGW("Sending a ping message to client(pid: %d) failed: %s", clientInfo.pid, status.exceptionMessage().c_str()); { Mutex::Autolock lock(mMutex); pingedClients.erase(clientInfo.sessionId); } } } // Though the size of pingedClients is a more specific measure, clientsToCheck is used as a // conservative approach. if (clientsToCheck.size() > 0) { auto durationNs = timeoutToDurationNs(timeout); mHandlerLooper->sendMessageDelayed(durationNs.count(), mMessageHandler, Message(what)); } } Result WatchdogProcessService::start() { if (mServiceStarted) { return Error(INVALID_OPERATION) << "Cannot start process monitoring more than once"; } mServiceStarted = true; reportWatchdogAliveToVhal(); return {}; } void WatchdogProcessService::terminate() { Mutex::Autolock lock(mMutex); for (const auto& timeout : kTimeouts) { std::vector& clients = mClients[timeout]; for (auto it = clients.begin(); it != clients.end();) { it->unlinkToDeath(mBinderDeathRecipient); it = clients.erase(it); } } mWatchdogServiceHelper.clear(); if (mMonitor != nullptr) { sp binder = aawi::BnCarWatchdogMonitor::asBinder(mMonitor); binder->unlinkToDeath(mBinderDeathRecipient); } if (mVhalService != nullptr) { mVhalService->unlinkToDeath(mHidlDeathRecipient); } mServiceStarted = false; } Status WatchdogProcessService::registerClientLocked(const ClientInfo& clientInfo, TimeoutLength timeout) { if (findClientAndProcessLocked(kTimeouts, clientInfo, nullptr)) { ALOGW("Failed to register (%s) as it is already registered.", clientInfo.toString().c_str()); return Status::ok(); } status_t status = clientInfo.linkToDeath(mBinderDeathRecipient); if (status != OK) { ALOGW("Failed to register (%s) as it is dead", clientInfo.toString().c_str()); std::string errorStr = StringPrintf("(%s) is dead", clientInfo.toString().c_str()); return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE, errorStr.c_str()); } std::vector& clients = mClients[timeout]; clients.emplace_back(clientInfo); // If the client array becomes non-empty, start health checking. if (clients.size() == 1) { startHealthCheckingLocked(timeout); } if (DEBUG) { ALOGD("Car watchdog client (%s, timeout = %d) is registered", clientInfo.toString().c_str(), timeout); } return Status::ok(); } Status WatchdogProcessService::unregisterClientLocked(const std::vector& timeouts, sp binder, ClientType clientType) { const char* clientName = clientType == ClientType::Regular ? "client" : "watchdog service"; bool result = findClientAndProcessLocked(timeouts, binder, [&](std::vector& clients, std::vector::const_iterator it) { it->unlinkToDeath(mBinderDeathRecipient); clients.erase(it); }); if (!result) { std::string errorStr = StringPrintf("The %s has not been registered", clientName); const char* errorCause = errorStr.c_str(); ALOGW("Failed to unregister the %s: %s", clientName, errorCause); return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT, errorCause); } if (DEBUG) { ALOGD("Car watchdog %s is unregistered", clientName); } return Status::ok(); } Status WatchdogProcessService::tellClientAliveLocked(const sp& binder, int32_t sessionId) { for (const auto& timeout : kTimeouts) { PingedClientMap& clients = mPingedClients[timeout]; PingedClientMap::const_iterator it = clients.find(sessionId); if (it == clients.cend() || !it->second.matchesBinder(binder)) { continue; } clients.erase(it); return Status::ok(); } return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT, "The client is not registered or the session ID is not found"); } bool WatchdogProcessService::findClientAndProcessLocked(const std::vector timeouts, const ClientInfo& clientInfo, const Processor& processor) { for (const auto& timeout : timeouts) { std::vector& clients = mClients[timeout]; for (auto it = clients.begin(); it != clients.end(); it++) { if (std::as_const(*it) != clientInfo) { continue; } if (processor != nullptr) { processor(clients, it); } return true; } } return false; } bool WatchdogProcessService::findClientAndProcessLocked(const std::vector timeouts, const sp binder, const Processor& processor) { for (const auto& timeout : timeouts) { std::vector& clients = mClients[timeout]; for (auto it = clients.begin(); it != clients.end(); it++) { if (!it->matchesBinder(binder)) { continue; } if (processor != nullptr) { processor(clients, it); } return true; } } return false; } Result WatchdogProcessService::startHealthCheckingLocked(TimeoutLength timeout) { PingedClientMap& clients = mPingedClients[timeout]; clients.clear(); int what = static_cast(timeout); auto durationNs = timeoutToDurationNs(timeout); mHandlerLooper->sendMessageDelayed(durationNs.count(), mMessageHandler, Message(what)); return {}; } Result WatchdogProcessService::dumpAndKillClientsIfNotResponding(TimeoutLength timeout) { std::vector processIds; std::vector clientsToNotify; { Mutex::Autolock lock(mMutex); PingedClientMap& clients = mPingedClients[timeout]; for (PingedClientMap::const_iterator it = clients.cbegin(); it != clients.cend(); it++) { pid_t pid = -1; userid_t userId = -1; std::vector timeouts = {timeout}; findClientAndProcessLocked(timeouts, it->second, [&](std::vector& cachedClients, std::vector::const_iterator cachedClientsIt) { pid = cachedClientsIt->pid; userId = cachedClientsIt->userId; cachedClients.erase(cachedClientsIt); }); if (pid != -1 && mStoppedUserIds.count(userId) == 0) { clientsToNotify.emplace_back(&it->second); processIds.push_back(pid); } } } for (const ClientInfo*& clientInfo : clientsToNotify) { clientInfo->prepareProcessTermination(); } return dumpAndKillAllProcesses(processIds, true); } Result WatchdogProcessService::dumpAndKillAllProcesses( const std::vector& processesNotResponding, bool reportToVhal) { size_t size = processesNotResponding.size(); if (size == 0) { return {}; } std::string pidString = pidArrayToString(processesNotResponding); sp monitor; { Mutex::Autolock lock(mMutex); if (mMonitor == nullptr) { std::string errorMsg = StringPrintf("Failed to dump and kill processes(pid = %s): Monitor is not set", pidString.c_str()); ALOGW("%s", errorMsg.c_str()); return Error() << errorMsg; } monitor = mMonitor; } if (isSystemShuttingDown()) { ALOGI("Skip dumping and killing processes(%s): The system is shutting down", pidString.c_str()); return {}; } if (reportToVhal) { reportTerminatedProcessToVhal(processesNotResponding); } monitor->onClientsNotResponding(processesNotResponding); if (DEBUG) { ALOGD("Dumping and killing processes is requested: %s", pidString.c_str()); } return {}; } // Handle when car watchdog clients die. void WatchdogProcessService::handleBinderDeath(const wp& who) { Mutex::Autolock lock(mMutex); IBinder* binder = who.unsafe_get(); // Check if dead binder is monitor. sp monitor = aawi::BnCarWatchdogMonitor::asBinder(mMonitor); if (monitor == binder) { mMonitor = nullptr; ALOGW("The monitor has died."); return; } findClientAndProcessLocked(kTimeouts, binder, [&](std::vector& clients, std::vector::const_iterator it) { ALOGW("Client(pid: %d) died", it->pid); clients.erase(it); }); } // Handle when VHAL dies. void WatchdogProcessService::handleHidlDeath(const wp& who) { Mutex::Autolock lock(mMutex); if (!interfacesEqual(mVhalService, who.promote())) { return; } ALOGW("VHAL has died."); mVhalService->unlinkToDeath(mHidlDeathRecipient); mVhalService = nullptr; } void WatchdogProcessService::reportWatchdogAliveToVhal() { if (mNotSupportedVhalProperties.count(VehicleProperty::WATCHDOG_ALIVE) > 0) { ALOGW("VHAL doesn't support WATCHDOG_ALIVE. Car watchdog will not update WATCHDOG_ALIVE."); return; } int64_t systemUptime = uptimeMillis(); VehiclePropValue propValue{ .prop = static_cast(VehicleProperty::WATCHDOG_ALIVE), .status = VehiclePropertyStatus::AVAILABLE, .value = {.int64Values = {systemUptime}}, }; const auto& ret = updateVhal(propValue); if (!ret.ok()) { ALOGW("Failed to update WATCHDOG_ALIVE VHAL property. Will try again in 3s"); } // Update VHAL with the interval of TIMEOUT_CRITICAL(3s). auto durationNs = timeoutToDurationNs(TimeoutLength::TIMEOUT_CRITICAL); mHandlerLooper->removeMessages(mMessageHandler, MSG_VHAL_WATCHDOG_ALIVE); mHandlerLooper->sendMessageDelayed(durationNs.count(), mMessageHandler, Message(MSG_VHAL_WATCHDOG_ALIVE)); } void WatchdogProcessService::reportTerminatedProcessToVhal( const std::vector& processesNotResponding) { if (mNotSupportedVhalProperties.count(VehicleProperty::WATCHDOG_TERMINATED_PROCESS) > 0) { ALOGW("VHAL doesn't support WATCHDOG_TERMINATED_PROCESS. Terminated process is not " "reported to VHAL."); return; } for (auto&& pid : processesNotResponding) { const auto& retCmdLine = readProcCmdLine(pid); if (!retCmdLine.ok()) { ALOGW("Failed to get process command line for pid(%d): %s", pid, retCmdLine.error().message().c_str()); continue; } std::string procCmdLine = retCmdLine.value(); VehiclePropValue propValue{ .prop = static_cast(VehicleProperty::WATCHDOG_TERMINATED_PROCESS), .status = VehiclePropertyStatus::AVAILABLE, .value = { .int32Values = {static_cast( ProcessTerminationReason::NOT_RESPONDING)}, .stringValue = procCmdLine, }, }; const auto& retUpdate = updateVhal(propValue); if (!retUpdate.ok()) { ALOGW("Failed to update WATCHDOG_TERMINATED_PROCESS VHAL property(command line: %s)", procCmdLine.c_str()); } } } Result WatchdogProcessService::updateVhal(const VehiclePropValue& value) { Mutex::Autolock lock(mMutex); const auto& connectRet = connectToVhalLocked(); if (!connectRet.ok()) { std::string errorMsg = "VHAL is not connected: " + connectRet.error().message(); ALOGW("%s", errorMsg.c_str()); return Error() << errorMsg; } if (mNotSupportedVhalProperties.count(static_cast(value.prop)) > 0) { std::string errorMsg = StringPrintf("VHAL doesn't support property(id: %d)", value.prop); ALOGW("%s", errorMsg.c_str()); return Error() << errorMsg; } const auto& updateRet = mVhalService->set(value); if (updateRet.isOk() && updateRet == StatusCode::OK) { return {}; } return Error() << "Failed to set propValue(" << value.prop << ") to VHAL"; } Result WatchdogProcessService::readProcCmdLine(int32_t pid) { std::string cmdLinePath = StringPrintf("/proc/%d/cmdline", pid); std::string procCmdLine; if (ReadFileToString(cmdLinePath, &procCmdLine)) { std::replace(procCmdLine.begin(), procCmdLine.end(), '\0', ' '); procCmdLine = Trim(procCmdLine); return procCmdLine; } return Error() << "Failed to read " << cmdLinePath; } Result WatchdogProcessService::connectToVhalLocked() { if (mVhalService.get() != nullptr) { return {}; } mVhalService = IVehicle::tryGetService(); if (mVhalService.get() == nullptr) { return Error() << "Failed to connect to VHAL."; } mVhalService->linkToDeath(mHidlDeathRecipient, /*cookie=*/0); queryVhalPropertiesLocked(); subscribeToVhalHeartBeatLocked(); ALOGI("Successfully connected to VHAL."); return {}; } void WatchdogProcessService::queryVhalPropertiesLocked() { mNotSupportedVhalProperties.clear(); std::vector propIds = {VehicleProperty::WATCHDOG_ALIVE, VehicleProperty::WATCHDOG_TERMINATED_PROCESS, VehicleProperty::VHAL_HEARTBEAT}; for (const auto& propId : propIds) { if (!isVhalPropertySupportedLocked(propId)) { mNotSupportedVhalProperties.insert(propId); } } } bool WatchdogProcessService::isVhalPropertySupportedLocked(VehicleProperty propId) { StatusCode status; hidl_vec props = {static_cast(propId)}; mVhalService->getPropConfigs(props, [&status](StatusCode s, hidl_vec /*propConfigs*/) { status = s; }); return status == StatusCode::OK; } void WatchdogProcessService::subscribeToVhalHeartBeatLocked() { if (mNotSupportedVhalProperties.count(VehicleProperty::VHAL_HEARTBEAT) > 0) { ALOGW("VHAL doesn't support VHAL_HEARTBEAT. Checking VHAL health is disabled."); return; } mVhalHeartBeat = { .eventTime = 0, .value = 0, }; SubscribeOptions reqVhalProperties[] = { {.propId = static_cast(VehicleProperty::VHAL_HEARTBEAT), .flags = SubscribeFlags::EVENTS_FROM_CAR}, }; hidl_vec options; options.setToExternal(reqVhalProperties, arraysize(reqVhalProperties)); StatusCode status = mVhalService->subscribe(mPropertyChangeListener, options); if (status != StatusCode::OK) { ALOGW("Failed to subscribe to VHAL_HEARTBEAT. Checking VHAL health is disabled."); return; } std::chrono::nanoseconds intervalNs = mVhalHealthCheckWindowMs + kHealthCheckDelayMs; mHandlerLooper->sendMessageDelayed(intervalNs.count(), mMessageHandler, Message(MSG_VHAL_HEALTH_CHECK)); } int32_t WatchdogProcessService::getNewSessionId() { // Make sure that session id is always positive number. if (++mLastSessionId <= 0) { mLastSessionId = 1; } return mLastSessionId; } void WatchdogProcessService::updateVhalHeartBeat(int64_t value) { bool wrongHeartBeat; { Mutex::Autolock lock(mMutex); wrongHeartBeat = value <= mVhalHeartBeat.value; mVhalHeartBeat.eventTime = uptimeMillis(); mVhalHeartBeat.value = value; } if (wrongHeartBeat) { ALOGW("VHAL updated heart beat with a wrong value. Terminating VHAL..."); terminateVhal(); return; } std::chrono::nanoseconds intervalNs = mVhalHealthCheckWindowMs + kHealthCheckDelayMs; mHandlerLooper->sendMessageDelayed(intervalNs.count(), mMessageHandler, Message(MSG_VHAL_HEALTH_CHECK)); } void WatchdogProcessService::checkVhalHealth() { int64_t lastEventTime; int64_t currentUptime = uptimeMillis(); { Mutex::Autolock lock(mMutex); lastEventTime = mVhalHeartBeat.eventTime; } if (currentUptime > lastEventTime + mVhalHealthCheckWindowMs.count()) { ALOGW("VHAL failed to update heart beat within timeout. Terminating VHAL..."); terminateVhal(); } } void WatchdogProcessService::terminateVhal() { using ::android::hidl::manager::V1_0::IServiceManager; std::vector processIds; sp manager = IServiceManager::getService(); Return ret = manager->debugDump([&](auto& hals) { for (const auto& info : hals) { if (info.pid == static_cast(IServiceManager::PidConstant::NO_PID)) { continue; } if (info.interfaceName == kVhalInterfaceName) { processIds.push_back(info.pid); break; } } }); if (!ret.isOk()) { ALOGE("Failed to terminate VHAL: could not get VHAL process id"); return; } else if (processIds.empty()) { ALOGE("Failed to terminate VHAL: VHAL is not running"); return; } dumpAndKillAllProcesses(processIds, false); } std::string WatchdogProcessService::ClientInfo::toString() const { std::string buffer; StringAppendF(&buffer, "pid = %d, userId = %d, type = %s", pid, userId, type == ClientType::Regular ? "regular" : "watchdog service"); return buffer; } sp WatchdogProcessService::ClientInfo::getBinder() const { if (type == ClientType::Regular) { return BnCarWatchdogClient::asBinder(client); } return watchdogServiceBinder; } status_t WatchdogProcessService::ClientInfo::linkToDeath( const sp& recipient) const { if (type == ClientType::Regular) { return BnCarWatchdogClient::asBinder(client)->linkToDeath(recipient); } // WatchdogServiceHelper is the binder death recipient for watchdog service, ergo // skip this step. return OK; } status_t WatchdogProcessService::ClientInfo::unlinkToDeath( const wp& recipient) const { if (type == ClientType::Regular) { return BnCarWatchdogClient::asBinder(client)->unlinkToDeath(recipient); } // WatchdogServiceHelper is the binder death recipient for watchdog service, ergo // skip this step. return OK; } Status WatchdogProcessService::ClientInfo::checkIfAlive(TimeoutLength timeout) const { if (type == ClientType::Regular) { return client->checkIfAlive(sessionId, timeout); } return watchdogServiceHelper->checkIfAlive(watchdogServiceBinder, sessionId, timeout); } Status WatchdogProcessService::ClientInfo::prepareProcessTermination() const { if (type == ClientType::Regular) { return client->prepareProcessTermination(); } return watchdogServiceHelper->prepareProcessTermination(watchdogServiceBinder); } WatchdogProcessService::BinderDeathRecipient::BinderDeathRecipient( const sp& service) : mService(service) {} void WatchdogProcessService::BinderDeathRecipient::binderDied(const wp& who) { mService->handleBinderDeath(who); } WatchdogProcessService::HidlDeathRecipient::HidlDeathRecipient( const sp& service) : mService(service) {} void WatchdogProcessService::HidlDeathRecipient::serviceDied(uint64_t /*cookie*/, const wp& who) { mService->handleHidlDeath(who); } WatchdogProcessService::PropertyChangeListener::PropertyChangeListener( const sp& service) : mService(service) {} Return WatchdogProcessService::PropertyChangeListener::onPropertyEvent( const hidl_vec& propValues) { for (const auto& value : propValues) { if (value.prop == static_cast(VehicleProperty::VHAL_HEARTBEAT)) { mService->updateVhalHeartBeat(value.value.int64Values[0]); break; } } return Return(); } Return WatchdogProcessService::PropertyChangeListener::onPropertySet( const VehiclePropValue& /*propValue*/) { return Return(); } Return WatchdogProcessService::PropertyChangeListener::onPropertySetError( StatusCode /*status*/, int32_t /*propId*/, int32_t /*areaId*/) { return Return(); } WatchdogProcessService::MessageHandlerImpl::MessageHandlerImpl( const sp& service) : mService(service) {} void WatchdogProcessService::MessageHandlerImpl::handleMessage(const Message& message) { switch (message.what) { case static_cast(TimeoutLength::TIMEOUT_CRITICAL): case static_cast(TimeoutLength::TIMEOUT_MODERATE): case static_cast(TimeoutLength::TIMEOUT_NORMAL): mService->doHealthCheck(message.what); break; case MSG_VHAL_WATCHDOG_ALIVE: mService->reportWatchdogAliveToVhal(); break; case MSG_VHAL_HEALTH_CHECK: mService->checkVhalHealth(); break; default: ALOGW("Unknown message: %d", message.what); } } } // namespace watchdog } // namespace automotive } // namespace android