/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.car.watchdog;

import static android.car.watchdog.CarWatchdogManager.TIMEOUT_CRITICAL;
import static android.car.watchdog.CarWatchdogManager.TIMEOUT_MODERATE;
import static android.car.watchdog.CarWatchdogManager.TIMEOUT_NORMAL;

import android.annotation.NonNull;
import android.annotation.UserIdInt;
import android.automotive.watchdog.internal.ICarWatchdogServiceForSystem;
import android.car.watchdog.ICarWatchdogServiceCallback;
import android.car.watchdoglib.CarWatchdogDaemonHelper;
import android.os.Binder;
import android.os.Handler;
import android.os.IBinder;
import android.os.Looper;
import android.os.RemoteException;
import android.os.UserHandle;
import android.util.IndentingPrintWriter;
import android.util.SparseArray;
import android.util.SparseBooleanArray;

import com.android.internal.annotations.GuardedBy;
import com.android.server.utils.Slogf;

import java.util.ArrayList;

/**
 * Handles clients' health status checking and reporting the statuses to the watchdog daemon.
 *
 * <p>Clients are grouped by timeout. A health check pings every tracked client with a fresh
 * session id; clients that have not answered by the time their group is analyzed are collected
 * and reported to the daemon with the next response. All mutable state is guarded by
 * {@code mLock}; the periodic check itself is posted to the main looper.
 */
public final class WatchdogProcessHandler {
    private static final int[] ALL_TIMEOUTS =
            { TIMEOUT_CRITICAL, TIMEOUT_MODERATE, TIMEOUT_NORMAL };

    private final ICarWatchdogServiceForSystem mWatchdogServiceForSystem;
    private final CarWatchdogDaemonHelper mCarWatchdogDaemonHelper;
    private final Handler mMainHandler = new Handler(Looper.getMainLooper());
    private final Object mLock = new Object();
    /*
     * Keeps the list of car watchdog client according to timeout:
     * key => timeout, value => ClientInfo list.
     * The value of SparseArray is guarded by mLock.
     */
    @GuardedBy("mLock")
    private final SparseArray<ArrayList<ClientInfo>> mClientMap = new SparseArray<>();
    /*
     * Keeps the map of car watchdog client being checked by CarWatchdogService according to
     * timeout: key => timeout, value => ClientInfo map.
     * The value is also a map: key => session id, value => ClientInfo.
     */
    @GuardedBy("mLock")
    private final SparseArray<SparseArray<ClientInfo>> mPingedClientMap = new SparseArray<>();
    /*
     * Keeps whether client health checking is being performed according to timeout:
     * key => timeout, value => boolean (whether client health checking is being performed).
     * A missing key reads as false, so lookups never have to deal with null.
     */
    @GuardedBy("mLock")
    private final SparseBooleanArray mClientCheckInProgress = new SparseBooleanArray();
    @GuardedBy("mLock")
    private final ArrayList<ClientInfo> mClientsNotResponding = new ArrayList<>();
    // Only touched from getNewSessionId(), which is called while mLock is held.
    @GuardedBy("mLock")
    private int mLastSessionId;
    // Every read and write in this class happens inside synchronized (mLock).
    @GuardedBy("mLock")
    private final SparseBooleanArray mStoppedUser = new SparseBooleanArray();

    /**
     * Creates the handler.
     *
     * @param serviceImpl binder object passed back to the daemon when reporting liveness
     * @param daemonHelper helper used for every call into the car watchdog daemon
     */
    public WatchdogProcessHandler(ICarWatchdogServiceForSystem serviceImpl,
            CarWatchdogDaemonHelper daemonHelper) {
        mWatchdogServiceForSystem = serviceImpl;
        mCarWatchdogDaemonHelper = daemonHelper;
    }

    /** Initializes the handler by creating the per-timeout client containers. */
    public void init() {
        synchronized (mLock) {
            for (int timeout : ALL_TIMEOUTS) {
                mClientMap.put(timeout, new ArrayList<ClientInfo>());
                mPingedClientMap.put(timeout, new SparseArray<ClientInfo>());
                mClientCheckInProgress.put(timeout, false);
            }
        }
        if (CarWatchdogService.DEBUG) {
            Slogf.d(CarWatchdogService.TAG, "WatchdogProcessHandler is initialized");
        }
    }

    /** Dumps its state: the registered clients per timeout and the stopped users. */
    public void dump(IndentingPrintWriter writer) {
        synchronized (mLock) {
            writer.println("Registered clients");
            writer.increaseIndent();
            int count = 1;
            for (int timeout : ALL_TIMEOUTS) {
                ArrayList<ClientInfo> clients = mClientMap.get(timeout);
                if (clients == null) {
                    // init() has not populated this timeout; a dump must not crash on it.
                    continue;
                }
                String timeoutStr = timeoutToString(timeout);
                for (ClientInfo clientInfo : clients) {
                    writer.printf("client #%d: timeout = %s, pid = %d\n", count++, timeoutStr,
                            clientInfo.pid);
                }
            }
            writer.printf("Stopped users: ");
            int size = mStoppedUser.size();
            if (size > 0) {
                writer.printf("%d", mStoppedUser.keyAt(0));
                for (int i = 1; i < size; i++) {
                    writer.printf(", %d", mStoppedUser.keyAt(i));
                }
                writer.println();
            } else {
                writer.println("none");
            }
            writer.decreaseIndent();
        }
    }

    /**
     * Registers the client callback.
     *
     * <p>The request is dropped with a warning when the timeout has no client list, when the
     * same binder is already registered for that timeout, or when linking to the client's
     * death fails. The pid and user id are taken from the binder calling identity.
     *
     * @param client callback to ping during health checks
     * @param timeout timeout group the client belongs to
     */
    public void registerClient(ICarWatchdogServiceCallback client, int timeout) {
        synchronized (mLock) {
            ArrayList<ClientInfo> clients = mClientMap.get(timeout);
            if (clients == null) {
                Slogf.w(CarWatchdogService.TAG, "Cannot register the client: invalid timeout");
                return;
            }
            IBinder binder = client.asBinder();
            for (int i = 0; i < clients.size(); i++) {
                ClientInfo clientInfo = clients.get(i);
                if (binder == clientInfo.client.asBinder()) {
                    Slogf.w(CarWatchdogService.TAG,
                            "Cannot register the client: the client(pid: %d) has been already "
                            + "registered", clientInfo.pid);
                    return;
                }
            }
            int pid = Binder.getCallingPid();
            int userId = UserHandle.getUserId(Binder.getCallingUid());
            ClientInfo clientInfo = new ClientInfo(client, pid, userId, timeout);
            try {
                clientInfo.linkToDeath();
            } catch (RemoteException e) {
                Slogf.w(CarWatchdogService.TAG,
                        "Cannot register the client: linkToDeath to the client failed");
                return;
            }
            clients.add(clientInfo);
            if (CarWatchdogService.DEBUG) {
                Slogf.d(CarWatchdogService.TAG, "Client(pid: %d) is registered", pid);
            }
        }
    }

    /**
     * Unregisters the previously registered client callback.
     *
     * <p>Besides removing the client from its timeout list, any ping that is still pending for
     * it is forgotten, so that a client which unregisters before answering is not later
     * counted as not responding. Logs a warning when the client is not found.
     *
     * @param client callback that was passed to {@link #registerClient}
     */
    public void unregisterClient(ICarWatchdogServiceCallback client) {
        synchronized (mLock) {
            IBinder binder = client.asBinder();
            for (int timeout : ALL_TIMEOUTS) {
                ArrayList<ClientInfo> clients = mClientMap.get(timeout);
                if (clients == null) {
                    continue;
                }
                for (int i = 0; i < clients.size(); i++) {
                    ClientInfo clientInfo = clients.get(i);
                    if (binder != clientInfo.client.asBinder()) {
                        continue;
                    }
                    clientInfo.unlinkToDeath();
                    clients.remove(i);
                    // Drop the outstanding ping, but only if the slot still belongs to this
                    // very client (a never-pinged client carries session id 0).
                    SparseArray<ClientInfo> pingedClients = mPingedClientMap.get(timeout);
                    if (pingedClients != null
                            && pingedClients.get(clientInfo.sessionId) == clientInfo) {
                        pingedClients.remove(clientInfo.sessionId);
                    }
                    if (CarWatchdogService.DEBUG) {
                        Slogf.d(CarWatchdogService.TAG, "Client(pid: %d) is unregistered",
                                clientInfo.pid);
                    }
                    return;
                }
            }
        }
        Slogf.w(CarWatchdogService.TAG,
                "Cannot unregister the client: the client has not been registered before");
    }

    /**
     * Tells the handler that the client is alive.
     *
     * <p>The response is accepted only while a check is in progress for the timeout group and
     * only when the session id maps to the same binder that is responding.
     *
     * @param client callback that is responding
     * @param sessionId session id the client received with the ping
     */
    public void tellClientAlive(ICarWatchdogServiceCallback client, int sessionId) {
        synchronized (mLock) {
            for (int timeout : ALL_TIMEOUTS) {
                if (!mClientCheckInProgress.get(timeout)) {
                    continue;
                }
                SparseArray<ClientInfo> pingedClients = mPingedClientMap.get(timeout);
                ClientInfo clientInfo = pingedClients.get(sessionId);
                if (clientInfo != null && clientInfo.client.asBinder() == client.asBinder()) {
                    pingedClients.remove(sessionId);
                    return;
                }
            }
        }
    }

    /**
     * Updates the user stopped state.
     *
     * @param userId user whose state changed
     * @param isStopped {@code true} to mark the user as stopped, {@code false} to clear it
     */
    public void updateUserState(@UserIdInt int userId, boolean isStopped) {
        synchronized (mLock) {
            if (isStopped) {
                mStoppedUser.put(userId, true);
            } else {
                mStoppedUser.delete(userId);
            }
        }
    }

    /** Posts health check message to the main looper. */
    public void postHealthCheckMessage(int sessionId) {
        mMainHandler.post(() -> doHealthCheck(sessionId));
    }

    /** Returns the number of clients registered for the timeout, or 0 if it is unknown. */
    public int getClientCount(int timeout) {
        synchronized (mLock) {
            ArrayList<ClientInfo> clients = mClientMap.get(timeout);
            return clients != null ? clients.size() : 0;
        }
    }

    /** Resets pinged clients before health checking. */
    public void prepareHealthCheck() {
        synchronized (mLock) {
            for (int timeout : ALL_TIMEOUTS) {
                SparseArray<ClientInfo> pingedClients = mPingedClientMap.get(timeout);
                if (pingedClients != null) {
                    pingedClients.clear();
                }
            }
        }
    }

    /** Enables/disables the watchdog daemon client health check process. */
    void controlProcessHealthCheck(boolean disable) {
        try {
            mCarWatchdogDaemonHelper.controlProcessHealthCheck(disable);
        } catch (RemoteException e) {
            Slogf.w(CarWatchdogService.TAG,
                    "Cannot enable/disable the car watchdog daemon health check process: %s", e);
        }
    }

    /** Removes a client whose binder died from its timeout list. */
    private void onClientDeath(ICarWatchdogServiceCallback client, int timeout) {
        synchronized (mLock) {
            removeClientLocked(client.asBinder(), timeout);
        }
    }

    private void doHealthCheck(int sessionId) {
        // For critical clients, the response status are checked just before reporting to car
        // watchdog daemon. For moderate and normal clients, the status are checked after allowed
        // delay per timeout.
        analyzeClientResponse(TIMEOUT_CRITICAL);
        reportHealthCheckResult(sessionId);
        sendPingToClients(TIMEOUT_CRITICAL);
        sendPingToClientsAndCheck(TIMEOUT_MODERATE);
        sendPingToClientsAndCheck(TIMEOUT_NORMAL);
    }

    private void analyzeClientResponse(int timeout) {
        // Clients which are not responding are stored in mClientsNotResponding, and will be dumped
        // and killed at the next response of CarWatchdogService to car watchdog daemon.
        synchronized (mLock) {
            SparseArray<ClientInfo> pingedClients = mPingedClientMap.get(timeout);
            for (int i = 0; i < pingedClients.size(); i++) {
                ClientInfo clientInfo = pingedClients.valueAt(i);
                if (mStoppedUser.get(clientInfo.userId)) {
                    continue;
                }
                mClientsNotResponding.add(clientInfo);
                removeClientLocked(clientInfo.client.asBinder(), timeout);
            }
            // put() addresses the entry by key; setValueAt() would treat timeout as an index.
            mClientCheckInProgress.put(timeout, false);
        }
    }

    private void sendPingToClients(int timeout) {
        // Only clients that are recorded in the pinged map are actually pinged; clients of
        // stopped users are neither tracked nor contacted.
        ArrayList<ClientInfo> clientsToCheck = new ArrayList<>();
        synchronized (mLock) {
            SparseArray<ClientInfo> pingedClients = mPingedClientMap.get(timeout);
            pingedClients.clear();
            ArrayList<ClientInfo> clients = mClientMap.get(timeout);
            for (int i = 0; i < clients.size(); i++) {
                ClientInfo clientInfo = clients.get(i);
                if (mStoppedUser.get(clientInfo.userId)) {
                    continue;
                }
                int sessionId = getNewSessionId();
                clientInfo.sessionId = sessionId;
                pingedClients.put(sessionId, clientInfo);
                clientsToCheck.add(clientInfo);
            }
            mClientCheckInProgress.put(timeout, true);
        }
        // The binder calls are made on the snapshot, outside the lock.
        for (int i = 0; i < clientsToCheck.size(); i++) {
            ClientInfo clientInfo = clientsToCheck.get(i);
            try {
                clientInfo.client.onCheckHealthStatus(clientInfo.sessionId, timeout);
            } catch (RemoteException e) {
                Slogf.w(CarWatchdogService.TAG,
                        "Sending a ping message to client(pid: %d) failed: %s", clientInfo.pid, e);
                synchronized (mLock) {
                    mPingedClientMap.get(timeout).remove(clientInfo.sessionId);
                }
            }
        }
    }

    private void sendPingToClientsAndCheck(int timeout) {
        synchronized (mLock) {
            // A previous round for this timeout has not been analyzed yet; do not overlap.
            if (mClientCheckInProgress.get(timeout)) {
                return;
            }
        }
        sendPingToClients(timeout);
        mMainHandler.postDelayed(
                () -> analyzeClientResponse(timeout), timeoutToDurationMs(timeout));
    }

    /** Returns the next session id, wrapping back to 1 so ids stay positive. Needs mLock. */
    private int getNewSessionId() {
        if (++mLastSessionId <= 0) {
            mLastSessionId = 1;
        }
        return mLastSessionId;
    }

    /** Removes the first client with the given binder from the timeout's list. Needs mLock. */
    private void removeClientLocked(IBinder clientBinder, int timeout) {
        ArrayList<ClientInfo> clients = mClientMap.get(timeout);
        for (int i = 0; i < clients.size(); i++) {
            ClientInfo clientInfo = clients.get(i);
            if (clientBinder == clientInfo.client.asBinder()) {
                clients.remove(i);
                return;
            }
        }
    }

    private void reportHealthCheckResult(int sessionId) {
        int[] clientsNotResponding;
        ArrayList<ClientInfo> clientsToNotify;
        synchronized (mLock) {
            clientsNotResponding = toIntArray(mClientsNotResponding);
            clientsToNotify = new ArrayList<>(mClientsNotResponding);
            mClientsNotResponding.clear();
        }
        // Binder calls are made on the copies, after the lock has been released.
        for (int i = 0; i < clientsToNotify.size(); i++) {
            ClientInfo clientInfo = clientsToNotify.get(i);
            try {
                clientInfo.client.onPrepareProcessTermination();
            } catch (RemoteException e) {
                Slogf.w(CarWatchdogService.TAG,
                        "Notifying onPrepareProcessTermination to client(pid: %d) failed: %s",
                        clientInfo.pid, e);
            }
        }

        try {
            mCarWatchdogDaemonHelper.tellCarWatchdogServiceAlive(
                    mWatchdogServiceForSystem, clientsNotResponding, sessionId);
        } catch (RemoteException | RuntimeException e) {
            Slogf.w(CarWatchdogService.TAG,
                    "Cannot respond to car watchdog daemon (sessionId=%d): %s", sessionId, e);
        }
    }

    /** Returns the pids of the given clients, in list order. */
    @NonNull
    private int[] toIntArray(@NonNull ArrayList<ClientInfo> list) {
        int size = list.size();
        int[] intArray = new int[size];
        for (int i = 0; i < size; i++) {
            intArray[i] = list.get(i).pid;
        }
        return intArray;
    }

    private String timeoutToString(int timeout) {
        switch (timeout) {
            case TIMEOUT_CRITICAL:
                return "critical";
            case TIMEOUT_MODERATE:
                return "moderate";
            case TIMEOUT_NORMAL:
                return "normal";
            default:
                Slogf.w(CarWatchdogService.TAG, "Unknown timeout value");
                return "unknown";
        }
    }

    private long timeoutToDurationMs(int timeout) {
        switch (timeout) {
            case TIMEOUT_CRITICAL:
                return 3000L;
            case TIMEOUT_MODERATE:
                return 5000L;
            case TIMEOUT_NORMAL:
                return 10000L;
            default:
                Slogf.w(CarWatchdogService.TAG, "Unknown timeout value");
                return 10000L;
        }
    }

    /**
     * Book-keeping record for one registered client. It doubles as the binder death recipient
     * so the enclosing handler can drop the client when its process dies.
     */
    private final class ClientInfo implements IBinder.DeathRecipient {
        public final ICarWatchdogServiceCallback client;
        public final int pid;
        @UserIdInt public final int userId;
        public final int timeout;
        // Session id of the most recent ping; stays 0 until the client is pinged.
        public volatile int sessionId;

        ClientInfo(ICarWatchdogServiceCallback client, int pid, @UserIdInt int userId,
                int timeout) {
            this.client = client;
            this.pid = pid;
            this.userId = userId;
            this.timeout = timeout;
        }

        @Override
        public void binderDied() {
            Slogf.w(CarWatchdogService.TAG, "Client(pid: %d) died", pid);
            onClientDeath(client, timeout);
        }

        private void linkToDeath() throws RemoteException {
            client.asBinder().linkToDeath(this, 0);
        }

        private void unlinkToDeath() {
            client.asBinder().unlinkToDeath(this, 0);
        }
    }
}