1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.car.watchdog;
18 
19 import static android.car.watchdog.CarWatchdogManager.TIMEOUT_CRITICAL;
20 import static android.car.watchdog.CarWatchdogManager.TIMEOUT_MODERATE;
21 import static android.car.watchdog.CarWatchdogManager.TIMEOUT_NORMAL;
22 
23 import android.annotation.NonNull;
24 import android.annotation.UserIdInt;
25 import android.automotive.watchdog.internal.ICarWatchdogServiceForSystem;
26 import android.car.watchdog.ICarWatchdogServiceCallback;
27 import android.car.watchdoglib.CarWatchdogDaemonHelper;
28 import android.os.Binder;
29 import android.os.Handler;
30 import android.os.IBinder;
31 import android.os.Looper;
32 import android.os.RemoteException;
33 import android.os.UserHandle;
34 import android.util.IndentingPrintWriter;
35 import android.util.SparseArray;
36 import android.util.SparseBooleanArray;
37 
38 import com.android.internal.annotations.GuardedBy;
39 import com.android.server.utils.Slogf;
40 
41 import java.util.ArrayList;
42 
43 /**
44  * Handles clients' health status checking and reporting the statuses to the watchdog daemon.
45  */
46 public final class WatchdogProcessHandler {
47     private static final int[] ALL_TIMEOUTS =
48             { TIMEOUT_CRITICAL, TIMEOUT_MODERATE, TIMEOUT_NORMAL };
49 
50     private final ICarWatchdogServiceForSystem mWatchdogServiceForSystem;
51     private final CarWatchdogDaemonHelper mCarWatchdogDaemonHelper;
52     private final Handler mMainHandler = new Handler(Looper.getMainLooper());
53     private final Object mLock = new Object();
54     /*
55      * Keeps the list of car watchdog client according to timeout:
56      * key => timeout, value => ClientInfo list.
57      * The value of SparseArray is guarded by mLock.
58      */
59     @GuardedBy("mLock")
60     private final SparseArray<ArrayList<ClientInfo>> mClientMap = new SparseArray<>();
61     /*
62      * Keeps the map of car watchdog client being checked by CarWatchdogService according to
63      * timeout: key => timeout, value => ClientInfo map.
64      * The value is also a map: key => session id, value => ClientInfo.
65      */
66     @GuardedBy("mLock")
67     private final SparseArray<SparseArray<ClientInfo>> mPingedClientMap = new SparseArray<>();
68     /*
69      * Keeps whether client health checking is being performed according to timeout:
70      * key => timeout, value => boolean (whether client health checking is being performed).
71      * The value of SparseArray is guarded by mLock.
72      */
73     @GuardedBy("mLock")
74     private final SparseArray<Boolean> mClientCheckInProgress = new SparseArray<>();
75     @GuardedBy("mLock")
76     private final ArrayList<ClientInfo> mClientsNotResponding = new ArrayList<>();
77     @GuardedBy("mMainHandler")
78     private int mLastSessionId;
79     @GuardedBy("mMainHandler")
80     private final SparseBooleanArray mStoppedUser = new SparseBooleanArray();
81 
WatchdogProcessHandler(ICarWatchdogServiceForSystem serviceImpl, CarWatchdogDaemonHelper daemonHelper)82     public WatchdogProcessHandler(ICarWatchdogServiceForSystem serviceImpl,
83             CarWatchdogDaemonHelper daemonHelper) {
84         mWatchdogServiceForSystem = serviceImpl;
85         mCarWatchdogDaemonHelper = daemonHelper;
86     }
87 
88     /** Initializes the handler. */
init()89     public void init() {
90         for (int timeout : ALL_TIMEOUTS) {
91             mClientMap.put(timeout, new ArrayList<ClientInfo>());
92             mPingedClientMap.put(timeout, new SparseArray<ClientInfo>());
93             mClientCheckInProgress.put(timeout, false);
94         }
95         if (CarWatchdogService.DEBUG) {
96             Slogf.d(CarWatchdogService.TAG, "WatchdogProcessHandler is initialized");
97         }
98     }
99 
100     /** Dumps its state. */
dump(IndentingPrintWriter writer)101     public void dump(IndentingPrintWriter writer) {
102         synchronized (mLock) {
103             writer.println("Registered clients");
104             writer.increaseIndent();
105             int count = 1;
106             for (int timeout : ALL_TIMEOUTS) {
107                 ArrayList<ClientInfo> clients = mClientMap.get(timeout);
108                 String timeoutStr = timeoutToString(timeout);
109                 for (ClientInfo clientInfo : clients) {
110                     writer.printf("client #%d: timeout = %s, pid = %d\n", count++, timeoutStr,
111                             clientInfo.pid);
112                 }
113             }
114             writer.printf("Stopped users: ");
115             int size = mStoppedUser.size();
116             if (size > 0) {
117                 writer.printf("%d", mStoppedUser.keyAt(0));
118                 for (int i = 1; i < size; i++) {
119                     writer.printf(", %d", mStoppedUser.keyAt(i));
120                 }
121                 writer.println();
122             } else {
123                 writer.println("none");
124             }
125             writer.decreaseIndent();
126         }
127     }
128 
129     /** Registers the client callback */
registerClient(ICarWatchdogServiceCallback client, int timeout)130     public void registerClient(ICarWatchdogServiceCallback client, int timeout) {
131         synchronized (mLock) {
132             ArrayList<ClientInfo> clients = mClientMap.get(timeout);
133             if (clients == null) {
134                 Slogf.w(CarWatchdogService.TAG, "Cannot register the client: invalid timeout");
135                 return;
136             }
137             IBinder binder = client.asBinder();
138             for (int i = 0; i < clients.size(); i++) {
139                 ClientInfo clientInfo = clients.get(i);
140                 if (binder == clientInfo.client.asBinder()) {
141                     Slogf.w(CarWatchdogService.TAG,
142                             "Cannot register the client: the client(pid: %d) has been already "
143                             + "registered", clientInfo.pid);
144                     return;
145                 }
146             }
147             int pid = Binder.getCallingPid();
148             int userId = UserHandle.getUserId(Binder.getCallingUid());
149             ClientInfo clientInfo = new ClientInfo(client, pid, userId, timeout);
150             try {
151                 clientInfo.linkToDeath();
152             } catch (RemoteException e) {
153                 Slogf.w(CarWatchdogService.TAG,
154                         "Cannot register the client: linkToDeath to the client failed");
155                 return;
156             }
157             clients.add(clientInfo);
158             if (CarWatchdogService.DEBUG) {
159                 Slogf.d(CarWatchdogService.TAG, "Client(pid: %d) is registered", pid);
160             }
161         }
162     }
163 
164     /** Unregisters the previously registered client callback */
unregisterClient(ICarWatchdogServiceCallback client)165     public void unregisterClient(ICarWatchdogServiceCallback client) {
166         synchronized (mLock) {
167             IBinder binder = client.asBinder();
168             for (int timeout : ALL_TIMEOUTS) {
169                 ArrayList<ClientInfo> clients = mClientMap.get(timeout);
170                 for (int i = 0; i < clients.size(); i++) {
171                     ClientInfo clientInfo = clients.get(i);
172                     if (binder != clientInfo.client.asBinder()) {
173                         continue;
174                     }
175                     clientInfo.unlinkToDeath();
176                     clients.remove(i);
177                     if (CarWatchdogService.DEBUG) {
178                         Slogf.d(CarWatchdogService.TAG, "Client(pid: %d) is unregistered",
179                                 clientInfo.pid);
180                     }
181                     return;
182                 }
183             }
184         }
185         Slogf.w(CarWatchdogService.TAG,
186                 "Cannot unregister the client: the client has not been registered before");
187         return;
188     }
189 
190     /** Tells the handler that the client is alive. */
tellClientAlive(ICarWatchdogServiceCallback client, int sessionId)191     public void tellClientAlive(ICarWatchdogServiceCallback client, int sessionId) {
192         synchronized (mLock) {
193             for (int timeout : ALL_TIMEOUTS) {
194                 if (!mClientCheckInProgress.get(timeout)) {
195                     continue;
196                 }
197                 SparseArray<ClientInfo> pingedClients = mPingedClientMap.get(timeout);
198                 ClientInfo clientInfo = pingedClients.get(sessionId);
199                 if (clientInfo != null && clientInfo.client.asBinder() == client.asBinder()) {
200                     pingedClients.remove(sessionId);
201                     return;
202                 }
203             }
204         }
205     }
206 
207     /** Updates the user stopped state */
updateUserState(@serIdInt int userId, boolean isStopped)208     public void updateUserState(@UserIdInt int userId, boolean isStopped) {
209         synchronized (mLock) {
210             if (isStopped) {
211                 mStoppedUser.put(userId, true);
212             } else {
213                 mStoppedUser.delete(userId);
214             }
215         }
216     }
217 
218     /** Posts health check message */
postHealthCheckMessage(int sessionId)219     public void postHealthCheckMessage(int sessionId) {
220         mMainHandler.post(() -> doHealthCheck(sessionId));
221     }
222 
223     /** Returns the registered and alive client count. */
getClientCount(int timeout)224     public int getClientCount(int timeout) {
225         synchronized (mLock) {
226             ArrayList<ClientInfo> clients = mClientMap.get(timeout);
227             return clients != null ? clients.size() : 0;
228         }
229     }
230 
231     /** Resets pinged clients before health checking */
prepareHealthCheck()232     public void prepareHealthCheck() {
233         synchronized (mLock) {
234             for (int timeout : ALL_TIMEOUTS) {
235                 SparseArray<ClientInfo> pingedClients = mPingedClientMap.get(timeout);
236                 pingedClients.clear();
237             }
238         }
239     }
240 
241     /** Enables/disables the watchdog daemon client health check process. */
controlProcessHealthCheck(boolean disable)242     void controlProcessHealthCheck(boolean disable) {
243         try {
244             mCarWatchdogDaemonHelper.controlProcessHealthCheck(disable);
245         } catch (RemoteException e) {
246             Slogf.w(CarWatchdogService.TAG,
247                     "Cannot enable/disable the car watchdog daemon health check process: %s", e);
248         }
249     }
250 
onClientDeath(ICarWatchdogServiceCallback client, int timeout)251     private void onClientDeath(ICarWatchdogServiceCallback client, int timeout) {
252         synchronized (mLock) {
253             removeClientLocked(client.asBinder(), timeout);
254         }
255     }
256 
doHealthCheck(int sessionId)257     private void doHealthCheck(int sessionId) {
258         // For critical clients, the response status are checked just before reporting to car
259         // watchdog daemon. For moderate and normal clients, the status are checked after allowed
260         // delay per timeout.
261         analyzeClientResponse(TIMEOUT_CRITICAL);
262         reportHealthCheckResult(sessionId);
263         sendPingToClients(TIMEOUT_CRITICAL);
264         sendPingToClientsAndCheck(TIMEOUT_MODERATE);
265         sendPingToClientsAndCheck(TIMEOUT_NORMAL);
266     }
267 
analyzeClientResponse(int timeout)268     private void analyzeClientResponse(int timeout) {
269         // Clients which are not responding are stored in mClientsNotResponding, and will be dumped
270         // and killed at the next response of CarWatchdogService to car watchdog daemon.
271         SparseArray<ClientInfo> pingedClients = mPingedClientMap.get(timeout);
272         synchronized (mLock) {
273             for (int i = 0; i < pingedClients.size(); i++) {
274                 ClientInfo clientInfo = pingedClients.valueAt(i);
275                 if (mStoppedUser.get(clientInfo.userId)) {
276                     continue;
277                 }
278                 mClientsNotResponding.add(clientInfo);
279                 removeClientLocked(clientInfo.client.asBinder(), timeout);
280             }
281             mClientCheckInProgress.setValueAt(timeout, false);
282         }
283     }
284 
sendPingToClients(int timeout)285     private void sendPingToClients(int timeout) {
286         SparseArray<ClientInfo> pingedClients = mPingedClientMap.get(timeout);
287         ArrayList<ClientInfo> clientsToCheck;
288         synchronized (mLock) {
289             pingedClients.clear();
290             clientsToCheck = new ArrayList<>(mClientMap.get(timeout));
291             for (int i = 0; i < clientsToCheck.size(); i++) {
292                 ClientInfo clientInfo = clientsToCheck.get(i);
293                 if (mStoppedUser.get(clientInfo.userId)) {
294                     continue;
295                 }
296                 int sessionId = getNewSessionId();
297                 clientInfo.sessionId = sessionId;
298                 pingedClients.put(sessionId, clientInfo);
299             }
300             mClientCheckInProgress.setValueAt(timeout, true);
301         }
302         for (int i = 0; i < clientsToCheck.size(); i++) {
303             ClientInfo clientInfo = clientsToCheck.get(i);
304             try {
305                 clientInfo.client.onCheckHealthStatus(clientInfo.sessionId, timeout);
306             } catch (RemoteException e) {
307                 Slogf.w(CarWatchdogService.TAG,
308                         "Sending a ping message to client(pid: %d) failed: %s", clientInfo.pid, e);
309                 synchronized (mLock) {
310                     pingedClients.remove(clientInfo.sessionId);
311                 }
312             }
313         }
314     }
315 
sendPingToClientsAndCheck(int timeout)316     private void sendPingToClientsAndCheck(int timeout) {
317         synchronized (mLock) {
318             if (mClientCheckInProgress.get(timeout)) {
319                 return;
320             }
321         }
322         sendPingToClients(timeout);
323         mMainHandler.postDelayed(
324                 () -> analyzeClientResponse(timeout), timeoutToDurationMs(timeout));
325     }
326 
getNewSessionId()327     private int getNewSessionId() {
328         if (++mLastSessionId <= 0) {
329             mLastSessionId = 1;
330         }
331         return mLastSessionId;
332     }
333 
removeClientLocked(IBinder clientBinder, int timeout)334     private void removeClientLocked(IBinder clientBinder, int timeout) {
335         ArrayList<ClientInfo> clients = mClientMap.get(timeout);
336         for (int i = 0; i < clients.size(); i++) {
337             ClientInfo clientInfo = clients.get(i);
338             if (clientBinder == clientInfo.client.asBinder()) {
339                 clients.remove(i);
340                 return;
341             }
342         }
343     }
344 
reportHealthCheckResult(int sessionId)345     private void reportHealthCheckResult(int sessionId) {
346         int[] clientsNotResponding;
347         ArrayList<ClientInfo> clientsToNotify;
348         synchronized (mLock) {
349             clientsNotResponding = toIntArray(mClientsNotResponding);
350             clientsToNotify = new ArrayList<>(mClientsNotResponding);
351             mClientsNotResponding.clear();
352         }
353         for (int i = 0; i < clientsToNotify.size(); i++) {
354             ClientInfo clientInfo = clientsToNotify.get(i);
355             try {
356                 clientInfo.client.onPrepareProcessTermination();
357             } catch (RemoteException e) {
358                 Slogf.w(CarWatchdogService.TAG,
359                         "Notifying onPrepareProcessTermination to client(pid: %d) failed: %s",
360                         clientInfo.pid, e);
361             }
362         }
363 
364         try {
365             mCarWatchdogDaemonHelper.tellCarWatchdogServiceAlive(
366                     mWatchdogServiceForSystem, clientsNotResponding, sessionId);
367         } catch (RemoteException | RuntimeException e) {
368             Slogf.w(CarWatchdogService.TAG,
369                     "Cannot respond to car watchdog daemon (sessionId=%d): %s", sessionId, e);
370         }
371     }
372 
373     @NonNull
toIntArray(@onNull ArrayList<ClientInfo> list)374     private int[] toIntArray(@NonNull ArrayList<ClientInfo> list) {
375         int size = list.size();
376         int[] intArray = new int[size];
377         for (int i = 0; i < size; i++) {
378             intArray[i] = list.get(i).pid;
379         }
380         return intArray;
381     }
382 
timeoutToString(int timeout)383     private String timeoutToString(int timeout) {
384         switch (timeout) {
385             case TIMEOUT_CRITICAL:
386                 return "critical";
387             case TIMEOUT_MODERATE:
388                 return "moderate";
389             case TIMEOUT_NORMAL:
390                 return "normal";
391             default:
392                 Slogf.w(CarWatchdogService.TAG, "Unknown timeout value");
393                 return "unknown";
394         }
395     }
396 
timeoutToDurationMs(int timeout)397     private long timeoutToDurationMs(int timeout) {
398         switch (timeout) {
399             case TIMEOUT_CRITICAL:
400                 return 3000L;
401             case TIMEOUT_MODERATE:
402                 return 5000L;
403             case TIMEOUT_NORMAL:
404                 return 10000L;
405             default:
406                 Slogf.w(CarWatchdogService.TAG, "Unknown timeout value");
407                 return 10000L;
408         }
409     }
410 
411     private final class ClientInfo implements IBinder.DeathRecipient {
412         public final ICarWatchdogServiceCallback client;
413         public final int pid;
414         @UserIdInt public final int userId;
415         public final int timeout;
416         public volatile int sessionId;
417 
ClientInfo(ICarWatchdogServiceCallback client, int pid, @UserIdInt int userId, int timeout)418         ClientInfo(ICarWatchdogServiceCallback client, int pid, @UserIdInt int userId,
419                 int timeout) {
420             this.client = client;
421             this.pid = pid;
422             this.userId = userId;
423             this.timeout = timeout;
424         }
425 
426         @Override
binderDied()427         public void binderDied() {
428             Slogf.w(CarWatchdogService.TAG, "Client(pid: %d) died", pid);
429             onClientDeath(client, timeout);
430         }
431 
linkToDeath()432         private void linkToDeath() throws RemoteException {
433             client.asBinder().linkToDeath(this, 0);
434         }
435 
unlinkToDeath()436         private void unlinkToDeath() {
437             client.asBinder().unlinkToDeath(this, 0);
438         }
439     }
440 }
441