/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
#include "first_stage_init.h"

#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <paths.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <unistd.h>

#include <algorithm>
#include <chrono>
#include <filesystem>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include <android-base/chrono_utils.h>
#include <android-base/file.h>
#include <android-base/logging.h>
#include <modprobe/modprobe.h>
#include <private/android_filesystem_config.h>

#include "debug_ramdisk.h"
#include "first_stage_console.h"
#include "first_stage_mount.h"
#include "reboot_utils.h"
#include "second_stage_resources.h"
#include "snapuserd_transition.h"
#include "switch_root.h"
#include "util.h"

using android::base::boot_clock;

using namespace std::literals;

namespace fs = std::filesystem;
54 
55 namespace android {
56 namespace init {
57 
58 namespace {
59 
FreeRamdisk(DIR * dir,dev_t dev)60 void FreeRamdisk(DIR* dir, dev_t dev) {
61     int dfd = dirfd(dir);
62 
63     dirent* de;
64     while ((de = readdir(dir)) != nullptr) {
65         if (de->d_name == "."s || de->d_name == ".."s) {
66             continue;
67         }
68 
69         bool is_dir = false;
70 
71         if (de->d_type == DT_DIR || de->d_type == DT_UNKNOWN) {
72             struct stat info;
73             if (fstatat(dfd, de->d_name, &info, AT_SYMLINK_NOFOLLOW) != 0) {
74                 continue;
75             }
76 
77             if (info.st_dev != dev) {
78                 continue;
79             }
80 
81             if (S_ISDIR(info.st_mode)) {
82                 is_dir = true;
83                 auto fd = openat(dfd, de->d_name, O_RDONLY | O_DIRECTORY | O_CLOEXEC);
84                 if (fd >= 0) {
85                     auto subdir =
86                             std::unique_ptr<DIR, decltype(&closedir)>{fdopendir(fd), closedir};
87                     if (subdir) {
88                         FreeRamdisk(subdir.get(), dev);
89                     } else {
90                         close(fd);
91                     }
92                 }
93             }
94         } else if (de->d_type == DT_REG) {
95             // Do not free snapuserd if we will need the ramdisk copy during the
96             // selinux transition.
97             if (de->d_name == "snapuserd"s && IsFirstStageSnapuserdRunning()) {
98                 continue;
99             }
100         }
101         unlinkat(dfd, de->d_name, is_dir ? AT_REMOVEDIR : 0);
102     }
103 }
104 
// Returns true when the boot parameters request a forced normal boot.
//
// |bootconfig| matches when it contains `androidboot.force_normal_boot = "1"`.
// |cmdline| matches when it contains `androidboot.force_normal_boot=1` as a
// complete whitespace-delimited token, so values that merely start with "1"
// (for example "=10") are not treated as a match.
bool ForceNormalBoot(const std::string& cmdline, const std::string& bootconfig) {
    if (bootconfig.find("androidboot.force_normal_boot = \"1\"") != std::string::npos) {
        return true;
    }

    static constexpr char kCmdlineFlag[] = "androidboot.force_normal_boot=1";
    constexpr std::string::size_type kFlagLength = sizeof(kCmdlineFlag) - 1;
    const auto is_separator = [](char c) {
        return c == ' ' || c == '\t' || c == '\n' || c == '\r';
    };

    for (std::string::size_type pos = cmdline.find(kCmdlineFlag); pos != std::string::npos;
         pos = cmdline.find(kCmdlineFlag, pos + 1)) {
        const std::string::size_type end = pos + kFlagLength;
        const bool starts_token = pos == 0 || is_separator(cmdline[pos - 1]);
        const bool ends_token = end == cmdline.size() || is_separator(cmdline[end]);
        if (starts_token && ends_token) {
            return true;
        }
    }
    return false;
}
109 
110 }  // namespace
111 
// Returns the file name (not the full path) of the module load list to use for
// the module directory |dir_path|.
//
// When |recovery| is true and |dir_path|/modules.load.recovery exists as a
// regular file, "modules.load.recovery" is returned; in every other case the
// result is "modules.load". Requiring a regular file keeps a stray directory of
// that name from being selected as a list.
std::string GetModuleLoadList(bool recovery, const std::string& dir_path) {
    if (recovery) {
        const std::string recovery_load_path = dir_path + "/modules.load.recovery";
        struct stat file_stat;
        if (stat(recovery_load_path.c_str(), &file_stat) == 0 && S_ISREG(file_stat.st_mode)) {
            return "modules.load.recovery";
        }
    }

    return "modules.load";
}
124 
125 #define MODULE_BASE_DIR "/lib/modules"
LoadKernelModules(bool recovery,bool want_console,int & modules_loaded)126 bool LoadKernelModules(bool recovery, bool want_console, int& modules_loaded) {
127     struct utsname uts;
128     if (uname(&uts)) {
129         LOG(FATAL) << "Failed to get kernel version.";
130     }
131     int major, minor;
132     if (sscanf(uts.release, "%d.%d", &major, &minor) != 2) {
133         LOG(FATAL) << "Failed to parse kernel version " << uts.release;
134     }
135 
136     std::unique_ptr<DIR, decltype(&closedir)> base_dir(opendir(MODULE_BASE_DIR), closedir);
137     if (!base_dir) {
138         LOG(INFO) << "Unable to open /lib/modules, skipping module loading.";
139         return true;
140     }
141     dirent* entry;
142     std::vector<std::string> module_dirs;
143     while ((entry = readdir(base_dir.get()))) {
144         if (entry->d_type != DT_DIR) {
145             continue;
146         }
147         int dir_major, dir_minor;
148         if (sscanf(entry->d_name, "%d.%d", &dir_major, &dir_minor) != 2 || dir_major != major ||
149             dir_minor != minor) {
150             continue;
151         }
152         module_dirs.emplace_back(entry->d_name);
153     }
154 
155     // Sort the directories so they are iterated over during module loading
156     // in a consistent order. Alphabetical sorting is fine here because the
157     // kernel version at the beginning of the directory name must match the
158     // current kernel version, so the sort only applies to a label that
159     // follows the kernel version, for example /lib/modules/5.4 vs.
160     // /lib/modules/5.4-gki.
161     std::sort(module_dirs.begin(), module_dirs.end());
162 
163     for (const auto& module_dir : module_dirs) {
164         std::string dir_path = MODULE_BASE_DIR "/";
165         dir_path.append(module_dir);
166         Modprobe m({dir_path}, GetModuleLoadList(recovery, dir_path));
167         bool retval = m.LoadListedModules(!want_console);
168         modules_loaded = m.GetModuleCount();
169         if (modules_loaded > 0) {
170             return retval;
171         }
172     }
173 
174     Modprobe m({MODULE_BASE_DIR}, GetModuleLoadList(recovery, MODULE_BASE_DIR));
175     bool retval = m.LoadListedModules(!want_console);
176     modules_loaded = m.GetModuleCount();
177     if (modules_loaded > 0) {
178         return retval;
179     }
180     return true;
181 }
182 
FirstStageMain(int argc,char ** argv)183 int FirstStageMain(int argc, char** argv) {
184     if (REBOOT_BOOTLOADER_ON_PANIC) {
185         InstallRebootSignalHandlers();
186     }
187 
188     boot_clock::time_point start_time = boot_clock::now();
189 
190     std::vector<std::pair<std::string, int>> errors;
191 #define CHECKCALL(x) \
192     if ((x) != 0) errors.emplace_back(#x " failed", errno);
193 
194     // Clear the umask.
195     umask(0);
196 
197     CHECKCALL(clearenv());
198     CHECKCALL(setenv("PATH", _PATH_DEFPATH, 1));
199     // Get the basic filesystem setup we need put together in the initramdisk
200     // on / and then we'll let the rc file figure out the rest.
201     CHECKCALL(mount("tmpfs", "/dev", "tmpfs", MS_NOSUID, "mode=0755"));
202     CHECKCALL(mkdir("/dev/pts", 0755));
203     CHECKCALL(mkdir("/dev/socket", 0755));
204     CHECKCALL(mkdir("/dev/dm-user", 0755));
205     CHECKCALL(mount("devpts", "/dev/pts", "devpts", 0, NULL));
206 #define MAKE_STR(x) __STRING(x)
207     CHECKCALL(mount("proc", "/proc", "proc", 0, "hidepid=2,gid=" MAKE_STR(AID_READPROC)));
208 #undef MAKE_STR
209     // Don't expose the raw commandline to unprivileged processes.
210     CHECKCALL(chmod("/proc/cmdline", 0440));
211     std::string cmdline;
212     android::base::ReadFileToString("/proc/cmdline", &cmdline);
213     // Don't expose the raw bootconfig to unprivileged processes.
214     chmod("/proc/bootconfig", 0440);
215     std::string bootconfig;
216     android::base::ReadFileToString("/proc/bootconfig", &bootconfig);
217     gid_t groups[] = {AID_READPROC};
218     CHECKCALL(setgroups(arraysize(groups), groups));
219     CHECKCALL(mount("sysfs", "/sys", "sysfs", 0, NULL));
220     CHECKCALL(mount("selinuxfs", "/sys/fs/selinux", "selinuxfs", 0, NULL));
221 
222     CHECKCALL(mknod("/dev/kmsg", S_IFCHR | 0600, makedev(1, 11)));
223 
224     if constexpr (WORLD_WRITABLE_KMSG) {
225         CHECKCALL(mknod("/dev/kmsg_debug", S_IFCHR | 0622, makedev(1, 11)));
226     }
227 
228     CHECKCALL(mknod("/dev/random", S_IFCHR | 0666, makedev(1, 8)));
229     CHECKCALL(mknod("/dev/urandom", S_IFCHR | 0666, makedev(1, 9)));
230 
231     // This is needed for log wrapper, which gets called before ueventd runs.
232     CHECKCALL(mknod("/dev/ptmx", S_IFCHR | 0666, makedev(5, 2)));
233     CHECKCALL(mknod("/dev/null", S_IFCHR | 0666, makedev(1, 3)));
234 
235     // These below mounts are done in first stage init so that first stage mount can mount
236     // subdirectories of /mnt/{vendor,product}/.  Other mounts, not required by first stage mount,
237     // should be done in rc files.
238     // Mount staging areas for devices managed by vold
239     // See storage config details at http://source.android.com/devices/storage/
240     CHECKCALL(mount("tmpfs", "/mnt", "tmpfs", MS_NOEXEC | MS_NOSUID | MS_NODEV,
241                     "mode=0755,uid=0,gid=1000"));
242     // /mnt/vendor is used to mount vendor-specific partitions that can not be
243     // part of the vendor partition, e.g. because they are mounted read-write.
244     CHECKCALL(mkdir("/mnt/vendor", 0755));
245     // /mnt/product is used to mount product-specific partitions that can not be
246     // part of the product partition, e.g. because they are mounted read-write.
247     CHECKCALL(mkdir("/mnt/product", 0755));
248 
249     // /debug_ramdisk is used to preserve additional files from the debug ramdisk
250     CHECKCALL(mount("tmpfs", "/debug_ramdisk", "tmpfs", MS_NOEXEC | MS_NOSUID | MS_NODEV,
251                     "mode=0755,uid=0,gid=0"));
252 
253     // /second_stage_resources is used to preserve files from first to second
254     // stage init
255     CHECKCALL(mount("tmpfs", kSecondStageRes, "tmpfs", MS_NOEXEC | MS_NOSUID | MS_NODEV,
256                     "mode=0755,uid=0,gid=0"))
257 #undef CHECKCALL
258 
259     SetStdioToDevNull(argv);
260     // Now that tmpfs is mounted on /dev and we have /dev/kmsg, we can actually
261     // talk to the outside world...
262     InitKernelLogging(argv);
263 
264     if (!errors.empty()) {
265         for (const auto& [error_string, error_errno] : errors) {
266             LOG(ERROR) << error_string << " " << strerror(error_errno);
267         }
268         LOG(FATAL) << "Init encountered errors starting first stage, aborting";
269     }
270 
271     LOG(INFO) << "init first stage started!";
272 
273     auto old_root_dir = std::unique_ptr<DIR, decltype(&closedir)>{opendir("/"), closedir};
274     if (!old_root_dir) {
275         PLOG(ERROR) << "Could not opendir(\"/\"), not freeing ramdisk";
276     }
277 
278     struct stat old_root_info;
279     if (stat("/", &old_root_info) != 0) {
280         PLOG(ERROR) << "Could not stat(\"/\"), not freeing ramdisk";
281         old_root_dir.reset();
282     }
283 
284     auto want_console = ALLOW_FIRST_STAGE_CONSOLE ? FirstStageConsole(cmdline, bootconfig) : 0;
285 
286     boot_clock::time_point module_start_time = boot_clock::now();
287     int module_count = 0;
288     if (!LoadKernelModules(IsRecoveryMode() && !ForceNormalBoot(cmdline, bootconfig), want_console,
289                            module_count)) {
290         if (want_console != FirstStageConsoleParam::DISABLED) {
291             LOG(ERROR) << "Failed to load kernel modules, starting console";
292         } else {
293             LOG(FATAL) << "Failed to load kernel modules";
294         }
295     }
296     if (module_count > 0) {
297         auto module_elapse_time = std::chrono::duration_cast<std::chrono::milliseconds>(
298                 boot_clock::now() - module_start_time);
299         setenv(kEnvInitModuleDurationMs, std::to_string(module_elapse_time.count()).c_str(), 1);
300         LOG(INFO) << "Loaded " << module_count << " kernel modules took "
301                   << module_elapse_time.count() << " ms";
302     }
303 
304 
305     bool created_devices = false;
306     if (want_console == FirstStageConsoleParam::CONSOLE_ON_FAILURE) {
307         if (!IsRecoveryMode()) {
308             created_devices = DoCreateDevices();
309             if (!created_devices){
310                 LOG(ERROR) << "Failed to create device nodes early";
311             }
312         }
313         StartConsole(cmdline);
314     }
315 
316     if (access(kBootImageRamdiskProp, F_OK) == 0) {
317         std::string dest = GetRamdiskPropForSecondStage();
318         std::string dir = android::base::Dirname(dest);
319         std::error_code ec;
320         if (!fs::create_directories(dir, ec) && !!ec) {
321             LOG(FATAL) << "Can't mkdir " << dir << ": " << ec.message();
322         }
323         if (!fs::copy_file(kBootImageRamdiskProp, dest, ec)) {
324             LOG(FATAL) << "Can't copy " << kBootImageRamdiskProp << " to " << dest << ": "
325                        << ec.message();
326         }
327         LOG(INFO) << "Copied ramdisk prop to " << dest;
328     }
329 
330     // If "/force_debuggable" is present, the second-stage init will use a userdebug
331     // sepolicy and load adb_debug.prop to allow adb root, if the device is unlocked.
332     if (access("/force_debuggable", F_OK) == 0) {
333         constexpr const char adb_debug_prop_src[] = "/adb_debug.prop";
334         constexpr const char userdebug_plat_sepolicy_cil_src[] = "/userdebug_plat_sepolicy.cil";
335         std::error_code ec;  // to invoke the overloaded copy_file() that won't throw.
336         if (access(adb_debug_prop_src, F_OK) == 0 &&
337             !fs::copy_file(adb_debug_prop_src, kDebugRamdiskProp, ec)) {
338             LOG(WARNING) << "Can't copy " << adb_debug_prop_src << " to " << kDebugRamdiskProp
339                          << ": " << ec.message();
340         }
341         if (access(userdebug_plat_sepolicy_cil_src, F_OK) == 0 &&
342             !fs::copy_file(userdebug_plat_sepolicy_cil_src, kDebugRamdiskSEPolicy, ec)) {
343             LOG(WARNING) << "Can't copy " << userdebug_plat_sepolicy_cil_src << " to "
344                          << kDebugRamdiskSEPolicy << ": " << ec.message();
345         }
346         // setenv for second-stage init to read above kDebugRamdisk* files.
347         setenv("INIT_FORCE_DEBUGGABLE", "true", 1);
348     }
349 
350     if (ForceNormalBoot(cmdline, bootconfig)) {
351         mkdir("/first_stage_ramdisk", 0755);
352         // SwitchRoot() must be called with a mount point as the target, so we bind mount the
353         // target directory to itself here.
354         if (mount("/first_stage_ramdisk", "/first_stage_ramdisk", nullptr, MS_BIND, nullptr) != 0) {
355             LOG(FATAL) << "Could not bind mount /first_stage_ramdisk to itself";
356         }
357         SwitchRoot("/first_stage_ramdisk");
358     }
359 
360     if (!DoFirstStageMount(!created_devices)) {
361         LOG(FATAL) << "Failed to mount required partitions early ...";
362     }
363 
364     struct stat new_root_info;
365     if (stat("/", &new_root_info) != 0) {
366         PLOG(ERROR) << "Could not stat(\"/\"), not freeing ramdisk";
367         old_root_dir.reset();
368     }
369 
370     if (old_root_dir && old_root_info.st_dev != new_root_info.st_dev) {
371         FreeRamdisk(old_root_dir.get(), old_root_info.st_dev);
372     }
373 
374     SetInitAvbVersionInRecovery();
375 
376     setenv(kEnvFirstStageStartedAt, std::to_string(start_time.time_since_epoch().count()).c_str(),
377            1);
378 
379     const char* path = "/system/bin/init";
380     const char* args[] = {path, "selinux_setup", nullptr};
381     auto fd = open("/dev/kmsg", O_WRONLY | O_CLOEXEC);
382     dup2(fd, STDOUT_FILENO);
383     dup2(fd, STDERR_FILENO);
384     close(fd);
385     execv(path, const_cast<char**>(args));
386 
387     // execv() only returns if an error happened, in which case we
388     // panic and never fall through this conditional.
389     PLOG(FATAL) << "execv(\"" << path << "\") failed";
390 
391     return 1;
392 }
393 
394 }  // namespace init
395 }  // namespace android
396