1 // Copyright (C) 2019 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <libsnapshot/snapshot.h>
16 
17 #include <dirent.h>
18 #include <fcntl.h>
19 #include <math.h>
20 #include <sys/file.h>
21 #include <sys/types.h>
22 #include <sys/unistd.h>
23 
24 #include <filesystem>
25 #include <optional>
26 #include <thread>
27 #include <unordered_set>
28 
29 #include <android-base/file.h>
30 #include <android-base/logging.h>
31 #include <android-base/parseint.h>
32 #include <android-base/properties.h>
33 #include <android-base/strings.h>
34 #include <android-base/unique_fd.h>
35 #include <cutils/sockets.h>
36 #include <ext4_utils/ext4_utils.h>
37 #include <fs_mgr.h>
38 #include <fs_mgr/file_wait.h>
39 #include <fs_mgr_dm_linear.h>
40 #include <fstab/fstab.h>
41 #include <libdm/dm.h>
42 #include <libfiemap/image_manager.h>
43 #include <liblp/liblp.h>
44 
45 #include <android/snapshot/snapshot.pb.h>
46 #include <libsnapshot/snapshot_stats.h>
47 #include "device_info.h"
48 #include "partition_cow_creator.h"
49 #include "snapshot_metadata_updater.h"
50 #include "snapshot_reader.h"
51 #include "utility.h"
52 
53 namespace android {
54 namespace snapshot {
55 
56 using android::base::unique_fd;
57 using android::dm::DeviceMapper;
58 using android::dm::DmDeviceState;
59 using android::dm::DmTable;
60 using android::dm::DmTargetLinear;
61 using android::dm::DmTargetSnapshot;
62 using android::dm::DmTargetUser;
63 using android::dm::kSectorSize;
64 using android::dm::SnapshotStorageMode;
65 using android::fiemap::FiemapStatus;
66 using android::fiemap::IImageManager;
67 using android::fs_mgr::CreateDmTable;
68 using android::fs_mgr::CreateLogicalPartition;
69 using android::fs_mgr::CreateLogicalPartitionParams;
70 using android::fs_mgr::GetPartitionGroupName;
71 using android::fs_mgr::GetPartitionName;
72 using android::fs_mgr::LpMetadata;
73 using android::fs_mgr::MetadataBuilder;
74 using android::fs_mgr::SlotNumberForSlotSuffix;
75 using android::hardware::boot::V1_1::MergeStatus;
76 using chromeos_update_engine::DeltaArchiveManifest;
77 using chromeos_update_engine::Extent;
78 using chromeos_update_engine::FileDescriptor;
79 using chromeos_update_engine::PartitionUpdate;
80 template <typename T>
81 using RepeatedPtrField = google::protobuf::RepeatedPtrField<T>;
82 using std::chrono::duration_cast;
83 using namespace std::chrono_literals;
84 using namespace std::string_literals;
85 
// Location of the snapshot boot indicator file under /metadata/ota.
static constexpr char kBootIndicatorPath[] = "/metadata/ota/snapshot-boot";
// Location of the rollback indicator file under /metadata/ota.
static constexpr char kRollbackIndicatorPath[] = "/metadata/ota/rollback-indicator";
// Two-second interval. NOTE(review): presumably the polling period used while
// waiting for the update state to change - confirm at the use site.
static constexpr auto kUpdateStateCheckInterval = 2s;

// Forward declaration. The SnapshotManager constructor installs this function
// as its merge_consistency_checker_.
MergeFailureCode CheckMergeConsistency(const std::string& name, const SnapshotStatus& status);
91 
92 // Note: IImageManager is an incomplete type in the header, so the default
93 // destructor doesn't work.
~SnapshotManager()94 SnapshotManager::~SnapshotManager() {}
95 
New(IDeviceInfo * info)96 std::unique_ptr<SnapshotManager> SnapshotManager::New(IDeviceInfo* info) {
97     if (!info) {
98         info = new DeviceInfo();
99     }
100     return std::unique_ptr<SnapshotManager>(new SnapshotManager(info));
101 }
102 
NewForFirstStageMount(IDeviceInfo * info)103 std::unique_ptr<SnapshotManager> SnapshotManager::NewForFirstStageMount(IDeviceInfo* info) {
104     if (!info) {
105         DeviceInfo* impl = new DeviceInfo();
106         impl->set_first_stage_init(true);
107         info = impl;
108     }
109     auto sm = New(info);
110 
111     // The first-stage version of snapuserd is explicitly started by init. Do
112     // not attempt to using it during tests (which run in normal AOSP).
113     if (!sm->device()->IsTestDevice()) {
114         sm->use_first_stage_snapuserd_ = true;
115     }
116     return sm;
117 }
118 
SnapshotManager(IDeviceInfo * device)119 SnapshotManager::SnapshotManager(IDeviceInfo* device) : device_(device) {
120     metadata_dir_ = device_->GetMetadataDir();
121     merge_consistency_checker_ = android::snapshot::CheckMergeConsistency;
122 }
123 
// Returns |snapshot_name| with the "-cow" suffix appended.
static std::string GetCowName(const std::string& snapshot_name) {
    std::string cow_name(snapshot_name);
    cow_name.append("-cow");
    return cow_name;
}
127 
// Returns |snapshot_name| with the "-user-cow" suffix appended.
static std::string GetDmUserCowName(const std::string& snapshot_name) {
    std::string dm_user_name(snapshot_name);
    dm_user_name.append("-user-cow");
    return dm_user_name;
}
131 
// Returns |snapshot_name| with the "-cow-img" suffix appended.
static std::string GetCowImageDeviceName(const std::string& snapshot_name) {
    std::string image_name(snapshot_name);
    image_name.append("-cow-img");
    return image_name;
}
135 
// Returns |partition_name| with the "-base" suffix appended.
static std::string GetBaseDeviceName(const std::string& partition_name) {
    std::string base_name(partition_name);
    base_name.append("-base");
    return base_name;
}
139 
// Returns |partition_name| with the "-src" suffix appended.
static std::string GetSourceDeviceName(const std::string& partition_name) {
    std::string source_name(partition_name);
    source_name.append("-src");
    return source_name;
}
143 
BeginUpdate()144 bool SnapshotManager::BeginUpdate() {
145     bool needs_merge = false;
146     if (!TryCancelUpdate(&needs_merge)) {
147         return false;
148     }
149     if (needs_merge) {
150         LOG(INFO) << "Wait for merge (if any) before beginning a new update.";
151         auto state = ProcessUpdateState();
152         LOG(INFO) << "Merged with state = " << state;
153     }
154 
155     auto file = LockExclusive();
156     if (!file) return false;
157 
158     // Purge the ImageManager just in case there is a corrupt lp_metadata file
159     // lying around. (NB: no need to return false on an error, we can let the
160     // update try to progress.)
161     if (EnsureImageManager()) {
162         images_->RemoveAllImages();
163     }
164 
165     // Clear any cached metadata (this allows re-using one manager across tests).
166     old_partition_metadata_ = nullptr;
167 
168     auto state = ReadUpdateState(file.get());
169     if (state != UpdateState::None) {
170         LOG(ERROR) << "An update is already in progress, cannot begin a new update";
171         return false;
172     }
173     return WriteUpdateState(file.get(), UpdateState::Initiated);
174 }
175 
CancelUpdate()176 bool SnapshotManager::CancelUpdate() {
177     bool needs_merge = false;
178     if (!TryCancelUpdate(&needs_merge)) {
179         return false;
180     }
181     if (needs_merge) {
182         LOG(ERROR) << "Cannot cancel update after it has completed or started merging";
183     }
184     return !needs_merge;
185 }
186 
TryCancelUpdate(bool * needs_merge)187 bool SnapshotManager::TryCancelUpdate(bool* needs_merge) {
188     *needs_merge = false;
189 
190     auto file = LockExclusive();
191     if (!file) return false;
192 
193     UpdateState state = ReadUpdateState(file.get());
194     if (state == UpdateState::None) return true;
195 
196     if (state == UpdateState::Initiated) {
197         LOG(INFO) << "Update has been initiated, now canceling";
198         return RemoveAllUpdateState(file.get());
199     }
200 
201     if (state == UpdateState::Unverified) {
202         // We completed an update, but it can still be canceled if we haven't booted into it.
203         auto slot = GetCurrentSlot();
204         if (slot != Slot::Target) {
205             LOG(INFO) << "Canceling previously completed updates (if any)";
206             return RemoveAllUpdateState(file.get());
207         }
208     }
209     *needs_merge = true;
210     return true;
211 }
212 
ReadUpdateSourceSlotSuffix()213 std::string SnapshotManager::ReadUpdateSourceSlotSuffix() {
214     auto boot_file = GetSnapshotBootIndicatorPath();
215     std::string contents;
216     if (!android::base::ReadFileToString(boot_file, &contents)) {
217         PLOG(WARNING) << "Cannot read " << boot_file;
218         return {};
219     }
220     return contents;
221 }
222 
GetCurrentSlot()223 SnapshotManager::Slot SnapshotManager::GetCurrentSlot() {
224     auto contents = ReadUpdateSourceSlotSuffix();
225     if (contents.empty()) {
226         return Slot::Unknown;
227     }
228     if (device_->GetSlotSuffix() == contents) {
229         return Slot::Source;
230     }
231     return Slot::Target;
232 }
233 
GetSnapshotSlotSuffix()234 std::string SnapshotManager::GetSnapshotSlotSuffix() {
235     switch (GetCurrentSlot()) {
236         case Slot::Target:
237             return device_->GetSlotSuffix();
238         default:
239             return device_->GetOtherSlotSuffix();
240     }
241 }
242 
RemoveFileIfExists(const std::string & path)243 static bool RemoveFileIfExists(const std::string& path) {
244     std::string message;
245     if (!android::base::RemoveFileIfExists(path, &message)) {
246         LOG(ERROR) << "Remove failed: " << path << ": " << message;
247         return false;
248     }
249     return true;
250 }
251 
RemoveAllUpdateState(LockedFile * lock,const std::function<bool ()> & prolog)252 bool SnapshotManager::RemoveAllUpdateState(LockedFile* lock, const std::function<bool()>& prolog) {
253     if (prolog && !prolog()) {
254         LOG(WARNING) << "Can't RemoveAllUpdateState: prolog failed.";
255         return false;
256     }
257 
258     LOG(INFO) << "Removing all update state.";
259 
260     if (!RemoveAllSnapshots(lock)) {
261         LOG(ERROR) << "Could not remove all snapshots";
262         return false;
263     }
264 
265     // It's okay if these fail:
266     // - For SnapshotBoot and Rollback, first-stage init performs a deeper check after
267     // reading the indicator file, so it's not a problem if it still exists
268     // after the update completes.
269     // - For ForwardMerge, FinishedSnapshotWrites asserts that the existence of the indicator
270     // matches the incoming update.
271     std::vector<std::string> files = {
272             GetSnapshotBootIndicatorPath(),
273             GetRollbackIndicatorPath(),
274             GetForwardMergeIndicatorPath(),
275             GetOldPartitionMetadataPath(),
276     };
277     for (const auto& file : files) {
278         RemoveFileIfExists(file);
279     }
280 
281     // If this fails, we'll keep trying to remove the update state (as the
282     // device reboots or starts a new update) until it finally succeeds.
283     return WriteUpdateState(lock, UpdateState::None);
284 }
285 
FinishedSnapshotWrites(bool wipe)286 bool SnapshotManager::FinishedSnapshotWrites(bool wipe) {
287     auto lock = LockExclusive();
288     if (!lock) return false;
289 
290     auto update_state = ReadUpdateState(lock.get());
291     if (update_state == UpdateState::Unverified) {
292         LOG(INFO) << "FinishedSnapshotWrites already called before. Ignored.";
293         return true;
294     }
295 
296     if (update_state != UpdateState::Initiated) {
297         LOG(ERROR) << "Can only transition to the Unverified state from the Initiated state.";
298         return false;
299     }
300 
301     if (!EnsureNoOverflowSnapshot(lock.get())) {
302         LOG(ERROR) << "Cannot ensure there are no overflow snapshots.";
303         return false;
304     }
305 
306     if (!UpdateForwardMergeIndicator(wipe)) {
307         return false;
308     }
309 
310     // This file is written on boot to detect whether a rollback occurred. It
311     // MUST NOT exist before rebooting, otherwise, we're at risk of deleting
312     // snapshots too early.
313     if (!RemoveFileIfExists(GetRollbackIndicatorPath())) {
314         return false;
315     }
316 
317     // This file acts as both a quick indicator for init (it can use access(2)
318     // to decide how to do first-stage mounts), and it stores the old slot, so
319     // we can tell whether or not we performed a rollback.
320     auto contents = device_->GetSlotSuffix();
321     auto boot_file = GetSnapshotBootIndicatorPath();
322     if (!WriteStringToFileAtomic(contents, boot_file)) {
323         PLOG(ERROR) << "write failed: " << boot_file;
324         return false;
325     }
326     return WriteUpdateState(lock.get(), UpdateState::Unverified);
327 }
328 
CreateSnapshot(LockedFile * lock,PartitionCowCreator * cow_creator,SnapshotStatus * status)329 bool SnapshotManager::CreateSnapshot(LockedFile* lock, PartitionCowCreator* cow_creator,
330                                      SnapshotStatus* status) {
331     CHECK(lock);
332     CHECK(lock->lock_mode() == LOCK_EX);
333     CHECK(status);
334 
335     if (status->name().empty()) {
336         LOG(ERROR) << "SnapshotStatus has no name.";
337         return false;
338     }
339     // Check these sizes. Like liblp, we guarantee the partition size is
340     // respected, which means it has to be sector-aligned. (This guarantee is
341     // useful for locating avb footers correctly). The COW file size, however,
342     // can be arbitrarily larger than specified, so we can safely round it up.
343     if (status->device_size() % kSectorSize != 0) {
344         LOG(ERROR) << "Snapshot " << status->name()
345                    << " device size is not a multiple of the sector size: "
346                    << status->device_size();
347         return false;
348     }
349     if (status->snapshot_size() % kSectorSize != 0) {
350         LOG(ERROR) << "Snapshot " << status->name()
351                    << " snapshot size is not a multiple of the sector size: "
352                    << status->snapshot_size();
353         return false;
354     }
355     if (status->cow_partition_size() % kSectorSize != 0) {
356         LOG(ERROR) << "Snapshot " << status->name()
357                    << " cow partition size is not a multiple of the sector size: "
358                    << status->cow_partition_size();
359         return false;
360     }
361     if (status->cow_file_size() % kSectorSize != 0) {
362         LOG(ERROR) << "Snapshot " << status->name()
363                    << " cow file size is not a multiple of the sector size: "
364                    << status->cow_file_size();
365         return false;
366     }
367 
368     status->set_state(SnapshotState::CREATED);
369     status->set_sectors_allocated(0);
370     status->set_metadata_sectors(0);
371     status->set_compression_enabled(cow_creator->compression_enabled);
372     status->set_compression_algorithm(cow_creator->compression_algorithm);
373 
374     if (!WriteSnapshotStatus(lock, *status)) {
375         PLOG(ERROR) << "Could not write snapshot status: " << status->name();
376         return false;
377     }
378     return true;
379 }
380 
CreateCowImage(LockedFile * lock,const std::string & name)381 Return SnapshotManager::CreateCowImage(LockedFile* lock, const std::string& name) {
382     CHECK(lock);
383     CHECK(lock->lock_mode() == LOCK_EX);
384     if (!EnsureImageManager()) return Return::Error();
385 
386     SnapshotStatus status;
387     if (!ReadSnapshotStatus(lock, name, &status)) {
388         return Return::Error();
389     }
390 
391     // The COW file size should have been rounded up to the nearest sector in CreateSnapshot.
392     if (status.cow_file_size() % kSectorSize != 0) {
393         LOG(ERROR) << "Snapshot " << name << " COW file size is not a multiple of the sector size: "
394                    << status.cow_file_size();
395         return Return::Error();
396     }
397 
398     std::string cow_image_name = GetCowImageDeviceName(name);
399     int cow_flags = IImageManager::CREATE_IMAGE_DEFAULT;
400     return Return(images_->CreateBackingImage(cow_image_name, status.cow_file_size(), cow_flags));
401 }
402 
MapDmUserCow(LockedFile * lock,const std::string & name,const std::string & cow_file,const std::string & base_device,const std::chrono::milliseconds & timeout_ms,std::string * path)403 bool SnapshotManager::MapDmUserCow(LockedFile* lock, const std::string& name,
404                                    const std::string& cow_file, const std::string& base_device,
405                                    const std::chrono::milliseconds& timeout_ms, std::string* path) {
406     CHECK(lock);
407 
408     auto& dm = DeviceMapper::Instance();
409 
410     // Use an extra decoration for first-stage init, so we can transition
411     // to a new table entry in second-stage.
412     std::string misc_name = name;
413     if (use_first_stage_snapuserd_) {
414         misc_name += "-init";
415     }
416 
417     if (!EnsureSnapuserdConnected()) {
418         return false;
419     }
420 
421     uint64_t base_sectors = snapuserd_client_->InitDmUserCow(misc_name, cow_file, base_device);
422     if (base_sectors == 0) {
423         LOG(ERROR) << "Failed to retrieve base_sectors from Snapuserd";
424         return false;
425     }
426 
427     DmTable table;
428     table.Emplace<DmTargetUser>(0, base_sectors, misc_name);
429     if (!dm.CreateDevice(name, table, path, timeout_ms)) {
430         return false;
431     }
432     if (!WaitForDevice(*path, timeout_ms)) {
433         return false;
434     }
435 
436     auto control_device = "/dev/dm-user/" + misc_name;
437     if (!WaitForDevice(control_device, timeout_ms)) {
438         return false;
439     }
440 
441     return snapuserd_client_->AttachDmUser(misc_name);
442 }
443 
MapSnapshot(LockedFile * lock,const std::string & name,const std::string & base_device,const std::string & cow_device,const std::chrono::milliseconds & timeout_ms,std::string * dev_path)444 bool SnapshotManager::MapSnapshot(LockedFile* lock, const std::string& name,
445                                   const std::string& base_device, const std::string& cow_device,
446                                   const std::chrono::milliseconds& timeout_ms,
447                                   std::string* dev_path) {
448     CHECK(lock);
449 
450     SnapshotStatus status;
451     if (!ReadSnapshotStatus(lock, name, &status)) {
452         return false;
453     }
454     if (status.state() == SnapshotState::NONE || status.state() == SnapshotState::MERGE_COMPLETED) {
455         LOG(ERROR) << "Should not create a snapshot device for " << name
456                    << " after merging has completed.";
457         return false;
458     }
459 
460     // Validate the block device size, as well as the requested snapshot size.
461     // Note that during first-stage init, we don't have the device paths.
462     if (android::base::StartsWith(base_device, "/")) {
463         unique_fd fd(open(base_device.c_str(), O_RDONLY | O_CLOEXEC));
464         if (fd < 0) {
465             PLOG(ERROR) << "open failed: " << base_device;
466             return false;
467         }
468         auto dev_size = get_block_device_size(fd);
469         if (!dev_size) {
470             PLOG(ERROR) << "Could not determine block device size: " << base_device;
471             return false;
472         }
473         if (status.device_size() != dev_size) {
474             LOG(ERROR) << "Block device size for " << base_device << " does not match"
475                        << "(expected " << status.device_size() << ", got " << dev_size << ")";
476             return false;
477         }
478     }
479     if (status.device_size() % kSectorSize != 0) {
480         LOG(ERROR) << "invalid blockdev size for " << base_device << ": " << status.device_size();
481         return false;
482     }
483     if (status.snapshot_size() % kSectorSize != 0 ||
484         status.snapshot_size() > status.device_size()) {
485         LOG(ERROR) << "Invalid snapshot size for " << base_device << ": " << status.snapshot_size();
486         return false;
487     }
488     if (status.device_size() != status.snapshot_size()) {
489         LOG(ERROR) << "Device size and snapshot size must be the same (device size = "
490                    << status.device_size() << ", snapshot size = " << status.snapshot_size();
491         return false;
492     }
493 
494     uint64_t snapshot_sectors = status.snapshot_size() / kSectorSize;
495 
496     auto& dm = DeviceMapper::Instance();
497 
498     // Note that merging is a global state. We do track whether individual devices
499     // have completed merging, but the start of the merge process is considered
500     // atomic.
501     SnapshotStorageMode mode;
502     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
503     switch (update_status.state()) {
504         case UpdateState::MergeCompleted:
505         case UpdateState::MergeNeedsReboot:
506             LOG(ERROR) << "Should not create a snapshot device for " << name
507                        << " after global merging has completed.";
508             return false;
509         case UpdateState::Merging:
510         case UpdateState::MergeFailed:
511             // Note: MergeFailed indicates that a merge is in progress, but
512             // is possibly stalled. We still have to honor the merge.
513             if (DecideMergePhase(status) == update_status.merge_phase()) {
514                 mode = SnapshotStorageMode::Merge;
515             } else {
516                 mode = SnapshotStorageMode::Persistent;
517             }
518             break;
519         default:
520             mode = SnapshotStorageMode::Persistent;
521             break;
522     }
523 
524     if (mode == SnapshotStorageMode::Persistent && status.state() == SnapshotState::MERGING) {
525         LOG(ERROR) << "Snapshot: " << name
526                    << " has snapshot status Merging but mode set to Persistent."
527                    << " Changing mode to Snapshot-Merge.";
528         mode = SnapshotStorageMode::Merge;
529     }
530 
531     DmTable table;
532     table.Emplace<DmTargetSnapshot>(0, snapshot_sectors, base_device, cow_device, mode,
533                                     kSnapshotChunkSize);
534     if (!dm.CreateDevice(name, table, dev_path, timeout_ms)) {
535         LOG(ERROR) << "Could not create snapshot device: " << name;
536         return false;
537     }
538     return true;
539 }
540 
MapCowImage(const std::string & name,const std::chrono::milliseconds & timeout_ms)541 std::optional<std::string> SnapshotManager::MapCowImage(
542         const std::string& name, const std::chrono::milliseconds& timeout_ms) {
543     if (!EnsureImageManager()) return std::nullopt;
544     auto cow_image_name = GetCowImageDeviceName(name);
545 
546     bool ok;
547     std::string cow_dev;
548     if (device_->IsRecovery() || device_->IsFirstStageInit()) {
549         const auto& opener = device_->GetPartitionOpener();
550         ok = images_->MapImageWithDeviceMapper(opener, cow_image_name, &cow_dev);
551     } else {
552         ok = images_->MapImageDevice(cow_image_name, timeout_ms, &cow_dev);
553     }
554 
555     if (ok) {
556         LOG(INFO) << "Mapped " << cow_image_name << " to " << cow_dev;
557         return cow_dev;
558     }
559     LOG(ERROR) << "Could not map image device: " << cow_image_name;
560     return std::nullopt;
561 }
562 
MapSourceDevice(LockedFile * lock,const std::string & name,const std::chrono::milliseconds & timeout_ms,std::string * path)563 bool SnapshotManager::MapSourceDevice(LockedFile* lock, const std::string& name,
564                                       const std::chrono::milliseconds& timeout_ms,
565                                       std::string* path) {
566     CHECK(lock);
567 
568     auto metadata = ReadOldPartitionMetadata(lock);
569     if (!metadata) {
570         LOG(ERROR) << "Could not map source device due to missing or corrupt metadata";
571         return false;
572     }
573 
574     auto old_name = GetOtherPartitionName(name);
575     auto slot_suffix = device_->GetSlotSuffix();
576     auto slot = SlotNumberForSlotSuffix(slot_suffix);
577 
578     CreateLogicalPartitionParams params = {
579             .block_device = device_->GetSuperDevice(slot),
580             .metadata = metadata,
581             .partition_name = old_name,
582             .timeout_ms = timeout_ms,
583             .device_name = GetSourceDeviceName(name),
584             .partition_opener = &device_->GetPartitionOpener(),
585     };
586     if (!CreateLogicalPartition(std::move(params), path)) {
587         LOG(ERROR) << "Could not create source device for snapshot " << name;
588         return false;
589     }
590     return true;
591 }
592 
UnmapSnapshot(LockedFile * lock,const std::string & name)593 bool SnapshotManager::UnmapSnapshot(LockedFile* lock, const std::string& name) {
594     CHECK(lock);
595 
596     if (!DeleteDeviceIfExists(name)) {
597         LOG(ERROR) << "Could not delete snapshot device: " << name;
598         return false;
599     }
600     return true;
601 }
602 
UnmapCowImage(const std::string & name)603 bool SnapshotManager::UnmapCowImage(const std::string& name) {
604     if (!EnsureImageManager()) return false;
605     return images_->UnmapImageIfExists(GetCowImageDeviceName(name));
606 }
607 
DeleteSnapshot(LockedFile * lock,const std::string & name)608 bool SnapshotManager::DeleteSnapshot(LockedFile* lock, const std::string& name) {
609     CHECK(lock);
610     CHECK(lock->lock_mode() == LOCK_EX);
611     if (!EnsureImageManager()) return false;
612 
613     if (!UnmapCowDevices(lock, name)) {
614         return false;
615     }
616 
617     // We can't delete snapshots in recovery. The only way we'd try is it we're
618     // completing or canceling a merge in preparation for a data wipe, in which
619     // case, we don't care if the file sticks around.
620     if (device_->IsRecovery()) {
621         LOG(INFO) << "Skipping delete of snapshot " << name << " in recovery.";
622         return true;
623     }
624 
625     auto cow_image_name = GetCowImageDeviceName(name);
626     if (images_->BackingImageExists(cow_image_name)) {
627         if (!images_->DeleteBackingImage(cow_image_name)) {
628             return false;
629         }
630     }
631 
632     std::string error;
633     auto file_path = GetSnapshotStatusFilePath(name);
634     if (!android::base::RemoveFileIfExists(file_path, &error)) {
635         LOG(ERROR) << "Failed to remove status file " << file_path << ": " << error;
636         return false;
637     }
638     return true;
639 }
640 
InitiateMerge()641 bool SnapshotManager::InitiateMerge() {
642     auto lock = LockExclusive();
643     if (!lock) return false;
644 
645     UpdateState state = ReadUpdateState(lock.get());
646     if (state != UpdateState::Unverified) {
647         LOG(ERROR) << "Cannot begin a merge if an update has not been verified";
648         return false;
649     }
650 
651     auto slot = GetCurrentSlot();
652     if (slot != Slot::Target) {
653         LOG(ERROR) << "Device cannot merge while not booting from new slot";
654         return false;
655     }
656 
657     std::vector<std::string> snapshots;
658     if (!ListSnapshots(lock.get(), &snapshots)) {
659         LOG(ERROR) << "Could not list snapshots";
660         return false;
661     }
662 
663     auto other_suffix = device_->GetOtherSlotSuffix();
664 
665     auto& dm = DeviceMapper::Instance();
666     for (const auto& snapshot : snapshots) {
667         if (android::base::EndsWith(snapshot, other_suffix)) {
668             // Allow the merge to continue, but log this unexpected case.
669             LOG(ERROR) << "Unexpected snapshot found during merge: " << snapshot;
670             continue;
671         }
672 
673         // The device has to be mapped, since everything should be merged at
674         // the same time. This is a fairly serious error. We could forcefully
675         // map everything here, but it should have been mapped during first-
676         // stage init.
677         if (dm.GetState(snapshot) == DmDeviceState::INVALID) {
678             LOG(ERROR) << "Cannot begin merge; device " << snapshot << " is not mapped.";
679             return false;
680         }
681     }
682 
683     auto metadata = ReadCurrentMetadata();
684     for (auto it = snapshots.begin(); it != snapshots.end();) {
685         switch (GetMetadataPartitionState(*metadata, *it)) {
686             case MetadataPartitionState::Flashed:
687                 LOG(WARNING) << "Detected re-flashing for partition " << *it
688                              << ". Skip merging it.";
689                 [[fallthrough]];
690             case MetadataPartitionState::None: {
691                 LOG(WARNING) << "Deleting snapshot for partition " << *it;
692                 if (!DeleteSnapshot(lock.get(), *it)) {
693                     LOG(WARNING) << "Cannot delete snapshot for partition " << *it
694                                  << ". Skip merging it anyways.";
695                 }
696                 it = snapshots.erase(it);
697             } break;
698             case MetadataPartitionState::Updated: {
699                 ++it;
700             } break;
701         }
702     }
703 
704     bool compression_enabled = false;
705 
706     std::vector<std::string> first_merge_group;
707 
708     DmTargetSnapshot::Status initial_target_values = {};
709     for (const auto& snapshot : snapshots) {
710         DmTargetSnapshot::Status current_status;
711         if (!QuerySnapshotStatus(snapshot, nullptr, &current_status)) {
712             return false;
713         }
714         initial_target_values.sectors_allocated += current_status.sectors_allocated;
715         initial_target_values.total_sectors += current_status.total_sectors;
716         initial_target_values.metadata_sectors += current_status.metadata_sectors;
717 
718         SnapshotStatus snapshot_status;
719         if (!ReadSnapshotStatus(lock.get(), snapshot, &snapshot_status)) {
720             return false;
721         }
722 
723         compression_enabled |= snapshot_status.compression_enabled();
724         if (DecideMergePhase(snapshot_status) == MergePhase::FIRST_PHASE) {
725             first_merge_group.emplace_back(snapshot);
726         }
727     }
728 
729     SnapshotUpdateStatus initial_status = ReadSnapshotUpdateStatus(lock.get());
730     initial_status.set_state(UpdateState::Merging);
731     initial_status.set_sectors_allocated(initial_target_values.sectors_allocated);
732     initial_status.set_total_sectors(initial_target_values.total_sectors);
733     initial_status.set_metadata_sectors(initial_target_values.metadata_sectors);
734     initial_status.set_compression_enabled(compression_enabled);
735 
736     // If any partitions shrunk, we need to merge them before we merge any other
737     // partitions (see b/177935716). Otherwise, a merge from another partition
738     // may overwrite the source block of a copy operation.
739     const std::vector<std::string>* merge_group;
740     if (first_merge_group.empty()) {
741         merge_group = &snapshots;
742         initial_status.set_merge_phase(MergePhase::SECOND_PHASE);
743     } else {
744         merge_group = &first_merge_group;
745         initial_status.set_merge_phase(MergePhase::FIRST_PHASE);
746     }
747 
748     // Point of no return - mark that we're starting a merge. From now on every
749     // eligible snapshot must be a merge target.
750     if (!WriteSnapshotUpdateStatus(lock.get(), initial_status)) {
751         return false;
752     }
753 
754     auto reported_code = MergeFailureCode::Ok;
755     for (const auto& snapshot : *merge_group) {
756         // If this fails, we have no choice but to continue. Everything must
757         // be merged. This is not an ideal state to be in, but it is safe,
758         // because we the next boot will try again.
759         auto code = SwitchSnapshotToMerge(lock.get(), snapshot);
760         if (code != MergeFailureCode::Ok) {
761             LOG(ERROR) << "Failed to switch snapshot to a merge target: " << snapshot;
762             if (reported_code == MergeFailureCode::Ok) {
763                 reported_code = code;
764             }
765         }
766     }
767 
768     // If we couldn't switch everything to a merge target, pre-emptively mark
769     // this merge as failed. It will get acknowledged when WaitForMerge() is
770     // called.
771     if (reported_code != MergeFailureCode::Ok) {
772         WriteUpdateState(lock.get(), UpdateState::MergeFailed, reported_code);
773     }
774 
775     // Return true no matter what, because a merge was initiated.
776     return true;
777 }
778 
SwitchSnapshotToMerge(LockedFile * lock,const std::string & name)779 MergeFailureCode SnapshotManager::SwitchSnapshotToMerge(LockedFile* lock, const std::string& name) {
780     SnapshotStatus status;
781     if (!ReadSnapshotStatus(lock, name, &status)) {
782         return MergeFailureCode::ReadStatus;
783     }
784     if (status.state() != SnapshotState::CREATED) {
785         LOG(WARNING) << "Snapshot " << name
786                      << " has unexpected state: " << SnapshotState_Name(status.state());
787     }
788 
789     // After this, we return true because we technically did switch to a merge
790     // target. Everything else we do here is just informational.
791     if (auto code = RewriteSnapshotDeviceTable(name); code != MergeFailureCode::Ok) {
792         return code;
793     }
794 
795     status.set_state(SnapshotState::MERGING);
796 
797     DmTargetSnapshot::Status dm_status;
798     if (!QuerySnapshotStatus(name, nullptr, &dm_status)) {
799         LOG(ERROR) << "Could not query merge status for snapshot: " << name;
800     }
801     status.set_sectors_allocated(dm_status.sectors_allocated);
802     status.set_metadata_sectors(dm_status.metadata_sectors);
803     if (!WriteSnapshotStatus(lock, status)) {
804         LOG(ERROR) << "Could not update status file for snapshot: " << name;
805     }
806     return MergeFailureCode::Ok;
807 }
808 
RewriteSnapshotDeviceTable(const std::string & name)809 MergeFailureCode SnapshotManager::RewriteSnapshotDeviceTable(const std::string& name) {
810     auto& dm = DeviceMapper::Instance();
811 
812     std::vector<DeviceMapper::TargetInfo> old_targets;
813     if (!dm.GetTableInfo(name, &old_targets)) {
814         LOG(ERROR) << "Could not read snapshot device table: " << name;
815         return MergeFailureCode::GetTableInfo;
816     }
817     if (old_targets.size() != 1 || DeviceMapper::GetTargetType(old_targets[0].spec) != "snapshot") {
818         LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << name;
819         return MergeFailureCode::UnknownTable;
820     }
821 
822     std::string base_device, cow_device;
823     if (!DmTargetSnapshot::GetDevicesFromParams(old_targets[0].data, &base_device, &cow_device)) {
824         LOG(ERROR) << "Could not derive underlying devices for snapshot: " << name;
825         return MergeFailureCode::GetTableParams;
826     }
827 
828     DmTable table;
829     table.Emplace<DmTargetSnapshot>(0, old_targets[0].spec.length, base_device, cow_device,
830                                     SnapshotStorageMode::Merge, kSnapshotChunkSize);
831     if (!dm.LoadTableAndActivate(name, table)) {
832         LOG(ERROR) << "Could not swap device-mapper tables on snapshot device " << name;
833         return MergeFailureCode::ActivateNewTable;
834     }
835     LOG(INFO) << "Successfully switched snapshot device to a merge target: " << name;
836     return MergeFailureCode::Ok;
837 }
838 
// Selects which device-mapper query GetSingleTarget() issues for a device.
enum class TableQuery {
    // Query via DeviceMapper::GetTableInfo().
    Table,
    // Query via DeviceMapper::GetTableStatus().
    Status,
};
843 
GetSingleTarget(const std::string & dm_name,TableQuery query,DeviceMapper::TargetInfo * target)844 static bool GetSingleTarget(const std::string& dm_name, TableQuery query,
845                             DeviceMapper::TargetInfo* target) {
846     auto& dm = DeviceMapper::Instance();
847     if (dm.GetState(dm_name) == DmDeviceState::INVALID) {
848         return false;
849     }
850 
851     std::vector<DeviceMapper::TargetInfo> targets;
852     bool result;
853     if (query == TableQuery::Status) {
854         result = dm.GetTableStatus(dm_name, &targets);
855     } else {
856         result = dm.GetTableInfo(dm_name, &targets);
857     }
858     if (!result) {
859         LOG(ERROR) << "Could not query device: " << dm_name;
860         return false;
861     }
862     if (targets.size() != 1) {
863         return false;
864     }
865 
866     *target = std::move(targets[0]);
867     return true;
868 }
869 
IsSnapshotDevice(const std::string & dm_name,TargetInfo * target)870 bool SnapshotManager::IsSnapshotDevice(const std::string& dm_name, TargetInfo* target) {
871     DeviceMapper::TargetInfo snap_target;
872     if (!GetSingleTarget(dm_name, TableQuery::Status, &snap_target)) {
873         return false;
874     }
875     auto type = DeviceMapper::GetTargetType(snap_target.spec);
876     if (type != "snapshot" && type != "snapshot-merge") {
877         return false;
878     }
879     if (target) {
880         *target = std::move(snap_target);
881     }
882     return true;
883 }
884 
QuerySnapshotStatus(const std::string & dm_name,std::string * target_type,DmTargetSnapshot::Status * status)885 bool SnapshotManager::QuerySnapshotStatus(const std::string& dm_name, std::string* target_type,
886                                           DmTargetSnapshot::Status* status) {
887     DeviceMapper::TargetInfo target;
888     if (!IsSnapshotDevice(dm_name, &target)) {
889         LOG(ERROR) << "Device " << dm_name << " is not a snapshot or snapshot-merge device";
890         return false;
891     }
892     if (!DmTargetSnapshot::ParseStatusText(target.data, status)) {
893         LOG(ERROR) << "Could not parse snapshot status text: " << dm_name;
894         return false;
895     }
896     if (target_type) {
897         *target_type = DeviceMapper::GetTargetType(target.spec);
898     }
899     if (!status->error.empty()) {
900         LOG(ERROR) << "Snapshot: " << dm_name << " returned error code: " << status->error;
901         return false;
902     }
903     return true;
904 }
905 
906 // Note that when a merge fails, we will *always* try again to complete the
907 // merge each time the device boots. There is no harm in doing so, and if
908 // the problem was transient, we might manage to get a new outcome.
ProcessUpdateState(const std::function<bool ()> & callback,const std::function<bool ()> & before_cancel)909 UpdateState SnapshotManager::ProcessUpdateState(const std::function<bool()>& callback,
910                                                 const std::function<bool()>& before_cancel) {
911     while (true) {
912         auto result = CheckMergeState(before_cancel);
913         LOG(INFO) << "ProcessUpdateState handling state: " << result.state;
914 
915         if (result.state == UpdateState::MergeFailed) {
916             AcknowledgeMergeFailure(result.failure_code);
917         }
918         if (result.state != UpdateState::Merging) {
919             // Either there is no merge, or the merge was finished, so no need
920             // to keep waiting.
921             return result.state;
922         }
923 
924         if (callback && !callback()) {
925             return result.state;
926         }
927 
928         // This wait is not super time sensitive, so we have a relatively
929         // low polling frequency.
930         std::this_thread::sleep_for(kUpdateStateCheckInterval);
931     }
932 }
933 
CheckMergeState(const std::function<bool ()> & before_cancel)934 auto SnapshotManager::CheckMergeState(const std::function<bool()>& before_cancel) -> MergeResult {
935     auto lock = LockExclusive();
936     if (!lock) {
937         return MergeResult(UpdateState::MergeFailed, MergeFailureCode::AcquireLock);
938     }
939 
940     auto result = CheckMergeState(lock.get(), before_cancel);
941     LOG(INFO) << "CheckMergeState for snapshots returned: " << result.state;
942 
943     if (result.state == UpdateState::MergeCompleted) {
944         // Do this inside the same lock. Failures get acknowledged without the
945         // lock, because flock() might have failed.
946         AcknowledgeMergeSuccess(lock.get());
947     } else if (result.state == UpdateState::Cancelled) {
948         if (!device_->IsRecovery() && !RemoveAllUpdateState(lock.get(), before_cancel)) {
949             LOG(ERROR) << "Failed to remove all update state after acknowleding cancelled update.";
950         }
951     }
952     return result;
953 }
954 
// Computes the aggregate merge state for the whole update using the held
// |lock|. |before_cancel| is forwarded to HandleCancelledUpdate() when the
// update is still Unverified.
//
// States that need no polling (None, MergeCompleted, and any state not listed
// in the switch below) are returned exactly as read from the update status.
// Otherwise every snapshot is polled with CheckTargetMergeState() and the
// per-snapshot results are combined in this priority order: Merging,
// MergeFailed, transition to the second merge phase, MergeNeedsReboot,
// Cancelled, and finally MergeCompleted.
auto SnapshotManager::CheckMergeState(LockedFile* lock, const std::function<bool()>& before_cancel)
        -> MergeResult {
    SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
    switch (update_status.state()) {
        case UpdateState::None:
        case UpdateState::MergeCompleted:
            // Harmless races are allowed between two callers of WaitForMerge,
            // so in both of these cases we just propagate the state.
            return MergeResult(update_status.state());

        case UpdateState::Merging:
        case UpdateState::MergeNeedsReboot:
        case UpdateState::MergeFailed:
            // We'll poll each snapshot below. Note that for the NeedsReboot
            // case, we always poll once to give cleanup another opportunity to
            // run.
            break;

        case UpdateState::Unverified:
            // This is an edge case. Normally cancelled updates are detected
            // via the merge poll below, but if we never started a merge, we
            // need to also check here.
            if (HandleCancelledUpdate(lock, before_cancel)) {
                return MergeResult(UpdateState::Cancelled);
            }
            return MergeResult(update_status.state());

        default:
            // Any other state is reported back unchanged without polling.
            return MergeResult(update_status.state());
    }

    std::vector<std::string> snapshots;
    if (!ListSnapshots(lock, &snapshots)) {
        return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ListSnapshots);
    }

    auto other_suffix = device_->GetOtherSlotSuffix();

    // Flags accumulated across all snapshots; they are resolved in priority
    // order after the loop.
    bool cancelled = false;
    bool merging = false;
    bool needs_reboot = false;
    bool wrong_phase = false;
    MergeFailureCode failure_code = MergeFailureCode::Ok;
    for (const auto& snapshot : snapshots) {
        if (android::base::EndsWith(snapshot, other_suffix)) {
            // This will have triggered an error message in InitiateMerge already.
            LOG(INFO) << "Skipping merge validation of unexpected snapshot: " << snapshot;
            continue;
        }

        auto result = CheckTargetMergeState(lock, snapshot, update_status);
        LOG(INFO) << "CheckTargetMergeState for " << snapshot << " returned: " << result.state;

        switch (result.state) {
            case UpdateState::MergeFailed:
                // Take the first failure code in case other failures compound.
                if (failure_code == MergeFailureCode::Ok) {
                    failure_code = result.failure_code;
                }
                break;
            case UpdateState::Merging:
                merging = true;
                break;
            case UpdateState::MergeNeedsReboot:
                needs_reboot = true;
                break;
            case UpdateState::MergeCompleted:
                // Nothing to record; completion is the default outcome.
                break;
            case UpdateState::Cancelled:
                cancelled = true;
                break;
            case UpdateState::None:
                // CheckTargetMergeState() returns None for a snapshot that is
                // not being merged because it belongs to a later merge phase.
                wrong_phase = true;
                break;
            default:
                LOG(ERROR) << "Unknown merge status for \"" << snapshot << "\": "
                           << "\"" << result.state << "\"";
                if (failure_code == MergeFailureCode::Ok) {
                    failure_code = MergeFailureCode::UnexpectedMergeState;
                }
                break;
        }
    }

    if (merging) {
        // Note that we handle "Merging" before we handle anything else. We
        // want to poll until *nothing* is merging if we can, so everything has
        // a chance to get marked as completed or failed.
        return MergeResult(UpdateState::Merging);
    }
    if (failure_code != MergeFailureCode::Ok) {
        // Note: since there are many drop-out cases for failure, we acknowledge
        // it in WaitForMerge rather than here and elsewhere.
        return MergeResult(UpdateState::MergeFailed, failure_code);
    }
    if (wrong_phase) {
        // If we got here, no other partitions are being merged, and nothing
        // failed to merge. It's safe to move to the next merge phase.
        auto code = MergeSecondPhaseSnapshots(lock);
        if (code != MergeFailureCode::Ok) {
            return MergeResult(UpdateState::MergeFailed, code);
        }
        // The second phase has just been started, so report Merging to keep
        // callers polling.
        return MergeResult(UpdateState::Merging);
    }
    if (needs_reboot) {
        // Persist the state before reporting it.
        WriteUpdateState(lock, UpdateState::MergeNeedsReboot);
        return MergeResult(UpdateState::MergeNeedsReboot);
    }
    if (cancelled) {
        // This is an edge case, that we handle as correctly as we sensibly can.
        // The underlying partition has changed behind update_engine, and we've
        // removed the snapshot as a result. The exact state of the update is
        // undefined now, but this can only happen on an unlocked device where
        // partitions can be flashed without wiping userdata.
        return MergeResult(UpdateState::Cancelled);
    }
    return MergeResult(UpdateState::MergeCompleted);
}
1073 
// Determines the merge state of the single snapshot |name|, given the held
// |lock| and the overall |update_status|.
//
// Possible results, as produced by the code below:
//   - MergeFailed (with a failure code) when status cannot be read/queried,
//     the device has an unexpected target type, the consistency check fails,
//     or the status file cannot be written.
//   - Cancelled when the device is not a snapshot device and the partition is
//     not in the Updated state in the current metadata (the snapshot is
//     deleted in that case).
//   - None when the snapshot is still a plain "snapshot" target because it
//     belongs to the second merge phase while the update is in the first.
//   - Merging while sectors_allocated differs from metadata_sectors.
//   - MergeNeedsReboot when the merge finished but cleanup failed.
//   - MergeCompleted when the merge finished and cleanup was attempted.
auto SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string& name,
                                            const SnapshotUpdateStatus& update_status)
        -> MergeResult {
    SnapshotStatus snapshot_status;
    if (!ReadSnapshotStatus(lock, name, &snapshot_status)) {
        return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ReadStatus);
    }

    // Loaded on demand: by the branch below, or by the DCHECK further down.
    std::unique_ptr<LpMetadata> current_metadata;

    if (!IsSnapshotDevice(name)) {
        if (!current_metadata) {
            current_metadata = ReadCurrentMetadata();
        }

        // If the metadata is unreadable or the partition is not marked as
        // Updated, the snapshot is discarded and reported as cancelled.
        if (!current_metadata ||
            GetMetadataPartitionState(*current_metadata, name) != MetadataPartitionState::Updated) {
            DeleteSnapshot(lock, name);
            return MergeResult(UpdateState::Cancelled);
        }

        // During a check, we decided the merge was complete, but we were unable to
        // collapse the device-mapper stack and perform COW cleanup. If we haven't
        // rebooted after this check, the device will still be a snapshot-merge
        // target. If we have rebooted, the device will now be a linear target,
        // and we can try cleanup again.
        if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
            // NB: It's okay if this fails now, we gave cleanup our best effort.
            OnSnapshotMergeComplete(lock, name, snapshot_status);
            return MergeResult(UpdateState::MergeCompleted);
        }

        LOG(ERROR) << "Expected snapshot or snapshot-merge for device: " << name;
        return MergeResult(UpdateState::MergeFailed, MergeFailureCode::UnknownTargetType);
    }

    // This check is expensive so it is only enabled for debugging.
    // Note that the metadata read is part of the DCHECK expression itself.
    DCHECK((current_metadata = ReadCurrentMetadata()) &&
           GetMetadataPartitionState(*current_metadata, name) == MetadataPartitionState::Updated);

    std::string target_type;
    DmTargetSnapshot::Status status;
    if (!QuerySnapshotStatus(name, &target_type, &status)) {
        return MergeResult(UpdateState::MergeFailed, MergeFailureCode::QuerySnapshotStatus);
    }
    if (target_type == "snapshot" &&
        DecideMergePhase(snapshot_status) == MergePhase::SECOND_PHASE &&
        update_status.merge_phase() == MergePhase::FIRST_PHASE) {
        // The snapshot is not being merged because it's in the wrong phase.
        return MergeResult(UpdateState::None);
    }
    if (target_type != "snapshot-merge") {
        // We can get here if we failed to rewrite the target type in
        // InitiateMerge(). If we failed to create the target in first-stage
        // init, boot would not succeed.
        LOG(ERROR) << "Snapshot " << name << " has incorrect target type: " << target_type;
        return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ExpectedMergeTarget);
    }

    // These two values are equal when merging is complete.
    if (status.sectors_allocated != status.metadata_sectors) {
        // A snapshot already recorded as merge-complete must not have
        // unmerged sectors; treat that as a failure rather than as progress.
        if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
            LOG(ERROR) << "Snapshot " << name << " is merging after being marked merge-complete.";
            return MergeResult(UpdateState::MergeFailed,
                               MergeFailureCode::UnmergedSectorsAfterCompletion);
        }
        return MergeResult(UpdateState::Merging);
    }

    // Run the consistency check before recording the merge as complete.
    auto code = CheckMergeConsistency(lock, name, snapshot_status);
    if (code != MergeFailureCode::Ok) {
        return MergeResult(UpdateState::MergeFailed, code);
    }

    // Merging is done. First, update the status file to indicate the merge
    // is complete. We do this before calling OnSnapshotMergeComplete, even
    // though this means the write is potentially wasted work (since in the
    // ideal case we'll immediately delete the file).
    //
    // This makes it simpler to reason about the next reboot: no matter what
    // part of cleanup failed, first-stage init won't try to create another
    // snapshot device for this partition.
    snapshot_status.set_state(SnapshotState::MERGE_COMPLETED);
    if (!WriteSnapshotStatus(lock, snapshot_status)) {
        return MergeResult(UpdateState::MergeFailed, MergeFailureCode::WriteStatus);
    }
    // A cleanup failure here is reported as MergeNeedsReboot, not MergeFailed.
    if (!OnSnapshotMergeComplete(lock, name, snapshot_status)) {
        return MergeResult(UpdateState::MergeNeedsReboot);
    }
    return MergeResult(UpdateState::MergeCompleted, MergeFailureCode::Ok);
}
1165 
1166 // This returns the backing device, not the dm-user layer.
GetMappedCowDeviceName(const std::string & snapshot,const SnapshotStatus & status)1167 static std::string GetMappedCowDeviceName(const std::string& snapshot,
1168                                           const SnapshotStatus& status) {
1169     // If no partition was created (the COW exists entirely on /data), the
1170     // device-mapper layering is different than if we had a partition.
1171     if (status.cow_partition_size() == 0) {
1172         return GetCowImageDeviceName(snapshot);
1173     }
1174     return GetCowName(snapshot);
1175 }
1176 
CheckMergeConsistency(LockedFile * lock,const std::string & name,const SnapshotStatus & status)1177 MergeFailureCode SnapshotManager::CheckMergeConsistency(LockedFile* lock, const std::string& name,
1178                                                         const SnapshotStatus& status) {
1179     CHECK(lock);
1180 
1181     return merge_consistency_checker_(name, status);
1182 }
1183 
CheckMergeConsistency(const std::string & name,const SnapshotStatus & status)1184 MergeFailureCode CheckMergeConsistency(const std::string& name, const SnapshotStatus& status) {
1185     if (!status.compression_enabled()) {
1186         // Do not try to verify old-style COWs yet.
1187         return MergeFailureCode::Ok;
1188     }
1189 
1190     auto& dm = DeviceMapper::Instance();
1191 
1192     std::string cow_image_name = GetMappedCowDeviceName(name, status);
1193     std::string cow_image_path;
1194     if (!dm.GetDmDevicePathByName(cow_image_name, &cow_image_path)) {
1195         LOG(ERROR) << "Failed to get path for cow device: " << cow_image_name;
1196         return MergeFailureCode::GetCowPathConsistencyCheck;
1197     }
1198 
1199     // First pass, count # of ops.
1200     size_t num_ops = 0;
1201     {
1202         unique_fd fd(open(cow_image_path.c_str(), O_RDONLY | O_CLOEXEC));
1203         if (fd < 0) {
1204             PLOG(ERROR) << "Failed to open " << cow_image_name;
1205             return MergeFailureCode::OpenCowConsistencyCheck;
1206         }
1207 
1208         CowReader reader;
1209         if (!reader.Parse(std::move(fd))) {
1210             LOG(ERROR) << "Failed to parse cow " << cow_image_path;
1211             return MergeFailureCode::ParseCowConsistencyCheck;
1212         }
1213 
1214         for (auto iter = reader.GetOpIter(); !iter->Done(); iter->Next()) {
1215             if (!IsMetadataOp(iter->Get())) {
1216                 num_ops++;
1217             }
1218         }
1219     }
1220 
1221     // Second pass, try as hard as we can to get the actual number of blocks
1222     // the system thinks is merged.
1223     unique_fd fd(open(cow_image_path.c_str(), O_RDONLY | O_DIRECT | O_SYNC | O_CLOEXEC));
1224     if (fd < 0) {
1225         PLOG(ERROR) << "Failed to open direct " << cow_image_name;
1226         return MergeFailureCode::OpenCowDirectConsistencyCheck;
1227     }
1228 
1229     void* addr;
1230     size_t page_size = getpagesize();
1231     if (posix_memalign(&addr, page_size, page_size) < 0) {
1232         PLOG(ERROR) << "posix_memalign with page size " << page_size;
1233         return MergeFailureCode::MemAlignConsistencyCheck;
1234     }
1235 
1236     // COWs are always at least 2MB, this is guaranteed in snapshot creation.
1237     std::unique_ptr<void, decltype(&::free)> buffer(addr, ::free);
1238     if (!android::base::ReadFully(fd, buffer.get(), page_size)) {
1239         PLOG(ERROR) << "Direct read failed " << cow_image_name;
1240         return MergeFailureCode::DirectReadConsistencyCheck;
1241     }
1242 
1243     auto header = reinterpret_cast<CowHeader*>(buffer.get());
1244     if (header->num_merge_ops != num_ops) {
1245         LOG(ERROR) << "COW consistency check failed, expected " << num_ops << " to be merged, "
1246                    << "but " << header->num_merge_ops << " were actually recorded.";
1247         LOG(ERROR) << "Aborting merge progress for snapshot " << name
1248                    << ", will try again next boot";
1249         return MergeFailureCode::WrongMergeCountConsistencyCheck;
1250     }
1251 
1252     return MergeFailureCode::Ok;
1253 }
1254 
MergeSecondPhaseSnapshots(LockedFile * lock)1255 MergeFailureCode SnapshotManager::MergeSecondPhaseSnapshots(LockedFile* lock) {
1256     std::vector<std::string> snapshots;
1257     if (!ListSnapshots(lock, &snapshots)) {
1258         return MergeFailureCode::ListSnapshots;
1259     }
1260 
1261     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
1262     CHECK(update_status.state() == UpdateState::Merging ||
1263           update_status.state() == UpdateState::MergeFailed);
1264     CHECK(update_status.merge_phase() == MergePhase::FIRST_PHASE);
1265 
1266     update_status.set_state(UpdateState::Merging);
1267     update_status.set_merge_phase(MergePhase::SECOND_PHASE);
1268     if (!WriteSnapshotUpdateStatus(lock, update_status)) {
1269         return MergeFailureCode::WriteStatus;
1270     }
1271 
1272     MergeFailureCode result = MergeFailureCode::Ok;
1273     for (const auto& snapshot : snapshots) {
1274         SnapshotStatus snapshot_status;
1275         if (!ReadSnapshotStatus(lock, snapshot, &snapshot_status)) {
1276             return MergeFailureCode::ReadStatus;
1277         }
1278         if (DecideMergePhase(snapshot_status) != MergePhase::SECOND_PHASE) {
1279             continue;
1280         }
1281         auto code = SwitchSnapshotToMerge(lock, snapshot);
1282         if (code != MergeFailureCode::Ok) {
1283             LOG(ERROR) << "Failed to switch snapshot to a second-phase merge target: " << snapshot;
1284             if (result == MergeFailureCode::Ok) {
1285                 result = code;
1286             }
1287         }
1288     }
1289     return result;
1290 }
1291 
GetSnapshotBootIndicatorPath()1292 std::string SnapshotManager::GetSnapshotBootIndicatorPath() {
1293     return metadata_dir_ + "/" + android::base::Basename(kBootIndicatorPath);
1294 }
1295 
GetRollbackIndicatorPath()1296 std::string SnapshotManager::GetRollbackIndicatorPath() {
1297     return metadata_dir_ + "/" + android::base::Basename(kRollbackIndicatorPath);
1298 }
1299 
GetForwardMergeIndicatorPath()1300 std::string SnapshotManager::GetForwardMergeIndicatorPath() {
1301     return metadata_dir_ + "/allow-forward-merge";
1302 }
1303 
GetOldPartitionMetadataPath()1304 std::string SnapshotManager::GetOldPartitionMetadataPath() {
1305     return metadata_dir_ + "/old-partition-metadata";
1306 }
1307 
AcknowledgeMergeSuccess(LockedFile * lock)1308 void SnapshotManager::AcknowledgeMergeSuccess(LockedFile* lock) {
1309     // It's not possible to remove update state in recovery, so write an
1310     // indicator that cleanup is needed on reboot. If a factory data reset
1311     // was requested, it doesn't matter, everything will get wiped anyway.
1312     // To make testing easier we consider a /data wipe as cleaned up.
1313     if (device_->IsRecovery()) {
1314         WriteUpdateState(lock, UpdateState::MergeCompleted);
1315         return;
1316     }
1317 
1318     RemoveAllUpdateState(lock);
1319 }
1320 
AcknowledgeMergeFailure(MergeFailureCode failure_code)1321 void SnapshotManager::AcknowledgeMergeFailure(MergeFailureCode failure_code) {
1322     // Log first, so worst case, we always have a record of why the calls below
1323     // were being made.
1324     LOG(ERROR) << "Merge could not be completed and will be marked as failed.";
1325 
1326     auto lock = LockExclusive();
1327     if (!lock) return;
1328 
1329     // Since we released the lock in between WaitForMerge and here, it's
1330     // possible (1) the merge successfully completed or (2) was already
1331     // marked as a failure. So make sure to check the state again, and
1332     // only mark as a failure if appropriate.
1333     UpdateState state = ReadUpdateState(lock.get());
1334     if (state != UpdateState::Merging && state != UpdateState::MergeNeedsReboot) {
1335         return;
1336     }
1337 
1338     WriteUpdateState(lock.get(), UpdateState::MergeFailed, failure_code);
1339 }
1340 
OnSnapshotMergeComplete(LockedFile * lock,const std::string & name,const SnapshotStatus & status)1341 bool SnapshotManager::OnSnapshotMergeComplete(LockedFile* lock, const std::string& name,
1342                                               const SnapshotStatus& status) {
1343     if (IsSnapshotDevice(name)) {
1344         // We are extra-cautious here, to avoid deleting the wrong table.
1345         std::string target_type;
1346         DmTargetSnapshot::Status dm_status;
1347         if (!QuerySnapshotStatus(name, &target_type, &dm_status)) {
1348             return false;
1349         }
1350         if (target_type != "snapshot-merge") {
1351             LOG(ERROR) << "Unexpected target type " << target_type
1352                        << " for snapshot device: " << name;
1353             return false;
1354         }
1355         if (dm_status.sectors_allocated != dm_status.metadata_sectors) {
1356             LOG(ERROR) << "Merge is unexpectedly incomplete for device " << name;
1357             return false;
1358         }
1359         if (!CollapseSnapshotDevice(name, status)) {
1360             LOG(ERROR) << "Unable to collapse snapshot: " << name;
1361             return false;
1362         }
1363         // Note that collapsing is implicitly an Unmap, so we don't need to
1364         // unmap the snapshot.
1365     }
1366 
1367     if (!DeleteSnapshot(lock, name)) {
1368         LOG(ERROR) << "Could not delete snapshot: " << name;
1369         return false;
1370     }
1371     return true;
1372 }
1373 
CollapseSnapshotDevice(const std::string & name,const SnapshotStatus & status)1374 bool SnapshotManager::CollapseSnapshotDevice(const std::string& name,
1375                                              const SnapshotStatus& status) {
1376     auto& dm = DeviceMapper::Instance();
1377 
1378     // Verify we have a snapshot-merge device.
1379     DeviceMapper::TargetInfo target;
1380     if (!GetSingleTarget(name, TableQuery::Table, &target)) {
1381         return false;
1382     }
1383     if (DeviceMapper::GetTargetType(target.spec) != "snapshot-merge") {
1384         // This should be impossible, it was checked earlier.
1385         LOG(ERROR) << "Snapshot device has invalid target type: " << name;
1386         return false;
1387     }
1388 
1389     std::string base_device, cow_device;
1390     if (!DmTargetSnapshot::GetDevicesFromParams(target.data, &base_device, &cow_device)) {
1391         LOG(ERROR) << "Could not parse snapshot device " << name << " parameters: " << target.data;
1392         return false;
1393     }
1394 
1395     uint64_t snapshot_sectors = status.snapshot_size() / kSectorSize;
1396     if (snapshot_sectors * kSectorSize != status.snapshot_size()) {
1397         LOG(ERROR) << "Snapshot " << name
1398                    << " size is not sector aligned: " << status.snapshot_size();
1399         return false;
1400     }
1401 
1402     uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1403     // Create a DmTable that is identical to the base device.
1404     CreateLogicalPartitionParams base_device_params{
1405             .block_device = device_->GetSuperDevice(slot),
1406             .metadata_slot = slot,
1407             .partition_name = name,
1408             .partition_opener = &device_->GetPartitionOpener(),
1409     };
1410     DmTable table;
1411     if (!CreateDmTable(base_device_params, &table)) {
1412         LOG(ERROR) << "Could not create a DmTable for partition: " << name;
1413         return false;
1414     }
1415 
1416     if (!dm.LoadTableAndActivate(name, table)) {
1417         return false;
1418     }
1419 
1420     // Attempt to delete the snapshot device if one still exists. Nothing
1421     // should be depending on the device, and device-mapper should have
1422     // flushed remaining I/O. We could in theory replace with dm-zero (or
1423     // re-use the table above), but for now it's better to know why this
1424     // would fail.
1425     if (status.compression_enabled()) {
1426         UnmapDmUserDevice(name);
1427     }
1428     auto base_name = GetBaseDeviceName(name);
1429     if (!DeleteDeviceIfExists(base_name)) {
1430         LOG(ERROR) << "Unable to delete base device for snapshot: " << base_name;
1431     }
1432 
1433     if (!DeleteDeviceIfExists(GetSourceDeviceName(name), 4000ms)) {
1434         LOG(ERROR) << "Unable to delete source device for snapshot: " << GetSourceDeviceName(name);
1435     }
1436 
1437     return true;
1438 }
1439 
HandleCancelledUpdate(LockedFile * lock,const std::function<bool ()> & before_cancel)1440 bool SnapshotManager::HandleCancelledUpdate(LockedFile* lock,
1441                                             const std::function<bool()>& before_cancel) {
1442     auto slot = GetCurrentSlot();
1443     if (slot == Slot::Unknown) {
1444         return false;
1445     }
1446 
1447     // If all snapshots were reflashed, then cancel the entire update.
1448     if (AreAllSnapshotsCancelled(lock)) {
1449         LOG(WARNING) << "Detected re-flashing, cancelling unverified update.";
1450         return RemoveAllUpdateState(lock, before_cancel);
1451     }
1452 
1453     // If update has been rolled back, then cancel the entire update.
1454     // Client (update_engine) is responsible for doing additional cleanup work on its own states
1455     // when ProcessUpdateState() returns UpdateState::Cancelled.
1456     auto current_slot = GetCurrentSlot();
1457     if (current_slot != Slot::Source) {
1458         LOG(INFO) << "Update state is being processed while booting at " << current_slot
1459                   << " slot, taking no action.";
1460         return false;
1461     }
1462 
1463     // current_slot == Source. Attempt to detect rollbacks.
1464     if (access(GetRollbackIndicatorPath().c_str(), F_OK) != 0) {
1465         // This unverified update is not attempted. Take no action.
1466         PLOG(INFO) << "Rollback indicator not detected. "
1467                    << "Update state is being processed before reboot, taking no action.";
1468         return false;
1469     }
1470 
1471     LOG(WARNING) << "Detected rollback, cancelling unverified update.";
1472     return RemoveAllUpdateState(lock, before_cancel);
1473 }
1474 
PerformInitTransition(InitTransition transition,std::vector<std::string> * snapuserd_argv)1475 bool SnapshotManager::PerformInitTransition(InitTransition transition,
1476                                             std::vector<std::string>* snapuserd_argv) {
1477     LOG(INFO) << "Performing transition for snapuserd.";
1478 
1479     // Don't use EnsureSnapuserdConnected() because this is called from init,
1480     // and attempting to do so will deadlock.
1481     if (!snapuserd_client_ && transition != InitTransition::SELINUX_DETACH) {
1482         snapuserd_client_ = SnapuserdClient::Connect(kSnapuserdSocket, 10s);
1483         if (!snapuserd_client_) {
1484             LOG(ERROR) << "Unable to connect to snapuserd";
1485             return false;
1486         }
1487     }
1488 
1489     auto& dm = DeviceMapper::Instance();
1490 
1491     auto lock = LockExclusive();
1492     if (!lock) return false;
1493 
1494     std::vector<std::string> snapshots;
1495     if (!ListSnapshots(lock.get(), &snapshots)) {
1496         LOG(ERROR) << "Failed to list snapshots.";
1497         return false;
1498     }
1499 
1500     size_t num_cows = 0;
1501     size_t ok_cows = 0;
1502     for (const auto& snapshot : snapshots) {
1503         std::string user_cow_name = GetDmUserCowName(snapshot);
1504         if (dm.GetState(user_cow_name) == DmDeviceState::INVALID) {
1505             continue;
1506         }
1507 
1508         DeviceMapper::TargetInfo target;
1509         if (!GetSingleTarget(user_cow_name, TableQuery::Table, &target)) {
1510             continue;
1511         }
1512 
1513         auto target_type = DeviceMapper::GetTargetType(target.spec);
1514         if (target_type != "user") {
1515             LOG(ERROR) << "Unexpected target type for " << user_cow_name << ": " << target_type;
1516             continue;
1517         }
1518 
1519         num_cows++;
1520 
1521         SnapshotStatus snapshot_status;
1522         if (!ReadSnapshotStatus(lock.get(), snapshot, &snapshot_status)) {
1523             LOG(ERROR) << "Unable to read snapshot status: " << snapshot;
1524             continue;
1525         }
1526 
1527         auto misc_name = user_cow_name;
1528 
1529         DmTable table;
1530         table.Emplace<DmTargetUser>(0, target.spec.length, misc_name);
1531         if (!dm.LoadTableAndActivate(user_cow_name, table)) {
1532             LOG(ERROR) << "Unable to swap tables for " << misc_name;
1533             continue;
1534         }
1535 
1536         std::string source_device_name;
1537         if (snapshot_status.old_partition_size() > 0) {
1538             source_device_name = GetSourceDeviceName(snapshot);
1539         } else {
1540             source_device_name = GetBaseDeviceName(snapshot);
1541         }
1542 
1543         std::string source_device;
1544         if (!dm.GetDmDevicePathByName(source_device_name, &source_device)) {
1545             LOG(ERROR) << "Could not get device path for " << GetSourceDeviceName(snapshot);
1546             continue;
1547         }
1548 
1549         std::string cow_image_name = GetMappedCowDeviceName(snapshot, snapshot_status);
1550 
1551         std::string cow_image_device;
1552         if (!dm.GetDmDevicePathByName(cow_image_name, &cow_image_device)) {
1553             LOG(ERROR) << "Could not get device path for " << cow_image_name;
1554             continue;
1555         }
1556 
1557         // Wait for ueventd to acknowledge and create the control device node.
1558         std::string control_device = "/dev/dm-user/" + misc_name;
1559         if (!WaitForDevice(control_device, 10s)) {
1560             LOG(ERROR) << "dm-user control device no found:  " << misc_name;
1561             continue;
1562         }
1563 
1564         if (transition == InitTransition::SELINUX_DETACH) {
1565             auto message = misc_name + "," + cow_image_device + "," + source_device;
1566             snapuserd_argv->emplace_back(std::move(message));
1567 
1568             // Do not attempt to connect to the new snapuserd yet, it hasn't
1569             // been started. We do however want to wait for the misc device
1570             // to have been created.
1571             ok_cows++;
1572             continue;
1573         }
1574 
1575         uint64_t base_sectors =
1576                 snapuserd_client_->InitDmUserCow(misc_name, cow_image_device, source_device);
1577         if (base_sectors == 0) {
1578             // Unrecoverable as metadata reads from cow device failed
1579             LOG(FATAL) << "Failed to retrieve base_sectors from Snapuserd";
1580             return false;
1581         }
1582 
1583         CHECK(base_sectors <= target.spec.length);
1584 
1585         if (!snapuserd_client_->AttachDmUser(misc_name)) {
1586             // This error is unrecoverable. We cannot proceed because reads to
1587             // the underlying device will fail.
1588             LOG(FATAL) << "Could not initialize snapuserd for " << user_cow_name;
1589             return false;
1590         }
1591 
1592         ok_cows++;
1593     }
1594 
1595     if (ok_cows != num_cows) {
1596         LOG(ERROR) << "Could not transition all snapuserd consumers.";
1597         return false;
1598     }
1599     return true;
1600 }
1601 
ReadCurrentMetadata()1602 std::unique_ptr<LpMetadata> SnapshotManager::ReadCurrentMetadata() {
1603     const auto& opener = device_->GetPartitionOpener();
1604     uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1605     auto super_device = device_->GetSuperDevice(slot);
1606     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
1607     if (!metadata) {
1608         LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
1609         return nullptr;
1610     }
1611     return metadata;
1612 }
1613 
GetMetadataPartitionState(const LpMetadata & metadata,const std::string & name)1614 SnapshotManager::MetadataPartitionState SnapshotManager::GetMetadataPartitionState(
1615         const LpMetadata& metadata, const std::string& name) {
1616     auto partition = android::fs_mgr::FindPartition(metadata, name);
1617     if (!partition) return MetadataPartitionState::None;
1618     if (partition->attributes & LP_PARTITION_ATTR_UPDATED) {
1619         return MetadataPartitionState::Updated;
1620     }
1621     return MetadataPartitionState::Flashed;
1622 }
1623 
AreAllSnapshotsCancelled(LockedFile * lock)1624 bool SnapshotManager::AreAllSnapshotsCancelled(LockedFile* lock) {
1625     std::vector<std::string> snapshots;
1626     if (!ListSnapshots(lock, &snapshots)) {
1627         LOG(WARNING) << "Failed to list snapshots to determine whether device has been flashed "
1628                      << "after applying an update. Assuming no snapshots.";
1629         // Let HandleCancelledUpdate resets UpdateState.
1630         return true;
1631     }
1632 
1633     std::map<std::string, bool> flashing_status;
1634 
1635     if (!GetSnapshotFlashingStatus(lock, snapshots, &flashing_status)) {
1636         LOG(WARNING) << "Failed to determine whether partitions have been flashed. Not"
1637                      << "removing update states.";
1638         return false;
1639     }
1640 
1641     bool all_snapshots_cancelled = std::all_of(flashing_status.begin(), flashing_status.end(),
1642                                                [](const auto& pair) { return pair.second; });
1643 
1644     if (all_snapshots_cancelled) {
1645         LOG(WARNING) << "All partitions are re-flashed after update, removing all update states.";
1646     }
1647     return all_snapshots_cancelled;
1648 }
1649 
GetSnapshotFlashingStatus(LockedFile * lock,const std::vector<std::string> & snapshots,std::map<std::string,bool> * out)1650 bool SnapshotManager::GetSnapshotFlashingStatus(LockedFile* lock,
1651                                                 const std::vector<std::string>& snapshots,
1652                                                 std::map<std::string, bool>* out) {
1653     CHECK(lock);
1654 
1655     auto source_slot_suffix = ReadUpdateSourceSlotSuffix();
1656     if (source_slot_suffix.empty()) {
1657         return false;
1658     }
1659     uint32_t source_slot = SlotNumberForSlotSuffix(source_slot_suffix);
1660     uint32_t target_slot = (source_slot == 0) ? 1 : 0;
1661 
1662     // Attempt to detect re-flashing on each partition.
1663     // - If all partitions are re-flashed, we can proceed to cancel the whole update.
1664     // - If only some of the partitions are re-flashed, snapshots for re-flashed partitions are
1665     //   deleted. Caller is responsible for merging the rest of the snapshots.
1666     // - If none of the partitions are re-flashed, caller is responsible for merging the snapshots.
1667     //
1668     // Note that we use target slot metadata, since if an OTA has been applied
1669     // to the target slot, we can detect the UPDATED flag. Any kind of flash
1670     // operation against dynamic partitions ensures that all copies of the
1671     // metadata are in sync, so flashing all partitions on the source slot will
1672     // remove the UPDATED flag on the target slot as well.
1673     const auto& opener = device_->GetPartitionOpener();
1674     auto super_device = device_->GetSuperDevice(target_slot);
1675     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, target_slot);
1676     if (!metadata) {
1677         return false;
1678     }
1679 
1680     for (const auto& snapshot_name : snapshots) {
1681         if (GetMetadataPartitionState(*metadata, snapshot_name) ==
1682             MetadataPartitionState::Updated) {
1683             out->emplace(snapshot_name, false);
1684         } else {
1685             // Delete snapshots for partitions that are re-flashed after the update.
1686             LOG(WARNING) << "Detected re-flashing of partition " << snapshot_name << ".";
1687             out->emplace(snapshot_name, true);
1688         }
1689     }
1690     return true;
1691 }
1692 
RemoveAllSnapshots(LockedFile * lock)1693 bool SnapshotManager::RemoveAllSnapshots(LockedFile* lock) {
1694     std::vector<std::string> snapshots;
1695     if (!ListSnapshots(lock, &snapshots)) {
1696         LOG(ERROR) << "Could not list snapshots";
1697         return false;
1698     }
1699 
1700     std::map<std::string, bool> flashing_status;
1701     if (!GetSnapshotFlashingStatus(lock, snapshots, &flashing_status)) {
1702         LOG(WARNING) << "Failed to get flashing status";
1703     }
1704 
1705     auto current_slot = GetCurrentSlot();
1706     bool ok = true;
1707     bool has_mapped_cow_images = false;
1708     for (const auto& name : snapshots) {
1709         // If booting off source slot, it is okay to unmap and delete all the snapshots.
1710         // If boot indicator is missing, update state is None or Initiated, so
1711         //   it is also okay to unmap and delete all the snapshots.
1712         // If booting off target slot,
1713         //  - should not unmap because:
1714         //    - In Android mode, snapshots are not mapped, but
1715         //      filesystems are mounting off dm-linear targets directly.
1716         //    - In recovery mode, assume nothing is mapped, so it is optional to unmap.
1717         //  - If partition is flashed or unknown, it is okay to delete snapshots.
1718         //    Otherwise (UPDATED flag), only delete snapshots if they are not mapped
1719         //    as dm-snapshot (for example, after merge completes).
1720         bool should_unmap = current_slot != Slot::Target;
1721         bool should_delete = ShouldDeleteSnapshot(flashing_status, current_slot, name);
1722         if (should_unmap && android::base::EndsWith(name, device_->GetSlotSuffix())) {
1723             // Something very unexpected has happened - we want to unmap this
1724             // snapshot, but it's on the wrong slot. We can't unmap an active
1725             // partition. If this is not really a snapshot, skip the unmap
1726             // step.
1727             auto& dm = DeviceMapper::Instance();
1728             if (dm.GetState(name) == DmDeviceState::INVALID || !IsSnapshotDevice(name)) {
1729                 LOG(ERROR) << "Detected snapshot " << name << " on " << current_slot << " slot"
1730                            << " for source partition; removing without unmap.";
1731                 should_unmap = false;
1732             }
1733         }
1734 
1735         bool partition_ok = true;
1736         if (should_unmap && !UnmapPartitionWithSnapshot(lock, name)) {
1737             partition_ok = false;
1738         }
1739         if (partition_ok && should_delete && !DeleteSnapshot(lock, name)) {
1740             partition_ok = false;
1741         }
1742 
1743         if (!partition_ok) {
1744             // Remember whether or not we were able to unmap the cow image.
1745             auto cow_image_device = GetCowImageDeviceName(name);
1746             has_mapped_cow_images |=
1747                     (EnsureImageManager() && images_->IsImageMapped(cow_image_device));
1748 
1749             ok = false;
1750         }
1751     }
1752 
1753     if (ok || !has_mapped_cow_images) {
1754         // Delete any image artifacts as a precaution, in case an update is
1755         // being cancelled due to some corrupted state in an lp_metadata file.
1756         // Note that we do not do this if some cow images are still mapped,
1757         // since we must not remove backing storage if it's in use.
1758         if (!EnsureImageManager() || !images_->RemoveAllImages()) {
1759             LOG(ERROR) << "Could not remove all snapshot artifacts";
1760             return false;
1761         }
1762     }
1763     return ok;
1764 }
1765 
1766 // See comments in RemoveAllSnapshots().
ShouldDeleteSnapshot(const std::map<std::string,bool> & flashing_status,Slot current_slot,const std::string & name)1767 bool SnapshotManager::ShouldDeleteSnapshot(const std::map<std::string, bool>& flashing_status,
1768                                            Slot current_slot, const std::string& name) {
1769     if (current_slot != Slot::Target) {
1770         return true;
1771     }
1772     auto it = flashing_status.find(name);
1773     if (it == flashing_status.end()) {
1774         LOG(WARNING) << "Can't determine flashing status for " << name;
1775         return true;
1776     }
1777     if (it->second) {
1778         // partition flashed, okay to delete obsolete snapshots
1779         return true;
1780     }
1781     return !IsSnapshotDevice(name);
1782 }
1783 
GetUpdateState(double * progress)1784 UpdateState SnapshotManager::GetUpdateState(double* progress) {
1785     // If we've never started an update, the state file won't exist.
1786     auto state_file = GetStateFilePath();
1787     if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
1788         return UpdateState::None;
1789     }
1790 
1791     auto lock = LockShared();
1792     if (!lock) {
1793         return UpdateState::None;
1794     }
1795 
1796     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock.get());
1797     auto state = update_status.state();
1798     if (progress == nullptr) {
1799         return state;
1800     }
1801 
1802     if (state == UpdateState::MergeCompleted) {
1803         *progress = 100.0;
1804         return state;
1805     }
1806 
1807     *progress = 0.0;
1808     if (state != UpdateState::Merging) {
1809         return state;
1810     }
1811 
1812     // Sum all the snapshot states as if the system consists of a single huge
1813     // snapshots device, then compute the merge completion percentage of that
1814     // device.
1815     std::vector<std::string> snapshots;
1816     if (!ListSnapshots(lock.get(), &snapshots)) {
1817         LOG(ERROR) << "Could not list snapshots";
1818         return state;
1819     }
1820 
1821     DmTargetSnapshot::Status fake_snapshots_status = {};
1822     for (const auto& snapshot : snapshots) {
1823         DmTargetSnapshot::Status current_status;
1824 
1825         if (!IsSnapshotDevice(snapshot)) continue;
1826         if (!QuerySnapshotStatus(snapshot, nullptr, &current_status)) continue;
1827 
1828         fake_snapshots_status.sectors_allocated += current_status.sectors_allocated;
1829         fake_snapshots_status.total_sectors += current_status.total_sectors;
1830         fake_snapshots_status.metadata_sectors += current_status.metadata_sectors;
1831     }
1832 
1833     *progress = DmTargetSnapshot::MergePercent(fake_snapshots_status,
1834                                                update_status.sectors_allocated());
1835 
1836     return state;
1837 }
1838 
UpdateUsesCompression()1839 bool SnapshotManager::UpdateUsesCompression() {
1840     auto lock = LockShared();
1841     if (!lock) return false;
1842     return UpdateUsesCompression(lock.get());
1843 }
1844 
UpdateUsesCompression(LockedFile * lock)1845 bool SnapshotManager::UpdateUsesCompression(LockedFile* lock) {
1846     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
1847     return update_status.compression_enabled();
1848 }
1849 
ListSnapshots(LockedFile * lock,std::vector<std::string> * snapshots,const std::string & suffix)1850 bool SnapshotManager::ListSnapshots(LockedFile* lock, std::vector<std::string>* snapshots,
1851                                     const std::string& suffix) {
1852     CHECK(lock);
1853 
1854     auto dir_path = metadata_dir_ + "/snapshots"s;
1855     std::unique_ptr<DIR, decltype(&closedir)> dir(opendir(dir_path.c_str()), closedir);
1856     if (!dir) {
1857         PLOG(ERROR) << "opendir failed: " << dir_path;
1858         return false;
1859     }
1860 
1861     struct dirent* dp;
1862     while ((dp = readdir(dir.get())) != nullptr) {
1863         if (dp->d_type != DT_REG) continue;
1864 
1865         std::string name(dp->d_name);
1866         if (!suffix.empty() && !android::base::EndsWith(name, suffix)) {
1867             continue;
1868         }
1869         snapshots->emplace_back(std::move(name));
1870     }
1871     return true;
1872 }
1873 
IsSnapshotManagerNeeded()1874 bool SnapshotManager::IsSnapshotManagerNeeded() {
1875     return access(kBootIndicatorPath, F_OK) == 0;
1876 }
1877 
GetGlobalRollbackIndicatorPath()1878 std::string SnapshotManager::GetGlobalRollbackIndicatorPath() {
1879     return kRollbackIndicatorPath;
1880 }
1881 
NeedSnapshotsInFirstStageMount()1882 bool SnapshotManager::NeedSnapshotsInFirstStageMount() {
1883     // If we fail to read, we'll wind up using CreateLogicalPartitions, which
1884     // will create devices that look like the old slot, except with extra
1885     // content at the end of each device. This will confuse dm-verity, and
1886     // ultimately we'll fail to boot. Why not make it a fatal error and have
1887     // the reason be clearer? Because the indicator file still exists, and
1888     // if this was FATAL, reverting to the old slot would be broken.
1889     auto slot = GetCurrentSlot();
1890 
1891     if (slot != Slot::Target) {
1892         if (slot == Slot::Source) {
1893             // Device is rebooting into the original slot, so mark this as a
1894             // rollback.
1895             auto path = GetRollbackIndicatorPath();
1896             if (!android::base::WriteStringToFile("1", path)) {
1897                 PLOG(ERROR) << "Unable to write rollback indicator: " << path;
1898             } else {
1899                 LOG(INFO) << "Rollback detected, writing rollback indicator to " << path;
1900             }
1901         }
1902         LOG(INFO) << "Not booting from new slot. Will not mount snapshots.";
1903         return false;
1904     }
1905 
1906     // If we can't read the update state, it's unlikely anything else will
1907     // succeed, so this is a fatal error. We'll eventually exhaust boot
1908     // attempts and revert to the old slot.
1909     auto lock = LockShared();
1910     if (!lock) {
1911         LOG(FATAL) << "Could not read update state to determine snapshot status";
1912         return false;
1913     }
1914     switch (ReadUpdateState(lock.get())) {
1915         case UpdateState::Unverified:
1916         case UpdateState::Merging:
1917         case UpdateState::MergeFailed:
1918             return true;
1919         default:
1920             return false;
1921     }
1922 }
1923 
CreateLogicalAndSnapshotPartitions(const std::string & super_device,const std::chrono::milliseconds & timeout_ms)1924 bool SnapshotManager::CreateLogicalAndSnapshotPartitions(
1925         const std::string& super_device, const std::chrono::milliseconds& timeout_ms) {
1926     LOG(INFO) << "Creating logical partitions with snapshots as needed";
1927 
1928     auto lock = LockExclusive();
1929     if (!lock) return false;
1930 
1931     uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1932     return MapAllPartitions(lock.get(), super_device, slot, timeout_ms);
1933 }
1934 
MapAllPartitions(LockedFile * lock,const std::string & super_device,uint32_t slot,const std::chrono::milliseconds & timeout_ms)1935 bool SnapshotManager::MapAllPartitions(LockedFile* lock, const std::string& super_device,
1936                                        uint32_t slot, const std::chrono::milliseconds& timeout_ms) {
1937     const auto& opener = device_->GetPartitionOpener();
1938     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
1939     if (!metadata) {
1940         LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
1941         return false;
1942     }
1943 
1944     if (!EnsureImageManager()) {
1945         return false;
1946     }
1947 
1948     for (const auto& partition : metadata->partitions) {
1949         if (GetPartitionGroupName(metadata->groups[partition.group_index]) == kCowGroupName) {
1950             LOG(INFO) << "Skip mapping partition " << GetPartitionName(partition) << " in group "
1951                       << kCowGroupName;
1952             continue;
1953         }
1954 
1955         CreateLogicalPartitionParams params = {
1956                 .block_device = super_device,
1957                 .metadata = metadata.get(),
1958                 .partition = &partition,
1959                 .partition_opener = &opener,
1960                 .timeout_ms = timeout_ms,
1961         };
1962         if (!MapPartitionWithSnapshot(lock, std::move(params), SnapshotContext::Mount, nullptr)) {
1963             return false;
1964         }
1965     }
1966 
1967     LOG(INFO) << "Created logical partitions with snapshot.";
1968     return true;
1969 }
1970 
GetRemainingTime(const std::chrono::milliseconds & timeout,const std::chrono::time_point<std::chrono::steady_clock> & begin)1971 static std::chrono::milliseconds GetRemainingTime(
1972         const std::chrono::milliseconds& timeout,
1973         const std::chrono::time_point<std::chrono::steady_clock>& begin) {
1974     // If no timeout is specified, execute all commands without specifying any timeout.
1975     if (timeout.count() == 0) return std::chrono::milliseconds(0);
1976     auto passed_time = std::chrono::steady_clock::now() - begin;
1977     auto remaining_time = timeout - duration_cast<std::chrono::milliseconds>(passed_time);
1978     if (remaining_time.count() <= 0) {
1979         LOG(ERROR) << "MapPartitionWithSnapshot has reached timeout " << timeout.count() << "ms ("
1980                    << remaining_time.count() << "ms remaining)";
1981         // Return min() instead of remaining_time here because 0 is treated as a special value for
1982         // no timeout, where the rest of the commands will still be executed.
1983         return std::chrono::milliseconds::min();
1984     }
1985     return remaining_time;
1986 }
1987 
MapPartitionWithSnapshot(LockedFile * lock,CreateLogicalPartitionParams params,SnapshotContext context,SnapshotPaths * paths)1988 bool SnapshotManager::MapPartitionWithSnapshot(LockedFile* lock,
1989                                                CreateLogicalPartitionParams params,
1990                                                SnapshotContext context, SnapshotPaths* paths) {
1991     auto begin = std::chrono::steady_clock::now();
1992 
1993     CHECK(lock);
1994 
1995     if (params.GetPartitionName() != params.GetDeviceName()) {
1996         LOG(ERROR) << "Mapping snapshot with a different name is unsupported: partition_name = "
1997                    << params.GetPartitionName() << ", device_name = " << params.GetDeviceName();
1998         return false;
1999     }
2000 
2001     // Fill out fields in CreateLogicalPartitionParams so that we have more information (e.g. by
2002     // reading super partition metadata).
2003     CreateLogicalPartitionParams::OwnedData params_owned_data;
2004     if (!params.InitDefaults(&params_owned_data)) {
2005         return false;
2006     }
2007 
2008     if (!params.partition->num_extents) {
2009         LOG(INFO) << "Skipping zero-length logical partition: " << params.GetPartitionName();
2010         return true;  // leave path empty to indicate that nothing is mapped.
2011     }
2012 
2013     // Determine if there is a live snapshot for the SnapshotStatus of the partition; i.e. if the
2014     // partition still has a snapshot that needs to be mapped.  If no live snapshot or merge
2015     // completed, live_snapshot_status is set to nullopt.
2016     std::optional<SnapshotStatus> live_snapshot_status;
2017     do {
2018         if (!(params.partition->attributes & LP_PARTITION_ATTR_UPDATED)) {
2019             LOG(INFO) << "Detected re-flashing of partition, will skip snapshot: "
2020                       << params.GetPartitionName();
2021             break;
2022         }
2023         auto file_path = GetSnapshotStatusFilePath(params.GetPartitionName());
2024         if (access(file_path.c_str(), F_OK) != 0) {
2025             if (errno != ENOENT) {
2026                 PLOG(INFO) << "Can't map snapshot for " << params.GetPartitionName()
2027                            << ": Can't access " << file_path;
2028                 return false;
2029             }
2030             break;
2031         }
2032         live_snapshot_status = std::make_optional<SnapshotStatus>();
2033         if (!ReadSnapshotStatus(lock, params.GetPartitionName(), &*live_snapshot_status)) {
2034             return false;
2035         }
2036         // No live snapshot if merge is completed.
2037         if (live_snapshot_status->state() == SnapshotState::MERGE_COMPLETED) {
2038             live_snapshot_status.reset();
2039         }
2040 
2041         if (live_snapshot_status->state() == SnapshotState::NONE ||
2042             live_snapshot_status->cow_partition_size() + live_snapshot_status->cow_file_size() ==
2043                     0) {
2044             LOG(WARNING) << "Snapshot status for " << params.GetPartitionName()
2045                          << " is invalid, ignoring: state = "
2046                          << SnapshotState_Name(live_snapshot_status->state())
2047                          << ", cow_partition_size = " << live_snapshot_status->cow_partition_size()
2048                          << ", cow_file_size = " << live_snapshot_status->cow_file_size();
2049             live_snapshot_status.reset();
2050         }
2051     } while (0);
2052 
2053     if (live_snapshot_status.has_value()) {
2054         // dm-snapshot requires the base device to be writable.
2055         params.force_writable = true;
2056         // Map the base device with a different name to avoid collision.
2057         params.device_name = GetBaseDeviceName(params.GetPartitionName());
2058     }
2059 
2060     AutoDeviceList created_devices;
2061 
2062     // Create the base device for the snapshot, or if there is no snapshot, the
2063     // device itself. This device consists of the real blocks in the super
2064     // partition that this logical partition occupies.
2065     auto& dm = DeviceMapper::Instance();
2066     std::string base_path;
2067     if (!CreateLogicalPartition(params, &base_path)) {
2068         LOG(ERROR) << "Could not create logical partition " << params.GetPartitionName()
2069                    << " as device " << params.GetDeviceName();
2070         return false;
2071     }
2072     created_devices.EmplaceBack<AutoUnmapDevice>(&dm, params.GetDeviceName());
2073 
2074     if (paths) {
2075         paths->target_device = base_path;
2076     }
2077 
2078     if (!live_snapshot_status.has_value()) {
2079         created_devices.Release();
2080         return true;
2081     }
2082 
2083     // We don't have ueventd in first-stage init, so use device major:minor
2084     // strings instead.
2085     std::string base_device;
2086     if (!dm.GetDeviceString(params.GetDeviceName(), &base_device)) {
2087         LOG(ERROR) << "Could not determine major/minor for: " << params.GetDeviceName();
2088         return false;
2089     }
2090 
2091     auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
2092     if (remaining_time.count() < 0) return false;
2093 
2094     std::string cow_name;
2095     CreateLogicalPartitionParams cow_params = params;
2096     cow_params.timeout_ms = remaining_time;
2097     if (!MapCowDevices(lock, cow_params, *live_snapshot_status, &created_devices, &cow_name)) {
2098         return false;
2099     }
2100     std::string cow_device;
2101     if (!GetMappedImageDeviceStringOrPath(cow_name, &cow_device)) {
2102         LOG(ERROR) << "Could not determine major/minor for: " << cow_name;
2103         return false;
2104     }
2105     if (paths) {
2106         paths->cow_device_name = cow_name;
2107     }
2108 
2109     remaining_time = GetRemainingTime(params.timeout_ms, begin);
2110     if (remaining_time.count() < 0) return false;
2111 
2112     if (context == SnapshotContext::Update && live_snapshot_status->compression_enabled()) {
2113         // Stop here, we can't run dm-user yet, the COW isn't built.
2114         created_devices.Release();
2115         return true;
2116     }
2117 
2118     if (live_snapshot_status->compression_enabled()) {
2119         // Get the source device (eg the view of the partition from before it was resized).
2120         std::string source_device_path;
2121         if (live_snapshot_status->old_partition_size() > 0) {
2122             if (!MapSourceDevice(lock, params.GetPartitionName(), remaining_time,
2123                                  &source_device_path)) {
2124                 LOG(ERROR) << "Could not map source device for: " << cow_name;
2125                 return false;
2126             }
2127 
2128             auto source_device = GetSourceDeviceName(params.GetPartitionName());
2129             created_devices.EmplaceBack<AutoUnmapDevice>(&dm, source_device);
2130         } else {
2131             source_device_path = base_path;
2132         }
2133 
2134         if (!WaitForDevice(source_device_path, remaining_time)) {
2135             return false;
2136         }
2137 
2138         std::string cow_path;
2139         if (!GetMappedImageDevicePath(cow_name, &cow_path)) {
2140             LOG(ERROR) << "Could not determine path for: " << cow_name;
2141             return false;
2142         }
2143         if (!WaitForDevice(cow_path, remaining_time)) {
2144             return false;
2145         }
2146 
2147         auto name = GetDmUserCowName(params.GetPartitionName());
2148 
2149         std::string new_cow_device;
2150         if (!MapDmUserCow(lock, name, cow_path, source_device_path, remaining_time,
2151                           &new_cow_device)) {
2152             LOG(ERROR) << "Could not map dm-user device for partition "
2153                        << params.GetPartitionName();
2154             return false;
2155         }
2156         created_devices.EmplaceBack<AutoUnmapDevice>(&dm, name);
2157 
2158         remaining_time = GetRemainingTime(params.timeout_ms, begin);
2159         if (remaining_time.count() < 0) return false;
2160 
2161         cow_device = new_cow_device;
2162     }
2163 
2164     std::string path;
2165     if (!MapSnapshot(lock, params.GetPartitionName(), base_device, cow_device, remaining_time,
2166                      &path)) {
2167         LOG(ERROR) << "Could not map snapshot for partition: " << params.GetPartitionName();
2168         return false;
2169     }
2170     // No need to add params.GetPartitionName() to created_devices since it is immediately released.
2171 
2172     if (paths) {
2173         paths->snapshot_device = path;
2174     }
2175 
2176     created_devices.Release();
2177 
2178     LOG(INFO) << "Mapped " << params.GetPartitionName() << " as snapshot device at " << path;
2179     return true;
2180 }
2181 
UnmapPartitionWithSnapshot(LockedFile * lock,const std::string & target_partition_name)2182 bool SnapshotManager::UnmapPartitionWithSnapshot(LockedFile* lock,
2183                                                  const std::string& target_partition_name) {
2184     CHECK(lock);
2185 
2186     if (!UnmapSnapshot(lock, target_partition_name)) {
2187         return false;
2188     }
2189 
2190     if (!UnmapCowDevices(lock, target_partition_name)) {
2191         return false;
2192     }
2193 
2194     auto base_name = GetBaseDeviceName(target_partition_name);
2195     if (!DeleteDeviceIfExists(base_name)) {
2196         LOG(ERROR) << "Cannot delete base device: " << base_name;
2197         return false;
2198     }
2199 
2200     auto source_name = GetSourceDeviceName(target_partition_name);
2201     if (!DeleteDeviceIfExists(source_name)) {
2202         LOG(ERROR) << "Cannot delete source device: " << source_name;
2203         return false;
2204     }
2205 
2206     LOG(INFO) << "Successfully unmapped snapshot " << target_partition_name;
2207 
2208     return true;
2209 }
2210 
MapCowDevices(LockedFile * lock,const CreateLogicalPartitionParams & params,const SnapshotStatus & snapshot_status,AutoDeviceList * created_devices,std::string * cow_name)2211 bool SnapshotManager::MapCowDevices(LockedFile* lock, const CreateLogicalPartitionParams& params,
2212                                     const SnapshotStatus& snapshot_status,
2213                                     AutoDeviceList* created_devices, std::string* cow_name) {
2214     CHECK(lock);
2215     CHECK(snapshot_status.cow_partition_size() + snapshot_status.cow_file_size() > 0);
2216     auto begin = std::chrono::steady_clock::now();
2217 
2218     std::string partition_name = params.GetPartitionName();
2219     std::string cow_image_name = GetCowImageDeviceName(partition_name);
2220     *cow_name = GetCowName(partition_name);
2221 
2222     auto& dm = DeviceMapper::Instance();
2223 
2224     // Map COW image if necessary.
2225     if (snapshot_status.cow_file_size() > 0) {
2226         if (!EnsureImageManager()) return false;
2227         auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
2228         if (remaining_time.count() < 0) return false;
2229 
2230         if (!MapCowImage(partition_name, remaining_time).has_value()) {
2231             LOG(ERROR) << "Could not map cow image for partition: " << partition_name;
2232             return false;
2233         }
2234         created_devices->EmplaceBack<AutoUnmapImage>(images_.get(), cow_image_name);
2235 
2236         // If no COW partition exists, just return the image alone.
2237         if (snapshot_status.cow_partition_size() == 0) {
2238             *cow_name = std::move(cow_image_name);
2239             LOG(INFO) << "Mapped COW image for " << partition_name << " at " << *cow_name;
2240             return true;
2241         }
2242     }
2243 
2244     auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
2245     if (remaining_time.count() < 0) return false;
2246 
2247     CHECK(snapshot_status.cow_partition_size() > 0);
2248 
2249     // Create the DmTable for the COW device. It is the DmTable of the COW partition plus
2250     // COW image device as the last extent.
2251     CreateLogicalPartitionParams cow_partition_params = params;
2252     cow_partition_params.partition = nullptr;
2253     cow_partition_params.partition_name = *cow_name;
2254     cow_partition_params.device_name.clear();
2255     DmTable table;
2256     if (!CreateDmTable(cow_partition_params, &table)) {
2257         return false;
2258     }
2259     // If the COW image exists, append it as the last extent.
2260     if (snapshot_status.cow_file_size() > 0) {
2261         std::string cow_image_device;
2262         if (!GetMappedImageDeviceStringOrPath(cow_image_name, &cow_image_device)) {
2263             LOG(ERROR) << "Cannot determine major/minor for: " << cow_image_name;
2264             return false;
2265         }
2266         auto cow_partition_sectors = snapshot_status.cow_partition_size() / kSectorSize;
2267         auto cow_image_sectors = snapshot_status.cow_file_size() / kSectorSize;
2268         table.Emplace<DmTargetLinear>(cow_partition_sectors, cow_image_sectors, cow_image_device,
2269                                       0);
2270     }
2271 
2272     // We have created the DmTable now. Map it.
2273     std::string cow_path;
2274     if (!dm.CreateDevice(*cow_name, table, &cow_path, remaining_time)) {
2275         LOG(ERROR) << "Could not create COW device: " << *cow_name;
2276         return false;
2277     }
2278     created_devices->EmplaceBack<AutoUnmapDevice>(&dm, *cow_name);
2279     LOG(INFO) << "Mapped COW device for " << params.GetPartitionName() << " at " << cow_path;
2280     return true;
2281 }
2282 
UnmapCowDevices(LockedFile * lock,const std::string & name)2283 bool SnapshotManager::UnmapCowDevices(LockedFile* lock, const std::string& name) {
2284     CHECK(lock);
2285     if (!EnsureImageManager()) return false;
2286 
2287     if (UpdateUsesCompression(lock) && !UnmapDmUserDevice(name)) {
2288         return false;
2289     }
2290 
2291     if (!DeleteDeviceIfExists(GetCowName(name), 4000ms)) {
2292         LOG(ERROR) << "Cannot unmap: " << GetCowName(name);
2293         return false;
2294     }
2295 
2296     std::string cow_image_name = GetCowImageDeviceName(name);
2297     if (!images_->UnmapImageIfExists(cow_image_name)) {
2298         LOG(ERROR) << "Cannot unmap image " << cow_image_name;
2299         return false;
2300     }
2301     return true;
2302 }
2303 
UnmapDmUserDevice(const std::string & snapshot_name)2304 bool SnapshotManager::UnmapDmUserDevice(const std::string& snapshot_name) {
2305     auto& dm = DeviceMapper::Instance();
2306 
2307     auto dm_user_name = GetDmUserCowName(snapshot_name);
2308     if (dm.GetState(dm_user_name) == DmDeviceState::INVALID) {
2309         return true;
2310     }
2311 
2312     if (!DeleteDeviceIfExists(dm_user_name)) {
2313         LOG(ERROR) << "Cannot unmap " << dm_user_name;
2314         return false;
2315     }
2316 
2317     if (EnsureSnapuserdConnected()) {
2318         if (!snapuserd_client_->WaitForDeviceDelete(dm_user_name)) {
2319             LOG(ERROR) << "Failed to wait for " << dm_user_name << " control device to delete";
2320             return false;
2321         }
2322     }
2323 
2324     // Ensure the control device is gone so we don't run into ABA problems.
2325     auto control_device = "/dev/dm-user/" + dm_user_name;
2326     if (!android::fs_mgr::WaitForFileDeleted(control_device, 10s)) {
2327         LOG(ERROR) << "Timed out waiting for " << control_device << " to unlink";
2328         return false;
2329     }
2330     return true;
2331 }
2332 
MapAllSnapshots(const std::chrono::milliseconds & timeout_ms)2333 bool SnapshotManager::MapAllSnapshots(const std::chrono::milliseconds& timeout_ms) {
2334     auto lock = LockExclusive();
2335     if (!lock) return false;
2336 
2337     auto state = ReadUpdateState(lock.get());
2338     if (state == UpdateState::Unverified) {
2339         if (GetCurrentSlot() == Slot::Target) {
2340             LOG(ERROR) << "Cannot call MapAllSnapshots when booting from the target slot.";
2341             return false;
2342         }
2343     } else if (state != UpdateState::Initiated) {
2344         LOG(ERROR) << "Cannot call MapAllSnapshots from update state: " << state;
2345         return false;
2346     }
2347 
2348     std::vector<std::string> snapshots;
2349     if (!ListSnapshots(lock.get(), &snapshots)) {
2350         return false;
2351     }
2352 
2353     const auto& opener = device_->GetPartitionOpener();
2354     auto slot_suffix = device_->GetOtherSlotSuffix();
2355     auto slot_number = SlotNumberForSlotSuffix(slot_suffix);
2356     auto super_device = device_->GetSuperDevice(slot_number);
2357     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot_number);
2358     if (!metadata) {
2359         LOG(ERROR) << "MapAllSnapshots could not read dynamic partition metadata for device: "
2360                    << super_device;
2361         return false;
2362     }
2363 
2364     for (const auto& snapshot : snapshots) {
2365         if (!UnmapPartitionWithSnapshot(lock.get(), snapshot)) {
2366             LOG(ERROR) << "MapAllSnapshots could not unmap snapshot: " << snapshot;
2367             return false;
2368         }
2369 
2370         CreateLogicalPartitionParams params = {
2371                 .block_device = super_device,
2372                 .metadata = metadata.get(),
2373                 .partition_name = snapshot,
2374                 .partition_opener = &opener,
2375                 .timeout_ms = timeout_ms,
2376         };
2377         if (!MapPartitionWithSnapshot(lock.get(), std::move(params), SnapshotContext::Mount,
2378                                       nullptr)) {
2379             LOG(ERROR) << "MapAllSnapshots failed to map: " << snapshot;
2380             return false;
2381         }
2382     }
2383 
2384     LOG(INFO) << "MapAllSnapshots succeeded.";
2385     return true;
2386 }
2387 
UnmapAllSnapshots()2388 bool SnapshotManager::UnmapAllSnapshots() {
2389     auto lock = LockExclusive();
2390     if (!lock) return false;
2391 
2392     return UnmapAllSnapshots(lock.get());
2393 }
2394 
UnmapAllSnapshots(LockedFile * lock)2395 bool SnapshotManager::UnmapAllSnapshots(LockedFile* lock) {
2396     std::vector<std::string> snapshots;
2397     if (!ListSnapshots(lock, &snapshots)) {
2398         return false;
2399     }
2400 
2401     for (const auto& snapshot : snapshots) {
2402         if (!UnmapPartitionWithSnapshot(lock, snapshot)) {
2403             LOG(ERROR) << "Failed to unmap snapshot: " << snapshot;
2404             return false;
2405         }
2406     }
2407 
2408     // Terminate the daemon and release the snapuserd_client_ object.
2409     // If we need to re-connect with the daemon, EnsureSnapuserdConnected()
2410     // will re-create the object and establish the socket connection.
2411     if (snapuserd_client_) {
2412         LOG(INFO) << "Shutdown snapuserd daemon";
2413         snapuserd_client_->DetachSnapuserd();
2414         snapuserd_client_->CloseConnection();
2415         snapuserd_client_ = nullptr;
2416     }
2417 
2418     return true;
2419 }
2420 
OpenFile(const std::string & file,int lock_flags)2421 auto SnapshotManager::OpenFile(const std::string& file, int lock_flags)
2422         -> std::unique_ptr<LockedFile> {
2423     unique_fd fd(open(file.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
2424     if (fd < 0) {
2425         PLOG(ERROR) << "Open failed: " << file;
2426         return nullptr;
2427     }
2428     if (lock_flags != 0 && TEMP_FAILURE_RETRY(flock(fd, lock_flags)) < 0) {
2429         PLOG(ERROR) << "Acquire flock failed: " << file;
2430         return nullptr;
2431     }
2432     // For simplicity, we want to CHECK that lock_mode == LOCK_EX, in some
2433     // calls, so strip extra flags.
2434     int lock_mode = lock_flags & (LOCK_EX | LOCK_SH);
2435     return std::make_unique<LockedFile>(file, std::move(fd), lock_mode);
2436 }
2437 
~LockedFile()2438 SnapshotManager::LockedFile::~LockedFile() {
2439     if (TEMP_FAILURE_RETRY(flock(fd_, LOCK_UN)) < 0) {
2440         PLOG(ERROR) << "Failed to unlock file: " << path_;
2441     }
2442 }
2443 
GetStateFilePath() const2444 std::string SnapshotManager::GetStateFilePath() const {
2445     return metadata_dir_ + "/state"s;
2446 }
2447 
GetMergeStateFilePath() const2448 std::string SnapshotManager::GetMergeStateFilePath() const {
2449     return metadata_dir_ + "/merge_state"s;
2450 }
2451 
GetLockPath() const2452 std::string SnapshotManager::GetLockPath() const {
2453     return metadata_dir_;
2454 }
2455 
OpenLock(int lock_flags)2456 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::OpenLock(int lock_flags) {
2457     auto lock_file = GetLockPath();
2458     return OpenFile(lock_file, lock_flags);
2459 }
2460 
LockShared()2461 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockShared() {
2462     return OpenLock(LOCK_SH);
2463 }
2464 
LockExclusive()2465 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockExclusive() {
2466     return OpenLock(LOCK_EX);
2467 }
2468 
UpdateStateFromString(const std::string & contents)2469 static UpdateState UpdateStateFromString(const std::string& contents) {
2470     if (contents.empty() || contents == "none") {
2471         return UpdateState::None;
2472     } else if (contents == "initiated") {
2473         return UpdateState::Initiated;
2474     } else if (contents == "unverified") {
2475         return UpdateState::Unverified;
2476     } else if (contents == "merging") {
2477         return UpdateState::Merging;
2478     } else if (contents == "merge-completed") {
2479         return UpdateState::MergeCompleted;
2480     } else if (contents == "merge-needs-reboot") {
2481         return UpdateState::MergeNeedsReboot;
2482     } else if (contents == "merge-failed") {
2483         return UpdateState::MergeFailed;
2484     } else if (contents == "cancelled") {
2485         return UpdateState::Cancelled;
2486     } else {
2487         LOG(ERROR) << "Unknown merge state in update state file: \"" << contents << "\"";
2488         return UpdateState::None;
2489     }
2490 }
2491 
operator <<(std::ostream & os,UpdateState state)2492 std::ostream& operator<<(std::ostream& os, UpdateState state) {
2493     switch (state) {
2494         case UpdateState::None:
2495             return os << "none";
2496         case UpdateState::Initiated:
2497             return os << "initiated";
2498         case UpdateState::Unverified:
2499             return os << "unverified";
2500         case UpdateState::Merging:
2501             return os << "merging";
2502         case UpdateState::MergeCompleted:
2503             return os << "merge-completed";
2504         case UpdateState::MergeNeedsReboot:
2505             return os << "merge-needs-reboot";
2506         case UpdateState::MergeFailed:
2507             return os << "merge-failed";
2508         case UpdateState::Cancelled:
2509             return os << "cancelled";
2510         default:
2511             LOG(ERROR) << "Unknown update state: " << static_cast<uint32_t>(state);
2512             return os;
2513     }
2514 }
2515 
ReadUpdateState(LockedFile * lock)2516 UpdateState SnapshotManager::ReadUpdateState(LockedFile* lock) {
2517     SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock);
2518     return status.state();
2519 }
2520 
ReadSnapshotUpdateStatus(LockedFile * lock)2521 SnapshotUpdateStatus SnapshotManager::ReadSnapshotUpdateStatus(LockedFile* lock) {
2522     CHECK(lock);
2523 
2524     SnapshotUpdateStatus status = {};
2525     std::string contents;
2526     if (!android::base::ReadFileToString(GetStateFilePath(), &contents)) {
2527         PLOG(ERROR) << "Read state file failed";
2528         status.set_state(UpdateState::None);
2529         return status;
2530     }
2531 
2532     if (!status.ParseFromString(contents)) {
2533         LOG(WARNING) << "Unable to parse state file as SnapshotUpdateStatus, using the old format";
2534 
2535         // Try to rollback to legacy file to support devices that are
2536         // currently using the old file format.
2537         // TODO(b/147409432)
2538         status.set_state(UpdateStateFromString(contents));
2539     }
2540 
2541     return status;
2542 }
2543 
WriteUpdateState(LockedFile * lock,UpdateState state,MergeFailureCode failure_code)2544 bool SnapshotManager::WriteUpdateState(LockedFile* lock, UpdateState state,
2545                                        MergeFailureCode failure_code) {
2546     SnapshotUpdateStatus status;
2547     status.set_state(state);
2548 
2549     switch (state) {
2550         case UpdateState::MergeFailed:
2551             status.set_merge_failure_code(failure_code);
2552             break;
2553         case UpdateState::Initiated:
2554             status.set_source_build_fingerprint(
2555                     android::base::GetProperty("ro.build.fingerprint", ""));
2556             break;
2557         default:
2558             break;
2559     }
2560 
2561     // If we're transitioning between two valid states (eg, we're not beginning
2562     // or ending an OTA), then make sure to propagate the compression bit and
2563     // build fingerprint.
2564     if (!(state == UpdateState::Initiated || state == UpdateState::None)) {
2565         SnapshotUpdateStatus old_status = ReadSnapshotUpdateStatus(lock);
2566         status.set_compression_enabled(old_status.compression_enabled());
2567         status.set_source_build_fingerprint(old_status.source_build_fingerprint());
2568         status.set_merge_phase(old_status.merge_phase());
2569     }
2570     return WriteSnapshotUpdateStatus(lock, status);
2571 }
2572 
WriteSnapshotUpdateStatus(LockedFile * lock,const SnapshotUpdateStatus & status)2573 bool SnapshotManager::WriteSnapshotUpdateStatus(LockedFile* lock,
2574                                                 const SnapshotUpdateStatus& status) {
2575     CHECK(lock);
2576     CHECK(lock->lock_mode() == LOCK_EX);
2577 
2578     std::string contents;
2579     if (!status.SerializeToString(&contents)) {
2580         LOG(ERROR) << "Unable to serialize SnapshotUpdateStatus.";
2581         return false;
2582     }
2583 
2584 #ifdef LIBSNAPSHOT_USE_HAL
2585     auto merge_status = MergeStatus::UNKNOWN;
2586     switch (status.state()) {
2587         // The needs-reboot and completed cases imply that /data and /metadata
2588         // can be safely wiped, so we don't report a merge status.
2589         case UpdateState::None:
2590         case UpdateState::MergeNeedsReboot:
2591         case UpdateState::MergeCompleted:
2592         case UpdateState::Initiated:
2593             merge_status = MergeStatus::NONE;
2594             break;
2595         case UpdateState::Unverified:
2596             merge_status = MergeStatus::SNAPSHOTTED;
2597             break;
2598         case UpdateState::Merging:
2599         case UpdateState::MergeFailed:
2600             merge_status = MergeStatus::MERGING;
2601             break;
2602         default:
2603             // Note that Cancelled flows to here - it is never written, since
2604             // it only communicates a transient state to the caller.
2605             LOG(ERROR) << "Unexpected update status: " << status.state();
2606             break;
2607     }
2608 
2609     bool set_before_write =
2610             merge_status == MergeStatus::SNAPSHOTTED || merge_status == MergeStatus::MERGING;
2611     if (set_before_write && !device_->SetBootControlMergeStatus(merge_status)) {
2612         return false;
2613     }
2614 #endif
2615 
2616     if (!WriteStringToFileAtomic(contents, GetStateFilePath())) {
2617         PLOG(ERROR) << "Could not write to state file";
2618         return false;
2619     }
2620 
2621 #ifdef LIBSNAPSHOT_USE_HAL
2622     if (!set_before_write && !device_->SetBootControlMergeStatus(merge_status)) {
2623         return false;
2624     }
2625 #endif
2626     return true;
2627 }
2628 
GetSnapshotStatusFilePath(const std::string & name)2629 std::string SnapshotManager::GetSnapshotStatusFilePath(const std::string& name) {
2630     auto file = metadata_dir_ + "/snapshots/"s + name;
2631     return file;
2632 }
2633 
ReadSnapshotStatus(LockedFile * lock,const std::string & name,SnapshotStatus * status)2634 bool SnapshotManager::ReadSnapshotStatus(LockedFile* lock, const std::string& name,
2635                                          SnapshotStatus* status) {
2636     CHECK(lock);
2637     auto path = GetSnapshotStatusFilePath(name);
2638 
2639     unique_fd fd(open(path.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
2640     if (fd < 0) {
2641         PLOG(ERROR) << "Open failed: " << path;
2642         return false;
2643     }
2644 
2645     if (!status->ParseFromFileDescriptor(fd.get())) {
2646         PLOG(ERROR) << "Unable to parse " << path << " as SnapshotStatus";
2647         return false;
2648     }
2649 
2650     if (status->name() != name) {
2651         LOG(WARNING) << "Found snapshot status named " << status->name() << " in " << path;
2652         status->set_name(name);
2653     }
2654 
2655     return true;
2656 }
2657 
WriteSnapshotStatus(LockedFile * lock,const SnapshotStatus & status)2658 bool SnapshotManager::WriteSnapshotStatus(LockedFile* lock, const SnapshotStatus& status) {
2659     // The caller must take an exclusive lock to modify snapshots.
2660     CHECK(lock);
2661     CHECK(lock->lock_mode() == LOCK_EX);
2662     CHECK(!status.name().empty());
2663 
2664     auto path = GetSnapshotStatusFilePath(status.name());
2665 
2666     std::string content;
2667     if (!status.SerializeToString(&content)) {
2668         LOG(ERROR) << "Unable to serialize SnapshotStatus for " << status.name();
2669         return false;
2670     }
2671 
2672     if (!WriteStringToFileAtomic(content, path)) {
2673         PLOG(ERROR) << "Unable to write SnapshotStatus to " << path;
2674         return false;
2675     }
2676 
2677     return true;
2678 }
2679 
EnsureImageManager()2680 bool SnapshotManager::EnsureImageManager() {
2681     if (images_) return true;
2682 
2683     images_ = device_->OpenImageManager();
2684     if (!images_) {
2685         LOG(ERROR) << "Could not open ImageManager";
2686         return false;
2687     }
2688     return true;
2689 }
2690 
EnsureSnapuserdConnected()2691 bool SnapshotManager::EnsureSnapuserdConnected() {
2692     if (snapuserd_client_) {
2693         return true;
2694     }
2695 
2696     if (!use_first_stage_snapuserd_ && !EnsureSnapuserdStarted()) {
2697         return false;
2698     }
2699 
2700     snapuserd_client_ = SnapuserdClient::Connect(kSnapuserdSocket, 10s);
2701     if (!snapuserd_client_) {
2702         LOG(ERROR) << "Unable to connect to snapuserd";
2703         return false;
2704     }
2705     return true;
2706 }
2707 
UnmapAndDeleteCowPartition(MetadataBuilder * current_metadata)2708 void SnapshotManager::UnmapAndDeleteCowPartition(MetadataBuilder* current_metadata) {
2709     std::vector<std::string> to_delete;
2710     for (auto* existing_cow_partition : current_metadata->ListPartitionsInGroup(kCowGroupName)) {
2711         if (!DeleteDeviceIfExists(existing_cow_partition->name())) {
2712             LOG(WARNING) << existing_cow_partition->name()
2713                          << " cannot be unmapped and its space cannot be reclaimed";
2714             continue;
2715         }
2716         to_delete.push_back(existing_cow_partition->name());
2717     }
2718     for (const auto& name : to_delete) {
2719         current_metadata->RemovePartition(name);
2720     }
2721 }
2722 
AddRequiredSpace(Return orig,const std::map<std::string,SnapshotStatus> & all_snapshot_status)2723 static Return AddRequiredSpace(Return orig,
2724                                const std::map<std::string, SnapshotStatus>& all_snapshot_status) {
2725     if (orig.error_code() != Return::ErrorCode::NO_SPACE) {
2726         return orig;
2727     }
2728     uint64_t sum = 0;
2729     for (auto&& [name, status] : all_snapshot_status) {
2730         sum += status.cow_file_size();
2731     }
2732     return Return::NoSpace(sum);
2733 }
2734 
CreateUpdateSnapshots(const DeltaArchiveManifest & manifest)2735 Return SnapshotManager::CreateUpdateSnapshots(const DeltaArchiveManifest& manifest) {
2736     auto lock = LockExclusive();
2737     if (!lock) return Return::Error();
2738 
2739     auto update_state = ReadUpdateState(lock.get());
2740     if (update_state != UpdateState::Initiated) {
2741         LOG(ERROR) << "Cannot create update snapshots in state " << update_state;
2742         return Return::Error();
2743     }
2744 
2745     // TODO(b/134949511): remove this check. Right now, with overlayfs mounted, the scratch
2746     // partition takes up a big chunk of space in super, causing COW images to be created on
2747     // retrofit Virtual A/B devices.
2748     if (device_->IsOverlayfsSetup()) {
2749         LOG(ERROR) << "Cannot create update snapshots with overlayfs setup. Run `adb enable-verity`"
2750                    << ", reboot, then try again.";
2751         return Return::Error();
2752     }
2753 
2754     const auto& opener = device_->GetPartitionOpener();
2755     auto current_suffix = device_->GetSlotSuffix();
2756     uint32_t current_slot = SlotNumberForSlotSuffix(current_suffix);
2757     auto target_suffix = device_->GetOtherSlotSuffix();
2758     uint32_t target_slot = SlotNumberForSlotSuffix(target_suffix);
2759     auto current_super = device_->GetSuperDevice(current_slot);
2760 
2761     auto current_metadata = MetadataBuilder::New(opener, current_super, current_slot);
2762     if (current_metadata == nullptr) {
2763         LOG(ERROR) << "Cannot create metadata builder.";
2764         return Return::Error();
2765     }
2766 
2767     auto target_metadata =
2768             MetadataBuilder::NewForUpdate(opener, current_super, current_slot, target_slot);
2769     if (target_metadata == nullptr) {
2770         LOG(ERROR) << "Cannot create target metadata builder.";
2771         return Return::Error();
2772     }
2773 
2774     // Delete partitions with target suffix in |current_metadata|. Otherwise,
2775     // partition_cow_creator recognizes these left-over partitions as used space.
2776     for (const auto& group_name : current_metadata->ListGroups()) {
2777         if (android::base::EndsWith(group_name, target_suffix)) {
2778             current_metadata->RemoveGroupAndPartitions(group_name);
2779         }
2780     }
2781 
2782     SnapshotMetadataUpdater metadata_updater(target_metadata.get(), target_slot, manifest);
2783     if (!metadata_updater.Update()) {
2784         LOG(ERROR) << "Cannot calculate new metadata.";
2785         return Return::Error();
2786     }
2787 
2788     // Delete previous COW partitions in current_metadata so that PartitionCowCreator marks those as
2789     // free regions.
2790     UnmapAndDeleteCowPartition(current_metadata.get());
2791 
2792     // Check that all these metadata is not retrofit dynamic partitions. Snapshots on
2793     // devices with retrofit dynamic partitions does not make sense.
2794     // This ensures that current_metadata->GetFreeRegions() uses the same device
2795     // indices as target_metadata (i.e. 0 -> "super").
2796     // This is also assumed in MapCowDevices() call below.
2797     CHECK(current_metadata->GetBlockDevicePartitionName(0) == LP_METADATA_DEFAULT_PARTITION_NAME &&
2798           target_metadata->GetBlockDevicePartitionName(0) == LP_METADATA_DEFAULT_PARTITION_NAME);
2799 
2800     std::map<std::string, SnapshotStatus> all_snapshot_status;
2801 
2802     // In case of error, automatically delete devices that are created along the way.
2803     // Note that "lock" is destroyed after "created_devices", so it is safe to use |lock| for
2804     // these devices.
2805     AutoDeviceList created_devices;
2806 
2807     const auto& dap_metadata = manifest.dynamic_partition_metadata();
2808     CowOptions options;
2809     CowWriter writer(options);
2810     bool cow_format_support = true;
2811     if (dap_metadata.cow_version() < writer.GetCowVersion()) {
2812         cow_format_support = false;
2813     }
2814 
2815     LOG(INFO) << " dap_metadata.cow_version(): " << dap_metadata.cow_version()
2816               << " writer.GetCowVersion(): " << writer.GetCowVersion();
2817 
2818     bool use_compression = IsCompressionEnabled() && dap_metadata.vabc_enabled() &&
2819                            !device_->IsRecovery() && cow_format_support;
2820 
2821     std::string compression_algorithm;
2822     if (use_compression) {
2823         compression_algorithm = dap_metadata.vabc_compression_param();
2824         if (compression_algorithm.empty()) {
2825             // Older OTAs don't set an explicit compression type, so default to gz.
2826             compression_algorithm = "gz";
2827         }
2828     } else {
2829         compression_algorithm = "none";
2830     }
2831 
2832     PartitionCowCreator cow_creator{
2833             .target_metadata = target_metadata.get(),
2834             .target_suffix = target_suffix,
2835             .target_partition = nullptr,
2836             .current_metadata = current_metadata.get(),
2837             .current_suffix = current_suffix,
2838             .update = nullptr,
2839             .extra_extents = {},
2840             .compression_enabled = use_compression,
2841             .compression_algorithm = compression_algorithm,
2842     };
2843 
2844     auto ret = CreateUpdateSnapshotsInternal(lock.get(), manifest, &cow_creator, &created_devices,
2845                                              &all_snapshot_status);
2846     if (!ret.is_ok()) return ret;
2847 
2848     auto exported_target_metadata = target_metadata->Export();
2849     if (exported_target_metadata == nullptr) {
2850         LOG(ERROR) << "Cannot export target metadata";
2851         return Return::Error();
2852     }
2853 
2854     ret = InitializeUpdateSnapshots(lock.get(), target_metadata.get(),
2855                                     exported_target_metadata.get(), target_suffix,
2856                                     all_snapshot_status);
2857     if (!ret.is_ok()) return ret;
2858 
2859     if (!UpdatePartitionTable(opener, device_->GetSuperDevice(target_slot),
2860                               *exported_target_metadata, target_slot)) {
2861         LOG(ERROR) << "Cannot write target metadata";
2862         return Return::Error();
2863     }
2864 
2865     // If compression is enabled, we need to retain a copy of the old metadata
2866     // so we can access original blocks in case they are moved around. We do
2867     // not want to rely on the old super metadata slot because we don't
2868     // guarantee its validity after the slot switch is successful.
2869     if (cow_creator.compression_enabled) {
2870         auto metadata = current_metadata->Export();
2871         if (!metadata) {
2872             LOG(ERROR) << "Could not export current metadata";
2873             return Return::Error();
2874         }
2875 
2876         auto path = GetOldPartitionMetadataPath();
2877         if (!android::fs_mgr::WriteToImageFile(path, *metadata.get())) {
2878             LOG(ERROR) << "Cannot write old metadata to " << path;
2879             return Return::Error();
2880         }
2881     }
2882 
2883     SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
2884     status.set_state(update_state);
2885     status.set_compression_enabled(cow_creator.compression_enabled);
2886     if (!WriteSnapshotUpdateStatus(lock.get(), status)) {
2887         LOG(ERROR) << "Unable to write new update state";
2888         return Return::Error();
2889     }
2890 
2891     created_devices.Release();
2892     LOG(INFO) << "Successfully created all snapshots for target slot " << target_suffix;
2893 
2894     return Return::Ok();
2895 }
2896 
CreateUpdateSnapshotsInternal(LockedFile * lock,const DeltaArchiveManifest & manifest,PartitionCowCreator * cow_creator,AutoDeviceList * created_devices,std::map<std::string,SnapshotStatus> * all_snapshot_status)2897 Return SnapshotManager::CreateUpdateSnapshotsInternal(
2898         LockedFile* lock, const DeltaArchiveManifest& manifest, PartitionCowCreator* cow_creator,
2899         AutoDeviceList* created_devices,
2900         std::map<std::string, SnapshotStatus>* all_snapshot_status) {
2901     CHECK(lock);
2902 
2903     auto* target_metadata = cow_creator->target_metadata;
2904     const auto& target_suffix = cow_creator->target_suffix;
2905 
2906     if (!target_metadata->AddGroup(kCowGroupName, 0)) {
2907         LOG(ERROR) << "Cannot add group " << kCowGroupName;
2908         return Return::Error();
2909     }
2910 
2911     std::map<std::string, const PartitionUpdate*> partition_map;
2912     std::map<std::string, std::vector<Extent>> extra_extents_map;
2913     for (const auto& partition_update : manifest.partitions()) {
2914         auto suffixed_name = partition_update.partition_name() + target_suffix;
2915         auto&& [it, inserted] = partition_map.emplace(suffixed_name, &partition_update);
2916         if (!inserted) {
2917             LOG(ERROR) << "Duplicated partition " << partition_update.partition_name()
2918                        << " in update manifest.";
2919             return Return::Error();
2920         }
2921 
2922         auto& extra_extents = extra_extents_map[suffixed_name];
2923         if (partition_update.has_hash_tree_extent()) {
2924             extra_extents.push_back(partition_update.hash_tree_extent());
2925         }
2926         if (partition_update.has_fec_extent()) {
2927             extra_extents.push_back(partition_update.fec_extent());
2928         }
2929     }
2930 
2931     for (auto* target_partition : ListPartitionsWithSuffix(target_metadata, target_suffix)) {
2932         cow_creator->target_partition = target_partition;
2933         cow_creator->update = nullptr;
2934         auto iter = partition_map.find(target_partition->name());
2935         if (iter != partition_map.end()) {
2936             cow_creator->update = iter->second;
2937         } else {
2938             LOG(INFO) << target_partition->name()
2939                       << " isn't included in the payload, skipping the cow creation.";
2940             continue;
2941         }
2942 
2943         cow_creator->extra_extents.clear();
2944         auto extra_extents_it = extra_extents_map.find(target_partition->name());
2945         if (extra_extents_it != extra_extents_map.end()) {
2946             cow_creator->extra_extents = std::move(extra_extents_it->second);
2947         }
2948 
2949         // Compute the device sizes for the partition.
2950         auto cow_creator_ret = cow_creator->Run();
2951         if (!cow_creator_ret.has_value()) {
2952             LOG(ERROR) << "PartitionCowCreator returned no value for " << target_partition->name();
2953             return Return::Error();
2954         }
2955 
2956         LOG(INFO) << "For partition " << target_partition->name()
2957                   << ", device size = " << cow_creator_ret->snapshot_status.device_size()
2958                   << ", snapshot size = " << cow_creator_ret->snapshot_status.snapshot_size()
2959                   << ", cow partition size = "
2960                   << cow_creator_ret->snapshot_status.cow_partition_size()
2961                   << ", cow file size = " << cow_creator_ret->snapshot_status.cow_file_size();
2962 
2963         // Delete any existing snapshot before re-creating one.
2964         if (!DeleteSnapshot(lock, target_partition->name())) {
2965             LOG(ERROR) << "Cannot delete existing snapshot before creating a new one for partition "
2966                        << target_partition->name();
2967             return Return::Error();
2968         }
2969 
2970         // It is possible that the whole partition uses free space in super, and snapshot / COW
2971         // would not be needed. In this case, skip the partition.
2972         bool needs_snapshot = cow_creator_ret->snapshot_status.snapshot_size() > 0;
2973         bool needs_cow = (cow_creator_ret->snapshot_status.cow_partition_size() +
2974                           cow_creator_ret->snapshot_status.cow_file_size()) > 0;
2975         CHECK(needs_snapshot == needs_cow);
2976 
2977         if (!needs_snapshot) {
2978             LOG(INFO) << "Skip creating snapshot for partition " << target_partition->name()
2979                       << "because nothing needs to be snapshotted.";
2980             continue;
2981         }
2982 
2983         // Find the original partition size.
2984         auto name = target_partition->name();
2985         auto old_partition_name =
2986                 name.substr(0, name.size() - target_suffix.size()) + cow_creator->current_suffix;
2987         auto old_partition = cow_creator->current_metadata->FindPartition(old_partition_name);
2988         if (old_partition) {
2989             cow_creator_ret->snapshot_status.set_old_partition_size(old_partition->size());
2990         }
2991 
2992         // Store these device sizes to snapshot status file.
2993         if (!CreateSnapshot(lock, cow_creator, &cow_creator_ret->snapshot_status)) {
2994             return Return::Error();
2995         }
2996         created_devices->EmplaceBack<AutoDeleteSnapshot>(this, lock, target_partition->name());
2997 
2998         // Create the COW partition. That is, use any remaining free space in super partition before
2999         // creating the COW images.
3000         if (cow_creator_ret->snapshot_status.cow_partition_size() > 0) {
3001             CHECK(cow_creator_ret->snapshot_status.cow_partition_size() % kSectorSize == 0)
3002                     << "cow_partition_size == "
3003                     << cow_creator_ret->snapshot_status.cow_partition_size()
3004                     << " is not a multiple of sector size " << kSectorSize;
3005             auto cow_partition = target_metadata->AddPartition(GetCowName(target_partition->name()),
3006                                                                kCowGroupName, 0 /* flags */);
3007             if (cow_partition == nullptr) {
3008                 return Return::Error();
3009             }
3010 
3011             if (!target_metadata->ResizePartition(
3012                         cow_partition, cow_creator_ret->snapshot_status.cow_partition_size(),
3013                         cow_creator_ret->cow_partition_usable_regions)) {
3014                 LOG(ERROR) << "Cannot create COW partition on metadata with size "
3015                            << cow_creator_ret->snapshot_status.cow_partition_size();
3016                 return Return::Error();
3017             }
3018             // Only the in-memory target_metadata is modified; nothing to clean up if there is an
3019             // error in the future.
3020         }
3021 
3022         all_snapshot_status->emplace(target_partition->name(),
3023                                      std::move(cow_creator_ret->snapshot_status));
3024 
3025         LOG(INFO) << "Successfully created snapshot partition for " << target_partition->name();
3026     }
3027 
3028     LOG(INFO) << "Allocating CoW images.";
3029 
3030     for (auto&& [name, snapshot_status] : *all_snapshot_status) {
3031         // Create the backing COW image if necessary.
3032         if (snapshot_status.cow_file_size() > 0) {
3033             auto ret = CreateCowImage(lock, name);
3034             if (!ret.is_ok()) return AddRequiredSpace(ret, *all_snapshot_status);
3035         }
3036 
3037         LOG(INFO) << "Successfully created snapshot for " << name;
3038     }
3039 
3040     return Return::Ok();
3041 }
3042 
InitializeUpdateSnapshots(LockedFile * lock,MetadataBuilder * target_metadata,const LpMetadata * exported_target_metadata,const std::string & target_suffix,const std::map<std::string,SnapshotStatus> & all_snapshot_status)3043 Return SnapshotManager::InitializeUpdateSnapshots(
3044         LockedFile* lock, MetadataBuilder* target_metadata,
3045         const LpMetadata* exported_target_metadata, const std::string& target_suffix,
3046         const std::map<std::string, SnapshotStatus>& all_snapshot_status) {
3047     CHECK(lock);
3048 
3049     CreateLogicalPartitionParams cow_params{
3050             .block_device = LP_METADATA_DEFAULT_PARTITION_NAME,
3051             .metadata = exported_target_metadata,
3052             .timeout_ms = std::chrono::milliseconds::max(),
3053             .partition_opener = &device_->GetPartitionOpener(),
3054     };
3055     for (auto* target_partition : ListPartitionsWithSuffix(target_metadata, target_suffix)) {
3056         AutoDeviceList created_devices_for_cow;
3057 
3058         if (!UnmapPartitionWithSnapshot(lock, target_partition->name())) {
3059             LOG(ERROR) << "Cannot unmap existing COW devices before re-mapping them for zero-fill: "
3060                        << target_partition->name();
3061             return Return::Error();
3062         }
3063 
3064         auto it = all_snapshot_status.find(target_partition->name());
3065         if (it == all_snapshot_status.end()) continue;
3066         cow_params.partition_name = target_partition->name();
3067         std::string cow_name;
3068         if (!MapCowDevices(lock, cow_params, it->second, &created_devices_for_cow, &cow_name)) {
3069             return Return::Error();
3070         }
3071 
3072         std::string cow_path;
3073         if (!images_->GetMappedImageDevice(cow_name, &cow_path)) {
3074             LOG(ERROR) << "Cannot determine path for " << cow_name;
3075             return Return::Error();
3076         }
3077 
3078         if (it->second.compression_enabled()) {
3079             unique_fd fd(open(cow_path.c_str(), O_RDWR | O_CLOEXEC));
3080             if (fd < 0) {
3081                 PLOG(ERROR) << "open " << cow_path << " failed for snapshot "
3082                             << cow_params.partition_name;
3083                 return Return::Error();
3084             }
3085 
3086             CowOptions options;
3087             if (device()->IsTestDevice()) {
3088                 options.scratch_space = false;
3089             }
3090             options.compression = it->second.compression_algorithm();
3091 
3092             CowWriter writer(options);
3093             if (!writer.Initialize(fd) || !writer.Finalize()) {
3094                 LOG(ERROR) << "Could not initialize COW device for " << target_partition->name();
3095                 return Return::Error();
3096             }
3097         } else {
3098             auto ret = InitializeKernelCow(cow_path);
3099             if (!ret.is_ok()) {
3100                 LOG(ERROR) << "Can't zero-fill COW device for " << target_partition->name() << ": "
3101                            << cow_path;
3102                 return AddRequiredSpace(ret, all_snapshot_status);
3103             }
3104         }
3105         // Let destructor of created_devices_for_cow to unmap the COW devices.
3106     };
3107     return Return::Ok();
3108 }
3109 
MapUpdateSnapshot(const CreateLogicalPartitionParams & params,std::string * snapshot_path)3110 bool SnapshotManager::MapUpdateSnapshot(const CreateLogicalPartitionParams& params,
3111                                         std::string* snapshot_path) {
3112     auto lock = LockShared();
3113     if (!lock) return false;
3114     if (!UnmapPartitionWithSnapshot(lock.get(), params.GetPartitionName())) {
3115         LOG(ERROR) << "Cannot unmap existing snapshot before re-mapping it: "
3116                    << params.GetPartitionName();
3117         return false;
3118     }
3119 
3120     SnapshotStatus status;
3121     if (!ReadSnapshotStatus(lock.get(), params.GetPartitionName(), &status)) {
3122         return false;
3123     }
3124     if (status.compression_enabled()) {
3125         LOG(ERROR) << "Cannot use MapUpdateSnapshot with compressed snapshots";
3126         return false;
3127     }
3128 
3129     SnapshotPaths paths;
3130     if (!MapPartitionWithSnapshot(lock.get(), params, SnapshotContext::Update, &paths)) {
3131         return false;
3132     }
3133 
3134     if (!paths.snapshot_device.empty()) {
3135         *snapshot_path = paths.snapshot_device;
3136     } else {
3137         *snapshot_path = paths.target_device;
3138     }
3139     DCHECK(!snapshot_path->empty());
3140     return true;
3141 }
3142 
OpenSnapshotWriter(const android::fs_mgr::CreateLogicalPartitionParams & params,const std::optional<std::string> & source_device)3143 std::unique_ptr<ISnapshotWriter> SnapshotManager::OpenSnapshotWriter(
3144         const android::fs_mgr::CreateLogicalPartitionParams& params,
3145         const std::optional<std::string>& source_device) {
3146 #if defined(LIBSNAPSHOT_NO_COW_WRITE)
3147     (void)params;
3148     (void)source_device;
3149 
3150     LOG(ERROR) << "Snapshots cannot be written in first-stage init or recovery";
3151     return nullptr;
3152 #else
3153     // First unmap any existing mapping.
3154     auto lock = LockShared();
3155     if (!lock) return nullptr;
3156     if (!UnmapPartitionWithSnapshot(lock.get(), params.GetPartitionName())) {
3157         LOG(ERROR) << "Cannot unmap existing snapshot before re-mapping it: "
3158                    << params.GetPartitionName();
3159         return nullptr;
3160     }
3161 
3162     SnapshotPaths paths;
3163     if (!MapPartitionWithSnapshot(lock.get(), params, SnapshotContext::Update, &paths)) {
3164         return nullptr;
3165     }
3166 
3167     SnapshotStatus status;
3168     if (!paths.cow_device_name.empty()) {
3169         if (!ReadSnapshotStatus(lock.get(), params.GetPartitionName(), &status)) {
3170             return nullptr;
3171         }
3172     } else {
3173         // Currently, partition_cow_creator always creates snapshots. The
3174         // reason is that if partition X shrinks while partition Y grows, we
3175         // cannot bindly write to the newly freed extents in X. This would
3176         // make the old slot unusable. So, the entire size of the target
3177         // partition is currently considered snapshottable.
3178         LOG(ERROR) << "No snapshot available for partition " << params.GetPartitionName();
3179         return nullptr;
3180     }
3181 
3182     if (status.compression_enabled()) {
3183         return OpenCompressedSnapshotWriter(lock.get(), source_device, params.GetPartitionName(),
3184                                             status, paths);
3185     }
3186     return OpenKernelSnapshotWriter(lock.get(), source_device, params.GetPartitionName(), status,
3187                                     paths);
3188 #endif
3189 }
3190 
3191 #if !defined(LIBSNAPSHOT_NO_COW_WRITE)
3192 std::unique_ptr<ISnapshotWriter> SnapshotManager::OpenCompressedSnapshotWriter(
3193         LockedFile* lock, const std::optional<std::string>& source_device,
3194         [[maybe_unused]] const std::string& partition_name, const SnapshotStatus& status,
3195         const SnapshotPaths& paths) {
3196     CHECK(lock);
3197 
3198     CowOptions cow_options;
3199     cow_options.compression = status.compression_algorithm();
3200     cow_options.max_blocks = {status.device_size() / cow_options.block_size};
3201     // Disable scratch space for vts tests
3202     if (device()->IsTestDevice()) {
3203         cow_options.scratch_space = false;
3204     }
3205 
3206     // Currently we don't support partial snapshots, since partition_cow_creator
3207     // never creates this scenario.
3208     CHECK(status.snapshot_size() == status.device_size());
3209 
3210     auto writer = std::make_unique<CompressedSnapshotWriter>(cow_options);
3211     if (source_device) {
3212         writer->SetSourceDevice(*source_device);
3213     }
3214 
3215     std::string cow_path;
3216     if (!GetMappedImageDevicePath(paths.cow_device_name, &cow_path)) {
3217         LOG(ERROR) << "Could not determine path for " << paths.cow_device_name;
3218         return nullptr;
3219     }
3220 
3221     unique_fd cow_fd(open(cow_path.c_str(), O_RDWR | O_CLOEXEC));
3222     if (cow_fd < 0) {
3223         PLOG(ERROR) << "OpenCompressedSnapshotWriter: open " << cow_path;
3224         return nullptr;
3225     }
3226     if (!writer->SetCowDevice(std::move(cow_fd))) {
3227         LOG(ERROR) << "Could not create COW writer from " << cow_path;
3228         return nullptr;
3229     }
3230 
3231     return writer;
3232 }
3233 
3234 std::unique_ptr<ISnapshotWriter> SnapshotManager::OpenKernelSnapshotWriter(
3235         LockedFile* lock, const std::optional<std::string>& source_device,
3236         [[maybe_unused]] const std::string& partition_name, const SnapshotStatus& status,
3237         const SnapshotPaths& paths) {
3238     CHECK(lock);
3239 
3240     CowOptions cow_options;
3241     cow_options.max_blocks = {status.device_size() / cow_options.block_size};
3242 
3243     auto writer = std::make_unique<OnlineKernelSnapshotWriter>(cow_options);
3244 
3245     std::string path = paths.snapshot_device.empty() ? paths.target_device : paths.snapshot_device;
3246     unique_fd fd(open(path.c_str(), O_RDWR | O_CLOEXEC));
3247     if (fd < 0) {
3248         PLOG(ERROR) << "open failed: " << path;
3249         return nullptr;
3250     }
3251 
3252     if (source_device) {
3253         writer->SetSourceDevice(*source_device);
3254     }
3255 
3256     uint64_t cow_size = status.cow_partition_size() + status.cow_file_size();
3257     writer->SetSnapshotDevice(std::move(fd), cow_size);
3258 
3259     return writer;
3260 }
3261 #endif  // !defined(LIBSNAPSHOT_NO_COW_WRITE)
3262 
UnmapUpdateSnapshot(const std::string & target_partition_name)3263 bool SnapshotManager::UnmapUpdateSnapshot(const std::string& target_partition_name) {
3264     auto lock = LockShared();
3265     if (!lock) return false;
3266     return UnmapPartitionWithSnapshot(lock.get(), target_partition_name);
3267 }
3268 
UnmapAllPartitionsInRecovery()3269 bool SnapshotManager::UnmapAllPartitionsInRecovery() {
3270     auto lock = LockExclusive();
3271     if (!lock) return false;
3272 
3273     const auto& opener = device_->GetPartitionOpener();
3274     uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3275     auto super_device = device_->GetSuperDevice(slot);
3276     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
3277     if (!metadata) {
3278         LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
3279         return false;
3280     }
3281 
3282     bool ok = true;
3283     for (const auto& partition : metadata->partitions) {
3284         auto partition_name = GetPartitionName(partition);
3285         ok &= UnmapPartitionWithSnapshot(lock.get(), partition_name);
3286     }
3287     return ok;
3288 }
3289 
operator <<(std::ostream & os,SnapshotManager::Slot slot)3290 std::ostream& operator<<(std::ostream& os, SnapshotManager::Slot slot) {
3291     switch (slot) {
3292         case SnapshotManager::Slot::Unknown:
3293             return os << "unknown";
3294         case SnapshotManager::Slot::Source:
3295             return os << "source";
3296         case SnapshotManager::Slot::Target:
3297             return os << "target";
3298     }
3299 }
3300 
Dump(std::ostream & os)3301 bool SnapshotManager::Dump(std::ostream& os) {
3302     // Don't actually lock. Dump() is for debugging purposes only, so it is okay
3303     // if it is racy.
3304     auto file = OpenLock(0 /* lock flag */);
3305     if (!file) return false;
3306 
3307     std::stringstream ss;
3308 
3309     auto update_status = ReadSnapshotUpdateStatus(file.get());
3310 
3311     ss << "Update state: " << ReadUpdateState(file.get()) << std::endl;
3312     ss << "Compression: " << update_status.compression_enabled() << std::endl;
3313     ss << "Current slot: " << device_->GetSlotSuffix() << std::endl;
3314     ss << "Boot indicator: booting from " << GetCurrentSlot() << " slot" << std::endl;
3315     ss << "Rollback indicator: "
3316        << (access(GetRollbackIndicatorPath().c_str(), F_OK) == 0 ? "exists" : strerror(errno))
3317        << std::endl;
3318     ss << "Forward merge indicator: "
3319        << (access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0 ? "exists" : strerror(errno))
3320        << std::endl;
3321     ss << "Source build fingerprint: " << update_status.source_build_fingerprint() << std::endl;
3322 
3323     bool ok = true;
3324     std::vector<std::string> snapshots;
3325     if (!ListSnapshots(file.get(), &snapshots)) {
3326         LOG(ERROR) << "Could not list snapshots";
3327         snapshots.clear();
3328         ok = false;
3329     }
3330     for (const auto& name : snapshots) {
3331         ss << "Snapshot: " << name << std::endl;
3332         SnapshotStatus status;
3333         if (!ReadSnapshotStatus(file.get(), name, &status)) {
3334             ok = false;
3335             continue;
3336         }
3337         ss << "    state: " << SnapshotState_Name(status.state()) << std::endl;
3338         ss << "    device size (bytes): " << status.device_size() << std::endl;
3339         ss << "    snapshot size (bytes): " << status.snapshot_size() << std::endl;
3340         ss << "    cow partition size (bytes): " << status.cow_partition_size() << std::endl;
3341         ss << "    cow file size (bytes): " << status.cow_file_size() << std::endl;
3342         ss << "    allocated sectors: " << status.sectors_allocated() << std::endl;
3343         ss << "    metadata sectors: " << status.metadata_sectors() << std::endl;
3344         ss << "    compression: " << status.compression_algorithm() << std::endl;
3345     }
3346     os << ss.rdbuf();
3347     return ok;
3348 }
3349 
EnsureMetadataMounted()3350 std::unique_ptr<AutoDevice> SnapshotManager::EnsureMetadataMounted() {
3351     if (!device_->IsRecovery()) {
3352         // No need to mount anything in recovery.
3353         LOG(INFO) << "EnsureMetadataMounted does nothing in Android mode.";
3354         return std::unique_ptr<AutoUnmountDevice>(new AutoUnmountDevice());
3355     }
3356     auto ret = AutoUnmountDevice::New(device_->GetMetadataDir());
3357     if (ret == nullptr) return nullptr;
3358 
3359     // In rescue mode, it is possible to erase and format metadata, but /metadata/ota is not
3360     // created to execute snapshot updates. Hence, subsequent calls is likely to fail because
3361     // Lock*() fails. By failing early and returning nullptr here, update_engine_sideload can
3362     // treat this case as if /metadata is not mounted.
3363     if (!LockShared()) {
3364         LOG(WARNING) << "/metadata is mounted, but errors occur when acquiring a shared lock. "
3365                         "Subsequent calls to SnapshotManager will fail. Unmounting /metadata now.";
3366         return nullptr;
3367     }
3368     return ret;
3369 }
3370 
HandleImminentDataWipe(const std::function<void ()> & callback)3371 bool SnapshotManager::HandleImminentDataWipe(const std::function<void()>& callback) {
3372     if (!device_->IsRecovery()) {
3373         LOG(ERROR) << "Data wipes are only allowed in recovery.";
3374         return false;
3375     }
3376 
3377     auto mount = EnsureMetadataMounted();
3378     if (!mount || !mount->HasDevice()) {
3379         // We allow the wipe to continue, because if we can't mount /metadata,
3380         // it is unlikely the device would have booted anyway. If there is no
3381         // metadata partition, then the device predates Virtual A/B.
3382         return true;
3383     }
3384 
3385     // Check this early, so we don't accidentally start trying to populate
3386     // the state file in recovery. Note we don't call GetUpdateState since
3387     // we want errors in acquiring the lock to be propagated, instead of
3388     // returning UpdateState::None.
3389     auto state_file = GetStateFilePath();
3390     if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
3391         return true;
3392     }
3393 
3394     auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3395     auto super_path = device_->GetSuperDevice(slot_number);
3396     if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
3397         LOG(ERROR) << "Unable to map partitions to complete merge.";
3398         return false;
3399     }
3400 
3401     auto process_callback = [&]() -> bool {
3402         if (callback) {
3403             callback();
3404         }
3405         return true;
3406     };
3407 
3408     in_factory_data_reset_ = true;
3409     UpdateState state =
3410             ProcessUpdateStateOnDataWipe(true /* allow_forward_merge */, process_callback);
3411     in_factory_data_reset_ = false;
3412 
3413     if (state == UpdateState::MergeFailed) {
3414         return false;
3415     }
3416 
3417     // Nothing should be depending on partitions now, so unmap them all.
3418     if (!UnmapAllPartitionsInRecovery()) {
3419         LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
3420     }
3421 
3422     if (state != UpdateState::None) {
3423         auto lock = LockExclusive();
3424         if (!lock) return false;
3425 
3426         // Zap the update state so the bootloader doesn't think we're still
3427         // merging. It's okay if this fails, it's informative only at this
3428         // point.
3429         WriteUpdateState(lock.get(), UpdateState::None);
3430     }
3431     return true;
3432 }
3433 
FinishMergeInRecovery()3434 bool SnapshotManager::FinishMergeInRecovery() {
3435     if (!device_->IsRecovery()) {
3436         LOG(ERROR) << "Data wipes are only allowed in recovery.";
3437         return false;
3438     }
3439 
3440     auto mount = EnsureMetadataMounted();
3441     if (!mount || !mount->HasDevice()) {
3442         return false;
3443     }
3444 
3445     auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3446     auto super_path = device_->GetSuperDevice(slot_number);
3447     if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
3448         LOG(ERROR) << "Unable to map partitions to complete merge.";
3449         return false;
3450     }
3451 
3452     UpdateState state = ProcessUpdateState();
3453     if (state != UpdateState::MergeCompleted) {
3454         LOG(ERROR) << "Merge returned unexpected status: " << state;
3455         return false;
3456     }
3457 
3458     // Nothing should be depending on partitions now, so unmap them all.
3459     if (!UnmapAllPartitionsInRecovery()) {
3460         LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
3461     }
3462     return true;
3463 }
3464 
ProcessUpdateStateOnDataWipe(bool allow_forward_merge,const std::function<bool ()> & callback)3465 UpdateState SnapshotManager::ProcessUpdateStateOnDataWipe(bool allow_forward_merge,
3466                                                           const std::function<bool()>& callback) {
3467     auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3468     UpdateState state = ProcessUpdateState(callback);
3469     LOG(INFO) << "Update state in recovery: " << state;
3470     switch (state) {
3471         case UpdateState::MergeFailed:
3472             LOG(ERROR) << "Unrecoverable merge failure detected.";
3473             return state;
3474         case UpdateState::Unverified: {
3475             // If an OTA was just applied but has not yet started merging:
3476             //
3477             // - if forward merge is allowed, initiate merge and call
3478             // ProcessUpdateState again.
3479             //
3480             // - if forward merge is not allowed, we
3481             // have no choice but to revert slots, because the current slot will
3482             // immediately become unbootable. Rather than wait for the device
3483             // to reboot N times until a rollback, we proactively disable the
3484             // new slot instead.
3485             //
3486             // Since the rollback is inevitable, we don't treat a HAL failure
3487             // as an error here.
3488             auto slot = GetCurrentSlot();
3489             if (slot == Slot::Target) {
3490                 if (allow_forward_merge &&
3491                     access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0) {
3492                     LOG(INFO) << "Forward merge allowed, initiating merge now.";
3493 
3494                     if (!InitiateMerge()) {
3495                         LOG(ERROR) << "Failed to initiate merge on data wipe.";
3496                         return UpdateState::MergeFailed;
3497                     }
3498                     return ProcessUpdateStateOnDataWipe(false /* allow_forward_merge */, callback);
3499                 }
3500 
3501                 LOG(ERROR) << "Reverting to old slot since update will be deleted.";
3502                 device_->SetSlotAsUnbootable(slot_number);
3503             } else {
3504                 LOG(INFO) << "Booting from " << slot << " slot, no action is taken.";
3505             }
3506             break;
3507         }
3508         case UpdateState::MergeNeedsReboot:
3509             // We shouldn't get here, because nothing is depending on
3510             // logical partitions.
3511             LOG(ERROR) << "Unexpected merge-needs-reboot state in recovery.";
3512             break;
3513         default:
3514             break;
3515     }
3516     return state;
3517 }
3518 
EnsureNoOverflowSnapshot(LockedFile * lock)3519 bool SnapshotManager::EnsureNoOverflowSnapshot(LockedFile* lock) {
3520     CHECK(lock);
3521 
3522     std::vector<std::string> snapshots;
3523     if (!ListSnapshots(lock, &snapshots)) {
3524         LOG(ERROR) << "Could not list snapshots.";
3525         return false;
3526     }
3527 
3528     auto& dm = DeviceMapper::Instance();
3529     for (const auto& snapshot : snapshots) {
3530         SnapshotStatus status;
3531         if (!ReadSnapshotStatus(lock, snapshot, &status)) {
3532             return false;
3533         }
3534         if (status.compression_enabled()) {
3535             continue;
3536         }
3537 
3538         std::vector<DeviceMapper::TargetInfo> targets;
3539         if (!dm.GetTableStatus(snapshot, &targets)) {
3540             LOG(ERROR) << "Could not read snapshot device table: " << snapshot;
3541             return false;
3542         }
3543         if (targets.size() != 1) {
3544             LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << snapshot
3545                        << ", size = " << targets.size();
3546             return false;
3547         }
3548         if (targets[0].IsOverflowSnapshot()) {
3549             LOG(ERROR) << "Detected overflow in snapshot " << snapshot
3550                        << ", CoW device size computation is wrong!";
3551             return false;
3552         }
3553     }
3554 
3555     return true;
3556 }
3557 
RecoveryCreateSnapshotDevices()3558 CreateResult SnapshotManager::RecoveryCreateSnapshotDevices() {
3559     if (!device_->IsRecovery()) {
3560         LOG(ERROR) << __func__ << " is only allowed in recovery.";
3561         return CreateResult::NOT_CREATED;
3562     }
3563 
3564     auto mount = EnsureMetadataMounted();
3565     if (!mount || !mount->HasDevice()) {
3566         LOG(ERROR) << "Couldn't mount Metadata.";
3567         return CreateResult::NOT_CREATED;
3568     }
3569     return RecoveryCreateSnapshotDevices(mount);
3570 }
3571 
RecoveryCreateSnapshotDevices(const std::unique_ptr<AutoDevice> & metadata_device)3572 CreateResult SnapshotManager::RecoveryCreateSnapshotDevices(
3573         const std::unique_ptr<AutoDevice>& metadata_device) {
3574     if (!device_->IsRecovery()) {
3575         LOG(ERROR) << __func__ << " is only allowed in recovery.";
3576         return CreateResult::NOT_CREATED;
3577     }
3578 
3579     if (metadata_device == nullptr || !metadata_device->HasDevice()) {
3580         LOG(ERROR) << "Metadata not mounted.";
3581         return CreateResult::NOT_CREATED;
3582     }
3583 
3584     auto state_file = GetStateFilePath();
3585     if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
3586         LOG(ERROR) << "Couldn't access state file.";
3587         return CreateResult::NOT_CREATED;
3588     }
3589 
3590     if (!NeedSnapshotsInFirstStageMount()) {
3591         return CreateResult::NOT_CREATED;
3592     }
3593 
3594     auto slot_suffix = device_->GetOtherSlotSuffix();
3595     auto slot_number = SlotNumberForSlotSuffix(slot_suffix);
3596     auto super_path = device_->GetSuperDevice(slot_number);
3597     if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
3598         LOG(ERROR) << "Unable to map partitions.";
3599         return CreateResult::ERROR;
3600     }
3601     return CreateResult::CREATED;
3602 }
3603 
UpdateForwardMergeIndicator(bool wipe)3604 bool SnapshotManager::UpdateForwardMergeIndicator(bool wipe) {
3605     auto path = GetForwardMergeIndicatorPath();
3606 
3607     if (!wipe) {
3608         LOG(INFO) << "Wipe is not scheduled. Deleting forward merge indicator.";
3609         return RemoveFileIfExists(path);
3610     }
3611 
3612     // TODO(b/152094219): Don't forward merge if no CoW file is allocated.
3613 
3614     LOG(INFO) << "Wipe will be scheduled. Allowing forward merge of snapshots.";
3615     if (!android::base::WriteStringToFile("1", path)) {
3616         PLOG(ERROR) << "Unable to write forward merge indicator: " << path;
3617         return false;
3618     }
3619 
3620     return true;
3621 }
3622 
GetSnapshotMergeStatsInstance()3623 ISnapshotMergeStats* SnapshotManager::GetSnapshotMergeStatsInstance() {
3624     return SnapshotMergeStats::GetInstance(*this);
3625 }
3626 
3627 // This is only to be used in recovery or normal Android (not first-stage init).
3628 // We don't guarantee dm paths are available in first-stage init, because ueventd
3629 // isn't running yet.
GetMappedImageDevicePath(const std::string & device_name,std::string * device_path)3630 bool SnapshotManager::GetMappedImageDevicePath(const std::string& device_name,
3631                                                std::string* device_path) {
3632     auto& dm = DeviceMapper::Instance();
3633 
3634     // Try getting the device string if it is a device mapper device.
3635     if (dm.GetState(device_name) != DmDeviceState::INVALID) {
3636         return dm.GetDmDevicePathByName(device_name, device_path);
3637     }
3638 
3639     // Otherwise, get path from IImageManager.
3640     return images_->GetMappedImageDevice(device_name, device_path);
3641 }
3642 
GetMappedImageDeviceStringOrPath(const std::string & device_name,std::string * device_string_or_mapped_path)3643 bool SnapshotManager::GetMappedImageDeviceStringOrPath(const std::string& device_name,
3644                                                        std::string* device_string_or_mapped_path) {
3645     auto& dm = DeviceMapper::Instance();
3646     // Try getting the device string if it is a device mapper device.
3647     if (dm.GetState(device_name) != DmDeviceState::INVALID) {
3648         return dm.GetDeviceString(device_name, device_string_or_mapped_path);
3649     }
3650 
3651     // Otherwise, get path from IImageManager.
3652     if (!images_->GetMappedImageDevice(device_name, device_string_or_mapped_path)) {
3653         return false;
3654     }
3655 
3656     LOG(WARNING) << "Calling GetMappedImageDevice with local image manager; device "
3657                  << (device_string_or_mapped_path ? *device_string_or_mapped_path : "(nullptr)")
3658                  << "may not be available in first stage init! ";
3659     return true;
3660 }
3661 
WaitForDevice(const std::string & device,std::chrono::milliseconds timeout_ms)3662 bool SnapshotManager::WaitForDevice(const std::string& device,
3663                                     std::chrono::milliseconds timeout_ms) {
3664     if (!android::base::StartsWith(device, "/")) {
3665         return true;
3666     }
3667 
3668     // In first-stage init, we rely on init setting a callback which can
3669     // regenerate uevents and populate /dev for us.
3670     if (uevent_regen_callback_) {
3671         if (!uevent_regen_callback_(device)) {
3672             LOG(ERROR) << "Failed to find device after regenerating uevents: " << device;
3673             return false;
3674         }
3675         return true;
3676     }
3677 
3678     // Otherwise, the only kind of device we need to wait for is a dm-user
3679     // misc device. Normal calls to DeviceMapper::CreateDevice() guarantee
3680     // the path has been created.
3681     if (!android::base::StartsWith(device, "/dev/dm-user/")) {
3682         return true;
3683     }
3684 
3685     if (timeout_ms.count() == 0) {
3686         LOG(ERROR) << "No timeout was specified to wait for device: " << device;
3687         return false;
3688     }
3689     if (!android::fs_mgr::WaitForFile(device, timeout_ms)) {
3690         LOG(ERROR) << "Timed out waiting for device to appear: " << device;
3691         return false;
3692     }
3693     return true;
3694 }
3695 
IsSnapuserdRequired()3696 bool SnapshotManager::IsSnapuserdRequired() {
3697     auto lock = LockExclusive();
3698     if (!lock) return false;
3699 
3700     auto status = ReadSnapshotUpdateStatus(lock.get());
3701     return status.state() != UpdateState::None && status.compression_enabled();
3702 }
3703 
DetachSnapuserdForSelinux(std::vector<std::string> * snapuserd_argv)3704 bool SnapshotManager::DetachSnapuserdForSelinux(std::vector<std::string>* snapuserd_argv) {
3705     return PerformInitTransition(InitTransition::SELINUX_DETACH, snapuserd_argv);
3706 }
3707 
PerformSecondStageInitTransition()3708 bool SnapshotManager::PerformSecondStageInitTransition() {
3709     return PerformInitTransition(InitTransition::SECOND_STAGE);
3710 }
3711 
ReadOldPartitionMetadata(LockedFile * lock)3712 const LpMetadata* SnapshotManager::ReadOldPartitionMetadata(LockedFile* lock) {
3713     CHECK(lock);
3714 
3715     if (!old_partition_metadata_) {
3716         auto path = GetOldPartitionMetadataPath();
3717         old_partition_metadata_ = android::fs_mgr::ReadFromImageFile(path);
3718         if (!old_partition_metadata_) {
3719             LOG(ERROR) << "Could not read old partition metadata from " << path;
3720             return nullptr;
3721         }
3722     }
3723     return old_partition_metadata_.get();
3724 }
3725 
DecideMergePhase(const SnapshotStatus & status)3726 MergePhase SnapshotManager::DecideMergePhase(const SnapshotStatus& status) {
3727     if (status.compression_enabled() && status.device_size() < status.old_partition_size()) {
3728         return MergePhase::FIRST_PHASE;
3729     }
3730     return MergePhase::SECOND_PHASE;
3731 }
3732 
UpdateCowStats(ISnapshotMergeStats * stats)3733 void SnapshotManager::UpdateCowStats(ISnapshotMergeStats* stats) {
3734     auto lock = LockExclusive();
3735     if (!lock) return;
3736 
3737     std::vector<std::string> snapshots;
3738     if (!ListSnapshots(lock.get(), &snapshots, GetSnapshotSlotSuffix())) {
3739         LOG(ERROR) << "Could not list snapshots";
3740         return;
3741     }
3742 
3743     uint64_t cow_file_size = 0;
3744     uint64_t total_cow_size = 0;
3745     uint64_t estimated_cow_size = 0;
3746     for (const auto& snapshot : snapshots) {
3747         SnapshotStatus status;
3748         if (!ReadSnapshotStatus(lock.get(), snapshot, &status)) {
3749             return;
3750         }
3751 
3752         cow_file_size += status.cow_file_size();
3753         total_cow_size += status.cow_file_size() + status.cow_partition_size();
3754         estimated_cow_size += status.estimated_cow_size();
3755     }
3756 
3757     stats->set_cow_file_size(cow_file_size);
3758     stats->set_total_cow_size_bytes(total_cow_size);
3759     stats->set_estimated_cow_size_bytes(estimated_cow_size);
3760 }
3761 
DeleteDeviceIfExists(const std::string & name,const std::chrono::milliseconds & timeout_ms)3762 bool SnapshotManager::DeleteDeviceIfExists(const std::string& name,
3763                                            const std::chrono::milliseconds& timeout_ms) {
3764     auto& dm = DeviceMapper::Instance();
3765     auto start = std::chrono::steady_clock::now();
3766     while (true) {
3767         if (dm.DeleteDeviceIfExists(name)) {
3768             return true;
3769         }
3770         auto now = std::chrono::steady_clock::now();
3771         auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(now - start);
3772         if (elapsed >= timeout_ms) {
3773             break;
3774         }
3775         std::this_thread::sleep_for(400ms);
3776     }
3777 
3778     // Try to diagnose why this failed. First get the actual device path.
3779     std::string full_path;
3780     if (!dm.GetDmDevicePathByName(name, &full_path)) {
3781         LOG(ERROR) << "Unable to diagnose DM_DEV_REMOVE failure.";
3782         return false;
3783     }
3784 
3785     // Check for child dm-devices.
3786     std::string block_name = android::base::Basename(full_path);
3787     std::string sysfs_holders = "/sys/class/block/" + block_name + "/holders";
3788 
3789     std::error_code ec;
3790     std::filesystem::directory_iterator dir_iter(sysfs_holders, ec);
3791     if (auto begin = std::filesystem::begin(dir_iter); begin != std::filesystem::end(dir_iter)) {
3792         LOG(ERROR) << "Child device-mapper device still mapped: " << begin->path();
3793         return false;
3794     }
3795 
3796     // Check for mounted partitions.
3797     android::fs_mgr::Fstab fstab;
3798     android::fs_mgr::ReadFstabFromFile("/proc/mounts", &fstab);
3799     for (const auto& entry : fstab) {
3800         if (android::base::Basename(entry.blk_device) == block_name) {
3801             LOG(ERROR) << "Partition still mounted: " << entry.mount_point;
3802             return false;
3803         }
3804     }
3805 
3806     // Check for detached mounted partitions.
3807     for (const auto& fs : std::filesystem::directory_iterator("/sys/fs", ec)) {
3808         std::string fs_type = android::base::Basename(fs.path().c_str());
3809         if (!(fs_type == "ext4" || fs_type == "f2fs")) {
3810             continue;
3811         }
3812 
3813         std::string path = fs.path().c_str() + "/"s + block_name;
3814         if (access(path.c_str(), F_OK) == 0) {
3815             LOG(ERROR) << "Block device was lazily unmounted and is still in-use: " << full_path
3816                        << "; possibly open file descriptor or attached loop device.";
3817             return false;
3818         }
3819     }
3820 
3821     LOG(ERROR) << "Device-mapper device " << name << "(" << full_path << ")"
3822                << " still in use."
3823                << "  Probably a file descriptor was leaked or held open, or a loop device is"
3824                << " attached.";
3825     return false;
3826 }
3827 
ReadMergeFailureCode()3828 MergeFailureCode SnapshotManager::ReadMergeFailureCode() {
3829     auto lock = LockExclusive();
3830     if (!lock) return MergeFailureCode::AcquireLock;
3831 
3832     SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
3833     if (status.state() != UpdateState::MergeFailed) {
3834         return MergeFailureCode::Ok;
3835     }
3836     return status.merge_failure_code();
3837 }
3838 
ReadSourceBuildFingerprint()3839 std::string SnapshotManager::ReadSourceBuildFingerprint() {
3840     auto lock = LockExclusive();
3841     if (!lock) return {};
3842 
3843     SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
3844     return status.source_build_fingerprint();
3845 }
3846 
3847 }  // namespace snapshot
3848 }  // namespace android
3849