1 // Copyright (C) 2019 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <libsnapshot/snapshot.h>
16
17 #include <dirent.h>
18 #include <fcntl.h>
19 #include <math.h>
20 #include <sys/file.h>
21 #include <sys/types.h>
22 #include <sys/unistd.h>
23
24 #include <filesystem>
25 #include <optional>
26 #include <thread>
27 #include <unordered_set>
28
29 #include <android-base/file.h>
30 #include <android-base/logging.h>
31 #include <android-base/parseint.h>
32 #include <android-base/properties.h>
33 #include <android-base/strings.h>
34 #include <android-base/unique_fd.h>
35 #include <cutils/sockets.h>
36 #include <ext4_utils/ext4_utils.h>
37 #include <fs_mgr.h>
38 #include <fs_mgr/file_wait.h>
39 #include <fs_mgr_dm_linear.h>
40 #include <fstab/fstab.h>
41 #include <libdm/dm.h>
42 #include <libfiemap/image_manager.h>
43 #include <liblp/liblp.h>
44
45 #include <android/snapshot/snapshot.pb.h>
46 #include <libsnapshot/snapshot_stats.h>
47 #include "device_info.h"
48 #include "partition_cow_creator.h"
49 #include "snapshot_metadata_updater.h"
50 #include "snapshot_reader.h"
51 #include "utility.h"
52
53 namespace android {
54 namespace snapshot {
55
56 using android::base::unique_fd;
57 using android::dm::DeviceMapper;
58 using android::dm::DmDeviceState;
59 using android::dm::DmTable;
60 using android::dm::DmTargetLinear;
61 using android::dm::DmTargetSnapshot;
62 using android::dm::DmTargetUser;
63 using android::dm::kSectorSize;
64 using android::dm::SnapshotStorageMode;
65 using android::fiemap::FiemapStatus;
66 using android::fiemap::IImageManager;
67 using android::fs_mgr::CreateDmTable;
68 using android::fs_mgr::CreateLogicalPartition;
69 using android::fs_mgr::CreateLogicalPartitionParams;
70 using android::fs_mgr::GetPartitionGroupName;
71 using android::fs_mgr::GetPartitionName;
72 using android::fs_mgr::LpMetadata;
73 using android::fs_mgr::MetadataBuilder;
74 using android::fs_mgr::SlotNumberForSlotSuffix;
75 using android::hardware::boot::V1_1::MergeStatus;
76 using chromeos_update_engine::DeltaArchiveManifest;
77 using chromeos_update_engine::Extent;
78 using chromeos_update_engine::FileDescriptor;
79 using chromeos_update_engine::PartitionUpdate;
80 template <typename T>
81 using RepeatedPtrField = google::protobuf::RepeatedPtrField<T>;
82 using std::chrono::duration_cast;
83 using namespace std::chrono_literals;
84 using namespace std::string_literals;
85
// Fixed paths of indicator files under /metadata/ota.
// NOTE(review): the functions in this part of the file obtain indicator paths
// through GetSnapshotBootIndicatorPath() / GetRollbackIndicatorPath(); confirm
// where these two constants are still referenced.
static constexpr char kBootIndicatorPath[] = "/metadata/ota/snapshot-boot";
static constexpr char kRollbackIndicatorPath[] = "/metadata/ota/rollback-indicator";
// A two-second duration; presumably the polling interval used while waiting
// on the update state - confirm at the use site.
static constexpr auto kUpdateStateCheckInterval = 2s;

// Forward declaration (defined elsewhere). The SnapshotManager constructor
// installs this function as merge_consistency_checker_.
MergeFailureCode CheckMergeConsistency(const std::string& name, const SnapshotStatus& status);
91
92 // Note: IImageManager is an incomplete type in the header, so the default
93 // destructor doesn't work.
~SnapshotManager()94 SnapshotManager::~SnapshotManager() {}
95
New(IDeviceInfo * info)96 std::unique_ptr<SnapshotManager> SnapshotManager::New(IDeviceInfo* info) {
97 if (!info) {
98 info = new DeviceInfo();
99 }
100 return std::unique_ptr<SnapshotManager>(new SnapshotManager(info));
101 }
102
NewForFirstStageMount(IDeviceInfo * info)103 std::unique_ptr<SnapshotManager> SnapshotManager::NewForFirstStageMount(IDeviceInfo* info) {
104 if (!info) {
105 DeviceInfo* impl = new DeviceInfo();
106 impl->set_first_stage_init(true);
107 info = impl;
108 }
109 auto sm = New(info);
110
111 // The first-stage version of snapuserd is explicitly started by init. Do
112 // not attempt to using it during tests (which run in normal AOSP).
113 if (!sm->device()->IsTestDevice()) {
114 sm->use_first_stage_snapuserd_ = true;
115 }
116 return sm;
117 }
118
SnapshotManager(IDeviceInfo * device)119 SnapshotManager::SnapshotManager(IDeviceInfo* device) : device_(device) {
120 metadata_dir_ = device_->GetMetadataDir();
121 merge_consistency_checker_ = android::snapshot::CheckMergeConsistency;
122 }
123
// Returns the COW device name for a snapshot: the snapshot name with "-cow" appended.
static std::string GetCowName(const std::string& snapshot_name) {
    std::string cow_name(snapshot_name);
    cow_name += "-cow";
    return cow_name;
}
127
// Returns the dm-user COW device name for a snapshot: the snapshot name with
// "-user-cow" appended.
static std::string GetDmUserCowName(const std::string& snapshot_name) {
    std::string user_cow_name(snapshot_name);
    user_cow_name += "-user-cow";
    return user_cow_name;
}
131
// Returns the COW backing-image name for a snapshot: the snapshot name with
// "-cow-img" appended.
static std::string GetCowImageDeviceName(const std::string& snapshot_name) {
    std::string image_name(snapshot_name);
    image_name += "-cow-img";
    return image_name;
}
135
// Returns the base device name for a partition: the partition name with
// "-base" appended.
static std::string GetBaseDeviceName(const std::string& partition_name) {
    std::string base_name(partition_name);
    base_name += "-base";
    return base_name;
}
139
// Returns the source device name for a partition: the partition name with
// "-src" appended.
static std::string GetSourceDeviceName(const std::string& partition_name) {
    std::string source_name(partition_name);
    source_name += "-src";
    return source_name;
}
143
BeginUpdate()144 bool SnapshotManager::BeginUpdate() {
145 bool needs_merge = false;
146 if (!TryCancelUpdate(&needs_merge)) {
147 return false;
148 }
149 if (needs_merge) {
150 LOG(INFO) << "Wait for merge (if any) before beginning a new update.";
151 auto state = ProcessUpdateState();
152 LOG(INFO) << "Merged with state = " << state;
153 }
154
155 auto file = LockExclusive();
156 if (!file) return false;
157
158 // Purge the ImageManager just in case there is a corrupt lp_metadata file
159 // lying around. (NB: no need to return false on an error, we can let the
160 // update try to progress.)
161 if (EnsureImageManager()) {
162 images_->RemoveAllImages();
163 }
164
165 // Clear any cached metadata (this allows re-using one manager across tests).
166 old_partition_metadata_ = nullptr;
167
168 auto state = ReadUpdateState(file.get());
169 if (state != UpdateState::None) {
170 LOG(ERROR) << "An update is already in progress, cannot begin a new update";
171 return false;
172 }
173 return WriteUpdateState(file.get(), UpdateState::Initiated);
174 }
175
CancelUpdate()176 bool SnapshotManager::CancelUpdate() {
177 bool needs_merge = false;
178 if (!TryCancelUpdate(&needs_merge)) {
179 return false;
180 }
181 if (needs_merge) {
182 LOG(ERROR) << "Cannot cancel update after it has completed or started merging";
183 }
184 return !needs_merge;
185 }
186
TryCancelUpdate(bool * needs_merge)187 bool SnapshotManager::TryCancelUpdate(bool* needs_merge) {
188 *needs_merge = false;
189
190 auto file = LockExclusive();
191 if (!file) return false;
192
193 UpdateState state = ReadUpdateState(file.get());
194 if (state == UpdateState::None) return true;
195
196 if (state == UpdateState::Initiated) {
197 LOG(INFO) << "Update has been initiated, now canceling";
198 return RemoveAllUpdateState(file.get());
199 }
200
201 if (state == UpdateState::Unverified) {
202 // We completed an update, but it can still be canceled if we haven't booted into it.
203 auto slot = GetCurrentSlot();
204 if (slot != Slot::Target) {
205 LOG(INFO) << "Canceling previously completed updates (if any)";
206 return RemoveAllUpdateState(file.get());
207 }
208 }
209 *needs_merge = true;
210 return true;
211 }
212
ReadUpdateSourceSlotSuffix()213 std::string SnapshotManager::ReadUpdateSourceSlotSuffix() {
214 auto boot_file = GetSnapshotBootIndicatorPath();
215 std::string contents;
216 if (!android::base::ReadFileToString(boot_file, &contents)) {
217 PLOG(WARNING) << "Cannot read " << boot_file;
218 return {};
219 }
220 return contents;
221 }
222
GetCurrentSlot()223 SnapshotManager::Slot SnapshotManager::GetCurrentSlot() {
224 auto contents = ReadUpdateSourceSlotSuffix();
225 if (contents.empty()) {
226 return Slot::Unknown;
227 }
228 if (device_->GetSlotSuffix() == contents) {
229 return Slot::Source;
230 }
231 return Slot::Target;
232 }
233
GetSnapshotSlotSuffix()234 std::string SnapshotManager::GetSnapshotSlotSuffix() {
235 switch (GetCurrentSlot()) {
236 case Slot::Target:
237 return device_->GetSlotSuffix();
238 default:
239 return device_->GetOtherSlotSuffix();
240 }
241 }
242
RemoveFileIfExists(const std::string & path)243 static bool RemoveFileIfExists(const std::string& path) {
244 std::string message;
245 if (!android::base::RemoveFileIfExists(path, &message)) {
246 LOG(ERROR) << "Remove failed: " << path << ": " << message;
247 return false;
248 }
249 return true;
250 }
251
RemoveAllUpdateState(LockedFile * lock,const std::function<bool ()> & prolog)252 bool SnapshotManager::RemoveAllUpdateState(LockedFile* lock, const std::function<bool()>& prolog) {
253 if (prolog && !prolog()) {
254 LOG(WARNING) << "Can't RemoveAllUpdateState: prolog failed.";
255 return false;
256 }
257
258 LOG(INFO) << "Removing all update state.";
259
260 if (!RemoveAllSnapshots(lock)) {
261 LOG(ERROR) << "Could not remove all snapshots";
262 return false;
263 }
264
265 // It's okay if these fail:
266 // - For SnapshotBoot and Rollback, first-stage init performs a deeper check after
267 // reading the indicator file, so it's not a problem if it still exists
268 // after the update completes.
269 // - For ForwardMerge, FinishedSnapshotWrites asserts that the existence of the indicator
270 // matches the incoming update.
271 std::vector<std::string> files = {
272 GetSnapshotBootIndicatorPath(),
273 GetRollbackIndicatorPath(),
274 GetForwardMergeIndicatorPath(),
275 GetOldPartitionMetadataPath(),
276 };
277 for (const auto& file : files) {
278 RemoveFileIfExists(file);
279 }
280
281 // If this fails, we'll keep trying to remove the update state (as the
282 // device reboots or starts a new update) until it finally succeeds.
283 return WriteUpdateState(lock, UpdateState::None);
284 }
285
FinishedSnapshotWrites(bool wipe)286 bool SnapshotManager::FinishedSnapshotWrites(bool wipe) {
287 auto lock = LockExclusive();
288 if (!lock) return false;
289
290 auto update_state = ReadUpdateState(lock.get());
291 if (update_state == UpdateState::Unverified) {
292 LOG(INFO) << "FinishedSnapshotWrites already called before. Ignored.";
293 return true;
294 }
295
296 if (update_state != UpdateState::Initiated) {
297 LOG(ERROR) << "Can only transition to the Unverified state from the Initiated state.";
298 return false;
299 }
300
301 if (!EnsureNoOverflowSnapshot(lock.get())) {
302 LOG(ERROR) << "Cannot ensure there are no overflow snapshots.";
303 return false;
304 }
305
306 if (!UpdateForwardMergeIndicator(wipe)) {
307 return false;
308 }
309
310 // This file is written on boot to detect whether a rollback occurred. It
311 // MUST NOT exist before rebooting, otherwise, we're at risk of deleting
312 // snapshots too early.
313 if (!RemoveFileIfExists(GetRollbackIndicatorPath())) {
314 return false;
315 }
316
317 // This file acts as both a quick indicator for init (it can use access(2)
318 // to decide how to do first-stage mounts), and it stores the old slot, so
319 // we can tell whether or not we performed a rollback.
320 auto contents = device_->GetSlotSuffix();
321 auto boot_file = GetSnapshotBootIndicatorPath();
322 if (!WriteStringToFileAtomic(contents, boot_file)) {
323 PLOG(ERROR) << "write failed: " << boot_file;
324 return false;
325 }
326 return WriteUpdateState(lock.get(), UpdateState::Unverified);
327 }
328
CreateSnapshot(LockedFile * lock,PartitionCowCreator * cow_creator,SnapshotStatus * status)329 bool SnapshotManager::CreateSnapshot(LockedFile* lock, PartitionCowCreator* cow_creator,
330 SnapshotStatus* status) {
331 CHECK(lock);
332 CHECK(lock->lock_mode() == LOCK_EX);
333 CHECK(status);
334
335 if (status->name().empty()) {
336 LOG(ERROR) << "SnapshotStatus has no name.";
337 return false;
338 }
339 // Check these sizes. Like liblp, we guarantee the partition size is
340 // respected, which means it has to be sector-aligned. (This guarantee is
341 // useful for locating avb footers correctly). The COW file size, however,
342 // can be arbitrarily larger than specified, so we can safely round it up.
343 if (status->device_size() % kSectorSize != 0) {
344 LOG(ERROR) << "Snapshot " << status->name()
345 << " device size is not a multiple of the sector size: "
346 << status->device_size();
347 return false;
348 }
349 if (status->snapshot_size() % kSectorSize != 0) {
350 LOG(ERROR) << "Snapshot " << status->name()
351 << " snapshot size is not a multiple of the sector size: "
352 << status->snapshot_size();
353 return false;
354 }
355 if (status->cow_partition_size() % kSectorSize != 0) {
356 LOG(ERROR) << "Snapshot " << status->name()
357 << " cow partition size is not a multiple of the sector size: "
358 << status->cow_partition_size();
359 return false;
360 }
361 if (status->cow_file_size() % kSectorSize != 0) {
362 LOG(ERROR) << "Snapshot " << status->name()
363 << " cow file size is not a multiple of the sector size: "
364 << status->cow_file_size();
365 return false;
366 }
367
368 status->set_state(SnapshotState::CREATED);
369 status->set_sectors_allocated(0);
370 status->set_metadata_sectors(0);
371 status->set_compression_enabled(cow_creator->compression_enabled);
372 status->set_compression_algorithm(cow_creator->compression_algorithm);
373
374 if (!WriteSnapshotStatus(lock, *status)) {
375 PLOG(ERROR) << "Could not write snapshot status: " << status->name();
376 return false;
377 }
378 return true;
379 }
380
CreateCowImage(LockedFile * lock,const std::string & name)381 Return SnapshotManager::CreateCowImage(LockedFile* lock, const std::string& name) {
382 CHECK(lock);
383 CHECK(lock->lock_mode() == LOCK_EX);
384 if (!EnsureImageManager()) return Return::Error();
385
386 SnapshotStatus status;
387 if (!ReadSnapshotStatus(lock, name, &status)) {
388 return Return::Error();
389 }
390
391 // The COW file size should have been rounded up to the nearest sector in CreateSnapshot.
392 if (status.cow_file_size() % kSectorSize != 0) {
393 LOG(ERROR) << "Snapshot " << name << " COW file size is not a multiple of the sector size: "
394 << status.cow_file_size();
395 return Return::Error();
396 }
397
398 std::string cow_image_name = GetCowImageDeviceName(name);
399 int cow_flags = IImageManager::CREATE_IMAGE_DEFAULT;
400 return Return(images_->CreateBackingImage(cow_image_name, status.cow_file_size(), cow_flags));
401 }
402
MapDmUserCow(LockedFile * lock,const std::string & name,const std::string & cow_file,const std::string & base_device,const std::chrono::milliseconds & timeout_ms,std::string * path)403 bool SnapshotManager::MapDmUserCow(LockedFile* lock, const std::string& name,
404 const std::string& cow_file, const std::string& base_device,
405 const std::chrono::milliseconds& timeout_ms, std::string* path) {
406 CHECK(lock);
407
408 auto& dm = DeviceMapper::Instance();
409
410 // Use an extra decoration for first-stage init, so we can transition
411 // to a new table entry in second-stage.
412 std::string misc_name = name;
413 if (use_first_stage_snapuserd_) {
414 misc_name += "-init";
415 }
416
417 if (!EnsureSnapuserdConnected()) {
418 return false;
419 }
420
421 uint64_t base_sectors = snapuserd_client_->InitDmUserCow(misc_name, cow_file, base_device);
422 if (base_sectors == 0) {
423 LOG(ERROR) << "Failed to retrieve base_sectors from Snapuserd";
424 return false;
425 }
426
427 DmTable table;
428 table.Emplace<DmTargetUser>(0, base_sectors, misc_name);
429 if (!dm.CreateDevice(name, table, path, timeout_ms)) {
430 return false;
431 }
432 if (!WaitForDevice(*path, timeout_ms)) {
433 return false;
434 }
435
436 auto control_device = "/dev/dm-user/" + misc_name;
437 if (!WaitForDevice(control_device, timeout_ms)) {
438 return false;
439 }
440
441 return snapuserd_client_->AttachDmUser(misc_name);
442 }
443
MapSnapshot(LockedFile * lock,const std::string & name,const std::string & base_device,const std::string & cow_device,const std::chrono::milliseconds & timeout_ms,std::string * dev_path)444 bool SnapshotManager::MapSnapshot(LockedFile* lock, const std::string& name,
445 const std::string& base_device, const std::string& cow_device,
446 const std::chrono::milliseconds& timeout_ms,
447 std::string* dev_path) {
448 CHECK(lock);
449
450 SnapshotStatus status;
451 if (!ReadSnapshotStatus(lock, name, &status)) {
452 return false;
453 }
454 if (status.state() == SnapshotState::NONE || status.state() == SnapshotState::MERGE_COMPLETED) {
455 LOG(ERROR) << "Should not create a snapshot device for " << name
456 << " after merging has completed.";
457 return false;
458 }
459
460 // Validate the block device size, as well as the requested snapshot size.
461 // Note that during first-stage init, we don't have the device paths.
462 if (android::base::StartsWith(base_device, "/")) {
463 unique_fd fd(open(base_device.c_str(), O_RDONLY | O_CLOEXEC));
464 if (fd < 0) {
465 PLOG(ERROR) << "open failed: " << base_device;
466 return false;
467 }
468 auto dev_size = get_block_device_size(fd);
469 if (!dev_size) {
470 PLOG(ERROR) << "Could not determine block device size: " << base_device;
471 return false;
472 }
473 if (status.device_size() != dev_size) {
474 LOG(ERROR) << "Block device size for " << base_device << " does not match"
475 << "(expected " << status.device_size() << ", got " << dev_size << ")";
476 return false;
477 }
478 }
479 if (status.device_size() % kSectorSize != 0) {
480 LOG(ERROR) << "invalid blockdev size for " << base_device << ": " << status.device_size();
481 return false;
482 }
483 if (status.snapshot_size() % kSectorSize != 0 ||
484 status.snapshot_size() > status.device_size()) {
485 LOG(ERROR) << "Invalid snapshot size for " << base_device << ": " << status.snapshot_size();
486 return false;
487 }
488 if (status.device_size() != status.snapshot_size()) {
489 LOG(ERROR) << "Device size and snapshot size must be the same (device size = "
490 << status.device_size() << ", snapshot size = " << status.snapshot_size();
491 return false;
492 }
493
494 uint64_t snapshot_sectors = status.snapshot_size() / kSectorSize;
495
496 auto& dm = DeviceMapper::Instance();
497
498 // Note that merging is a global state. We do track whether individual devices
499 // have completed merging, but the start of the merge process is considered
500 // atomic.
501 SnapshotStorageMode mode;
502 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
503 switch (update_status.state()) {
504 case UpdateState::MergeCompleted:
505 case UpdateState::MergeNeedsReboot:
506 LOG(ERROR) << "Should not create a snapshot device for " << name
507 << " after global merging has completed.";
508 return false;
509 case UpdateState::Merging:
510 case UpdateState::MergeFailed:
511 // Note: MergeFailed indicates that a merge is in progress, but
512 // is possibly stalled. We still have to honor the merge.
513 if (DecideMergePhase(status) == update_status.merge_phase()) {
514 mode = SnapshotStorageMode::Merge;
515 } else {
516 mode = SnapshotStorageMode::Persistent;
517 }
518 break;
519 default:
520 mode = SnapshotStorageMode::Persistent;
521 break;
522 }
523
524 if (mode == SnapshotStorageMode::Persistent && status.state() == SnapshotState::MERGING) {
525 LOG(ERROR) << "Snapshot: " << name
526 << " has snapshot status Merging but mode set to Persistent."
527 << " Changing mode to Snapshot-Merge.";
528 mode = SnapshotStorageMode::Merge;
529 }
530
531 DmTable table;
532 table.Emplace<DmTargetSnapshot>(0, snapshot_sectors, base_device, cow_device, mode,
533 kSnapshotChunkSize);
534 if (!dm.CreateDevice(name, table, dev_path, timeout_ms)) {
535 LOG(ERROR) << "Could not create snapshot device: " << name;
536 return false;
537 }
538 return true;
539 }
540
MapCowImage(const std::string & name,const std::chrono::milliseconds & timeout_ms)541 std::optional<std::string> SnapshotManager::MapCowImage(
542 const std::string& name, const std::chrono::milliseconds& timeout_ms) {
543 if (!EnsureImageManager()) return std::nullopt;
544 auto cow_image_name = GetCowImageDeviceName(name);
545
546 bool ok;
547 std::string cow_dev;
548 if (device_->IsRecovery() || device_->IsFirstStageInit()) {
549 const auto& opener = device_->GetPartitionOpener();
550 ok = images_->MapImageWithDeviceMapper(opener, cow_image_name, &cow_dev);
551 } else {
552 ok = images_->MapImageDevice(cow_image_name, timeout_ms, &cow_dev);
553 }
554
555 if (ok) {
556 LOG(INFO) << "Mapped " << cow_image_name << " to " << cow_dev;
557 return cow_dev;
558 }
559 LOG(ERROR) << "Could not map image device: " << cow_image_name;
560 return std::nullopt;
561 }
562
MapSourceDevice(LockedFile * lock,const std::string & name,const std::chrono::milliseconds & timeout_ms,std::string * path)563 bool SnapshotManager::MapSourceDevice(LockedFile* lock, const std::string& name,
564 const std::chrono::milliseconds& timeout_ms,
565 std::string* path) {
566 CHECK(lock);
567
568 auto metadata = ReadOldPartitionMetadata(lock);
569 if (!metadata) {
570 LOG(ERROR) << "Could not map source device due to missing or corrupt metadata";
571 return false;
572 }
573
574 auto old_name = GetOtherPartitionName(name);
575 auto slot_suffix = device_->GetSlotSuffix();
576 auto slot = SlotNumberForSlotSuffix(slot_suffix);
577
578 CreateLogicalPartitionParams params = {
579 .block_device = device_->GetSuperDevice(slot),
580 .metadata = metadata,
581 .partition_name = old_name,
582 .timeout_ms = timeout_ms,
583 .device_name = GetSourceDeviceName(name),
584 .partition_opener = &device_->GetPartitionOpener(),
585 };
586 if (!CreateLogicalPartition(std::move(params), path)) {
587 LOG(ERROR) << "Could not create source device for snapshot " << name;
588 return false;
589 }
590 return true;
591 }
592
UnmapSnapshot(LockedFile * lock,const std::string & name)593 bool SnapshotManager::UnmapSnapshot(LockedFile* lock, const std::string& name) {
594 CHECK(lock);
595
596 if (!DeleteDeviceIfExists(name)) {
597 LOG(ERROR) << "Could not delete snapshot device: " << name;
598 return false;
599 }
600 return true;
601 }
602
UnmapCowImage(const std::string & name)603 bool SnapshotManager::UnmapCowImage(const std::string& name) {
604 if (!EnsureImageManager()) return false;
605 return images_->UnmapImageIfExists(GetCowImageDeviceName(name));
606 }
607
DeleteSnapshot(LockedFile * lock,const std::string & name)608 bool SnapshotManager::DeleteSnapshot(LockedFile* lock, const std::string& name) {
609 CHECK(lock);
610 CHECK(lock->lock_mode() == LOCK_EX);
611 if (!EnsureImageManager()) return false;
612
613 if (!UnmapCowDevices(lock, name)) {
614 return false;
615 }
616
617 // We can't delete snapshots in recovery. The only way we'd try is it we're
618 // completing or canceling a merge in preparation for a data wipe, in which
619 // case, we don't care if the file sticks around.
620 if (device_->IsRecovery()) {
621 LOG(INFO) << "Skipping delete of snapshot " << name << " in recovery.";
622 return true;
623 }
624
625 auto cow_image_name = GetCowImageDeviceName(name);
626 if (images_->BackingImageExists(cow_image_name)) {
627 if (!images_->DeleteBackingImage(cow_image_name)) {
628 return false;
629 }
630 }
631
632 std::string error;
633 auto file_path = GetSnapshotStatusFilePath(name);
634 if (!android::base::RemoveFileIfExists(file_path, &error)) {
635 LOG(ERROR) << "Failed to remove status file " << file_path << ": " << error;
636 return false;
637 }
638 return true;
639 }
640
InitiateMerge()641 bool SnapshotManager::InitiateMerge() {
642 auto lock = LockExclusive();
643 if (!lock) return false;
644
645 UpdateState state = ReadUpdateState(lock.get());
646 if (state != UpdateState::Unverified) {
647 LOG(ERROR) << "Cannot begin a merge if an update has not been verified";
648 return false;
649 }
650
651 auto slot = GetCurrentSlot();
652 if (slot != Slot::Target) {
653 LOG(ERROR) << "Device cannot merge while not booting from new slot";
654 return false;
655 }
656
657 std::vector<std::string> snapshots;
658 if (!ListSnapshots(lock.get(), &snapshots)) {
659 LOG(ERROR) << "Could not list snapshots";
660 return false;
661 }
662
663 auto other_suffix = device_->GetOtherSlotSuffix();
664
665 auto& dm = DeviceMapper::Instance();
666 for (const auto& snapshot : snapshots) {
667 if (android::base::EndsWith(snapshot, other_suffix)) {
668 // Allow the merge to continue, but log this unexpected case.
669 LOG(ERROR) << "Unexpected snapshot found during merge: " << snapshot;
670 continue;
671 }
672
673 // The device has to be mapped, since everything should be merged at
674 // the same time. This is a fairly serious error. We could forcefully
675 // map everything here, but it should have been mapped during first-
676 // stage init.
677 if (dm.GetState(snapshot) == DmDeviceState::INVALID) {
678 LOG(ERROR) << "Cannot begin merge; device " << snapshot << " is not mapped.";
679 return false;
680 }
681 }
682
683 auto metadata = ReadCurrentMetadata();
684 for (auto it = snapshots.begin(); it != snapshots.end();) {
685 switch (GetMetadataPartitionState(*metadata, *it)) {
686 case MetadataPartitionState::Flashed:
687 LOG(WARNING) << "Detected re-flashing for partition " << *it
688 << ". Skip merging it.";
689 [[fallthrough]];
690 case MetadataPartitionState::None: {
691 LOG(WARNING) << "Deleting snapshot for partition " << *it;
692 if (!DeleteSnapshot(lock.get(), *it)) {
693 LOG(WARNING) << "Cannot delete snapshot for partition " << *it
694 << ". Skip merging it anyways.";
695 }
696 it = snapshots.erase(it);
697 } break;
698 case MetadataPartitionState::Updated: {
699 ++it;
700 } break;
701 }
702 }
703
704 bool compression_enabled = false;
705
706 std::vector<std::string> first_merge_group;
707
708 DmTargetSnapshot::Status initial_target_values = {};
709 for (const auto& snapshot : snapshots) {
710 DmTargetSnapshot::Status current_status;
711 if (!QuerySnapshotStatus(snapshot, nullptr, ¤t_status)) {
712 return false;
713 }
714 initial_target_values.sectors_allocated += current_status.sectors_allocated;
715 initial_target_values.total_sectors += current_status.total_sectors;
716 initial_target_values.metadata_sectors += current_status.metadata_sectors;
717
718 SnapshotStatus snapshot_status;
719 if (!ReadSnapshotStatus(lock.get(), snapshot, &snapshot_status)) {
720 return false;
721 }
722
723 compression_enabled |= snapshot_status.compression_enabled();
724 if (DecideMergePhase(snapshot_status) == MergePhase::FIRST_PHASE) {
725 first_merge_group.emplace_back(snapshot);
726 }
727 }
728
729 SnapshotUpdateStatus initial_status = ReadSnapshotUpdateStatus(lock.get());
730 initial_status.set_state(UpdateState::Merging);
731 initial_status.set_sectors_allocated(initial_target_values.sectors_allocated);
732 initial_status.set_total_sectors(initial_target_values.total_sectors);
733 initial_status.set_metadata_sectors(initial_target_values.metadata_sectors);
734 initial_status.set_compression_enabled(compression_enabled);
735
736 // If any partitions shrunk, we need to merge them before we merge any other
737 // partitions (see b/177935716). Otherwise, a merge from another partition
738 // may overwrite the source block of a copy operation.
739 const std::vector<std::string>* merge_group;
740 if (first_merge_group.empty()) {
741 merge_group = &snapshots;
742 initial_status.set_merge_phase(MergePhase::SECOND_PHASE);
743 } else {
744 merge_group = &first_merge_group;
745 initial_status.set_merge_phase(MergePhase::FIRST_PHASE);
746 }
747
748 // Point of no return - mark that we're starting a merge. From now on every
749 // eligible snapshot must be a merge target.
750 if (!WriteSnapshotUpdateStatus(lock.get(), initial_status)) {
751 return false;
752 }
753
754 auto reported_code = MergeFailureCode::Ok;
755 for (const auto& snapshot : *merge_group) {
756 // If this fails, we have no choice but to continue. Everything must
757 // be merged. This is not an ideal state to be in, but it is safe,
758 // because we the next boot will try again.
759 auto code = SwitchSnapshotToMerge(lock.get(), snapshot);
760 if (code != MergeFailureCode::Ok) {
761 LOG(ERROR) << "Failed to switch snapshot to a merge target: " << snapshot;
762 if (reported_code == MergeFailureCode::Ok) {
763 reported_code = code;
764 }
765 }
766 }
767
768 // If we couldn't switch everything to a merge target, pre-emptively mark
769 // this merge as failed. It will get acknowledged when WaitForMerge() is
770 // called.
771 if (reported_code != MergeFailureCode::Ok) {
772 WriteUpdateState(lock.get(), UpdateState::MergeFailed, reported_code);
773 }
774
775 // Return true no matter what, because a merge was initiated.
776 return true;
777 }
778
SwitchSnapshotToMerge(LockedFile * lock,const std::string & name)779 MergeFailureCode SnapshotManager::SwitchSnapshotToMerge(LockedFile* lock, const std::string& name) {
780 SnapshotStatus status;
781 if (!ReadSnapshotStatus(lock, name, &status)) {
782 return MergeFailureCode::ReadStatus;
783 }
784 if (status.state() != SnapshotState::CREATED) {
785 LOG(WARNING) << "Snapshot " << name
786 << " has unexpected state: " << SnapshotState_Name(status.state());
787 }
788
789 // After this, we return true because we technically did switch to a merge
790 // target. Everything else we do here is just informational.
791 if (auto code = RewriteSnapshotDeviceTable(name); code != MergeFailureCode::Ok) {
792 return code;
793 }
794
795 status.set_state(SnapshotState::MERGING);
796
797 DmTargetSnapshot::Status dm_status;
798 if (!QuerySnapshotStatus(name, nullptr, &dm_status)) {
799 LOG(ERROR) << "Could not query merge status for snapshot: " << name;
800 }
801 status.set_sectors_allocated(dm_status.sectors_allocated);
802 status.set_metadata_sectors(dm_status.metadata_sectors);
803 if (!WriteSnapshotStatus(lock, status)) {
804 LOG(ERROR) << "Could not update status file for snapshot: " << name;
805 }
806 return MergeFailureCode::Ok;
807 }
808
RewriteSnapshotDeviceTable(const std::string & name)809 MergeFailureCode SnapshotManager::RewriteSnapshotDeviceTable(const std::string& name) {
810 auto& dm = DeviceMapper::Instance();
811
812 std::vector<DeviceMapper::TargetInfo> old_targets;
813 if (!dm.GetTableInfo(name, &old_targets)) {
814 LOG(ERROR) << "Could not read snapshot device table: " << name;
815 return MergeFailureCode::GetTableInfo;
816 }
817 if (old_targets.size() != 1 || DeviceMapper::GetTargetType(old_targets[0].spec) != "snapshot") {
818 LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << name;
819 return MergeFailureCode::UnknownTable;
820 }
821
822 std::string base_device, cow_device;
823 if (!DmTargetSnapshot::GetDevicesFromParams(old_targets[0].data, &base_device, &cow_device)) {
824 LOG(ERROR) << "Could not derive underlying devices for snapshot: " << name;
825 return MergeFailureCode::GetTableParams;
826 }
827
828 DmTable table;
829 table.Emplace<DmTargetSnapshot>(0, old_targets[0].spec.length, base_device, cow_device,
830 SnapshotStorageMode::Merge, kSnapshotChunkSize);
831 if (!dm.LoadTableAndActivate(name, table)) {
832 LOG(ERROR) << "Could not swap device-mapper tables on snapshot device " << name;
833 return MergeFailureCode::ActivateNewTable;
834 }
835 LOG(INFO) << "Successfully switched snapshot device to a merge target: " << name;
836 return MergeFailureCode::Ok;
837 }
838
// Selects which device-mapper query GetSingleTarget() issues for a device.
enum class TableQuery {
    // Query with DeviceMapper::GetTableInfo().
    Table,
    // Query with DeviceMapper::GetTableStatus().
    Status,
};
843
GetSingleTarget(const std::string & dm_name,TableQuery query,DeviceMapper::TargetInfo * target)844 static bool GetSingleTarget(const std::string& dm_name, TableQuery query,
845 DeviceMapper::TargetInfo* target) {
846 auto& dm = DeviceMapper::Instance();
847 if (dm.GetState(dm_name) == DmDeviceState::INVALID) {
848 return false;
849 }
850
851 std::vector<DeviceMapper::TargetInfo> targets;
852 bool result;
853 if (query == TableQuery::Status) {
854 result = dm.GetTableStatus(dm_name, &targets);
855 } else {
856 result = dm.GetTableInfo(dm_name, &targets);
857 }
858 if (!result) {
859 LOG(ERROR) << "Could not query device: " << dm_name;
860 return false;
861 }
862 if (targets.size() != 1) {
863 return false;
864 }
865
866 *target = std::move(targets[0]);
867 return true;
868 }
869
IsSnapshotDevice(const std::string & dm_name,TargetInfo * target)870 bool SnapshotManager::IsSnapshotDevice(const std::string& dm_name, TargetInfo* target) {
871 DeviceMapper::TargetInfo snap_target;
872 if (!GetSingleTarget(dm_name, TableQuery::Status, &snap_target)) {
873 return false;
874 }
875 auto type = DeviceMapper::GetTargetType(snap_target.spec);
876 if (type != "snapshot" && type != "snapshot-merge") {
877 return false;
878 }
879 if (target) {
880 *target = std::move(snap_target);
881 }
882 return true;
883 }
884
QuerySnapshotStatus(const std::string & dm_name,std::string * target_type,DmTargetSnapshot::Status * status)885 bool SnapshotManager::QuerySnapshotStatus(const std::string& dm_name, std::string* target_type,
886 DmTargetSnapshot::Status* status) {
887 DeviceMapper::TargetInfo target;
888 if (!IsSnapshotDevice(dm_name, &target)) {
889 LOG(ERROR) << "Device " << dm_name << " is not a snapshot or snapshot-merge device";
890 return false;
891 }
892 if (!DmTargetSnapshot::ParseStatusText(target.data, status)) {
893 LOG(ERROR) << "Could not parse snapshot status text: " << dm_name;
894 return false;
895 }
896 if (target_type) {
897 *target_type = DeviceMapper::GetTargetType(target.spec);
898 }
899 if (!status->error.empty()) {
900 LOG(ERROR) << "Snapshot: " << dm_name << " returned error code: " << status->error;
901 return false;
902 }
903 return true;
904 }
905
906 // Note that when a merge fails, we will *always* try again to complete the
907 // merge each time the device boots. There is no harm in doing so, and if
908 // the problem was transient, we might manage to get a new outcome.
ProcessUpdateState(const std::function<bool ()> & callback,const std::function<bool ()> & before_cancel)909 UpdateState SnapshotManager::ProcessUpdateState(const std::function<bool()>& callback,
910 const std::function<bool()>& before_cancel) {
911 while (true) {
912 auto result = CheckMergeState(before_cancel);
913 LOG(INFO) << "ProcessUpdateState handling state: " << result.state;
914
915 if (result.state == UpdateState::MergeFailed) {
916 AcknowledgeMergeFailure(result.failure_code);
917 }
918 if (result.state != UpdateState::Merging) {
919 // Either there is no merge, or the merge was finished, so no need
920 // to keep waiting.
921 return result.state;
922 }
923
924 if (callback && !callback()) {
925 return result.state;
926 }
927
928 // This wait is not super time sensitive, so we have a relatively
929 // low polling frequency.
930 std::this_thread::sleep_for(kUpdateStateCheckInterval);
931 }
932 }
933
CheckMergeState(const std::function<bool ()> & before_cancel)934 auto SnapshotManager::CheckMergeState(const std::function<bool()>& before_cancel) -> MergeResult {
935 auto lock = LockExclusive();
936 if (!lock) {
937 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::AcquireLock);
938 }
939
940 auto result = CheckMergeState(lock.get(), before_cancel);
941 LOG(INFO) << "CheckMergeState for snapshots returned: " << result.state;
942
943 if (result.state == UpdateState::MergeCompleted) {
944 // Do this inside the same lock. Failures get acknowledged without the
945 // lock, because flock() might have failed.
946 AcknowledgeMergeSuccess(lock.get());
947 } else if (result.state == UpdateState::Cancelled) {
948 if (!device_->IsRecovery() && !RemoveAllUpdateState(lock.get(), before_cancel)) {
949 LOG(ERROR) << "Failed to remove all update state after acknowleding cancelled update.";
950 }
951 }
952 return result;
953 }
954
CheckMergeState(LockedFile * lock,const std::function<bool ()> & before_cancel)955 auto SnapshotManager::CheckMergeState(LockedFile* lock, const std::function<bool()>& before_cancel)
956 -> MergeResult {
957 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
958 switch (update_status.state()) {
959 case UpdateState::None:
960 case UpdateState::MergeCompleted:
961 // Harmless races are allowed between two callers of WaitForMerge,
962 // so in both of these cases we just propagate the state.
963 return MergeResult(update_status.state());
964
965 case UpdateState::Merging:
966 case UpdateState::MergeNeedsReboot:
967 case UpdateState::MergeFailed:
968 // We'll poll each snapshot below. Note that for the NeedsReboot
969 // case, we always poll once to give cleanup another opportunity to
970 // run.
971 break;
972
973 case UpdateState::Unverified:
974 // This is an edge case. Normally cancelled updates are detected
975 // via the merge poll below, but if we never started a merge, we
976 // need to also check here.
977 if (HandleCancelledUpdate(lock, before_cancel)) {
978 return MergeResult(UpdateState::Cancelled);
979 }
980 return MergeResult(update_status.state());
981
982 default:
983 return MergeResult(update_status.state());
984 }
985
986 std::vector<std::string> snapshots;
987 if (!ListSnapshots(lock, &snapshots)) {
988 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ListSnapshots);
989 }
990
991 auto other_suffix = device_->GetOtherSlotSuffix();
992
993 bool cancelled = false;
994 bool merging = false;
995 bool needs_reboot = false;
996 bool wrong_phase = false;
997 MergeFailureCode failure_code = MergeFailureCode::Ok;
998 for (const auto& snapshot : snapshots) {
999 if (android::base::EndsWith(snapshot, other_suffix)) {
1000 // This will have triggered an error message in InitiateMerge already.
1001 LOG(INFO) << "Skipping merge validation of unexpected snapshot: " << snapshot;
1002 continue;
1003 }
1004
1005 auto result = CheckTargetMergeState(lock, snapshot, update_status);
1006 LOG(INFO) << "CheckTargetMergeState for " << snapshot << " returned: " << result.state;
1007
1008 switch (result.state) {
1009 case UpdateState::MergeFailed:
1010 // Take the first failure code in case other failures compound.
1011 if (failure_code == MergeFailureCode::Ok) {
1012 failure_code = result.failure_code;
1013 }
1014 break;
1015 case UpdateState::Merging:
1016 merging = true;
1017 break;
1018 case UpdateState::MergeNeedsReboot:
1019 needs_reboot = true;
1020 break;
1021 case UpdateState::MergeCompleted:
1022 break;
1023 case UpdateState::Cancelled:
1024 cancelled = true;
1025 break;
1026 case UpdateState::None:
1027 wrong_phase = true;
1028 break;
1029 default:
1030 LOG(ERROR) << "Unknown merge status for \"" << snapshot << "\": "
1031 << "\"" << result.state << "\"";
1032 if (failure_code == MergeFailureCode::Ok) {
1033 failure_code = MergeFailureCode::UnexpectedMergeState;
1034 }
1035 break;
1036 }
1037 }
1038
1039 if (merging) {
1040 // Note that we handle "Merging" before we handle anything else. We
1041 // want to poll until *nothing* is merging if we can, so everything has
1042 // a chance to get marked as completed or failed.
1043 return MergeResult(UpdateState::Merging);
1044 }
1045 if (failure_code != MergeFailureCode::Ok) {
1046 // Note: since there are many drop-out cases for failure, we acknowledge
1047 // it in WaitForMerge rather than here and elsewhere.
1048 return MergeResult(UpdateState::MergeFailed, failure_code);
1049 }
1050 if (wrong_phase) {
1051 // If we got here, no other partitions are being merged, and nothing
1052 // failed to merge. It's safe to move to the next merge phase.
1053 auto code = MergeSecondPhaseSnapshots(lock);
1054 if (code != MergeFailureCode::Ok) {
1055 return MergeResult(UpdateState::MergeFailed, code);
1056 }
1057 return MergeResult(UpdateState::Merging);
1058 }
1059 if (needs_reboot) {
1060 WriteUpdateState(lock, UpdateState::MergeNeedsReboot);
1061 return MergeResult(UpdateState::MergeNeedsReboot);
1062 }
1063 if (cancelled) {
1064 // This is an edge case, that we handle as correctly as we sensibly can.
1065 // The underlying partition has changed behind update_engine, and we've
1066 // removed the snapshot as a result. The exact state of the update is
1067 // undefined now, but this can only happen on an unlocked device where
1068 // partitions can be flashed without wiping userdata.
1069 return MergeResult(UpdateState::Cancelled);
1070 }
1071 return MergeResult(UpdateState::MergeCompleted);
1072 }
1073
CheckTargetMergeState(LockedFile * lock,const std::string & name,const SnapshotUpdateStatus & update_status)1074 auto SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string& name,
1075 const SnapshotUpdateStatus& update_status)
1076 -> MergeResult {
1077 SnapshotStatus snapshot_status;
1078 if (!ReadSnapshotStatus(lock, name, &snapshot_status)) {
1079 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ReadStatus);
1080 }
1081
1082 std::unique_ptr<LpMetadata> current_metadata;
1083
1084 if (!IsSnapshotDevice(name)) {
1085 if (!current_metadata) {
1086 current_metadata = ReadCurrentMetadata();
1087 }
1088
1089 if (!current_metadata ||
1090 GetMetadataPartitionState(*current_metadata, name) != MetadataPartitionState::Updated) {
1091 DeleteSnapshot(lock, name);
1092 return MergeResult(UpdateState::Cancelled);
1093 }
1094
1095 // During a check, we decided the merge was complete, but we were unable to
1096 // collapse the device-mapper stack and perform COW cleanup. If we haven't
1097 // rebooted after this check, the device will still be a snapshot-merge
1098 // target. If we have rebooted, the device will now be a linear target,
1099 // and we can try cleanup again.
1100 if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
1101 // NB: It's okay if this fails now, we gave cleanup our best effort.
1102 OnSnapshotMergeComplete(lock, name, snapshot_status);
1103 return MergeResult(UpdateState::MergeCompleted);
1104 }
1105
1106 LOG(ERROR) << "Expected snapshot or snapshot-merge for device: " << name;
1107 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::UnknownTargetType);
1108 }
1109
1110 // This check is expensive so it is only enabled for debugging.
1111 DCHECK((current_metadata = ReadCurrentMetadata()) &&
1112 GetMetadataPartitionState(*current_metadata, name) == MetadataPartitionState::Updated);
1113
1114 std::string target_type;
1115 DmTargetSnapshot::Status status;
1116 if (!QuerySnapshotStatus(name, &target_type, &status)) {
1117 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::QuerySnapshotStatus);
1118 }
1119 if (target_type == "snapshot" &&
1120 DecideMergePhase(snapshot_status) == MergePhase::SECOND_PHASE &&
1121 update_status.merge_phase() == MergePhase::FIRST_PHASE) {
1122 // The snapshot is not being merged because it's in the wrong phase.
1123 return MergeResult(UpdateState::None);
1124 }
1125 if (target_type != "snapshot-merge") {
1126 // We can get here if we failed to rewrite the target type in
1127 // InitiateMerge(). If we failed to create the target in first-stage
1128 // init, boot would not succeed.
1129 LOG(ERROR) << "Snapshot " << name << " has incorrect target type: " << target_type;
1130 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ExpectedMergeTarget);
1131 }
1132
1133 // These two values are equal when merging is complete.
1134 if (status.sectors_allocated != status.metadata_sectors) {
1135 if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
1136 LOG(ERROR) << "Snapshot " << name << " is merging after being marked merge-complete.";
1137 return MergeResult(UpdateState::MergeFailed,
1138 MergeFailureCode::UnmergedSectorsAfterCompletion);
1139 }
1140 return MergeResult(UpdateState::Merging);
1141 }
1142
1143 auto code = CheckMergeConsistency(lock, name, snapshot_status);
1144 if (code != MergeFailureCode::Ok) {
1145 return MergeResult(UpdateState::MergeFailed, code);
1146 }
1147
1148 // Merging is done. First, update the status file to indicate the merge
1149 // is complete. We do this before calling OnSnapshotMergeComplete, even
1150 // though this means the write is potentially wasted work (since in the
1151 // ideal case we'll immediately delete the file).
1152 //
1153 // This makes it simpler to reason about the next reboot: no matter what
1154 // part of cleanup failed, first-stage init won't try to create another
1155 // snapshot device for this partition.
1156 snapshot_status.set_state(SnapshotState::MERGE_COMPLETED);
1157 if (!WriteSnapshotStatus(lock, snapshot_status)) {
1158 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::WriteStatus);
1159 }
1160 if (!OnSnapshotMergeComplete(lock, name, snapshot_status)) {
1161 return MergeResult(UpdateState::MergeNeedsReboot);
1162 }
1163 return MergeResult(UpdateState::MergeCompleted, MergeFailureCode::Ok);
1164 }
1165
1166 // This returns the backing device, not the dm-user layer.
GetMappedCowDeviceName(const std::string & snapshot,const SnapshotStatus & status)1167 static std::string GetMappedCowDeviceName(const std::string& snapshot,
1168 const SnapshotStatus& status) {
1169 // If no partition was created (the COW exists entirely on /data), the
1170 // device-mapper layering is different than if we had a partition.
1171 if (status.cow_partition_size() == 0) {
1172 return GetCowImageDeviceName(snapshot);
1173 }
1174 return GetCowName(snapshot);
1175 }
1176
CheckMergeConsistency(LockedFile * lock,const std::string & name,const SnapshotStatus & status)1177 MergeFailureCode SnapshotManager::CheckMergeConsistency(LockedFile* lock, const std::string& name,
1178 const SnapshotStatus& status) {
1179 CHECK(lock);
1180
1181 return merge_consistency_checker_(name, status);
1182 }
1183
CheckMergeConsistency(const std::string & name,const SnapshotStatus & status)1184 MergeFailureCode CheckMergeConsistency(const std::string& name, const SnapshotStatus& status) {
1185 if (!status.compression_enabled()) {
1186 // Do not try to verify old-style COWs yet.
1187 return MergeFailureCode::Ok;
1188 }
1189
1190 auto& dm = DeviceMapper::Instance();
1191
1192 std::string cow_image_name = GetMappedCowDeviceName(name, status);
1193 std::string cow_image_path;
1194 if (!dm.GetDmDevicePathByName(cow_image_name, &cow_image_path)) {
1195 LOG(ERROR) << "Failed to get path for cow device: " << cow_image_name;
1196 return MergeFailureCode::GetCowPathConsistencyCheck;
1197 }
1198
1199 // First pass, count # of ops.
1200 size_t num_ops = 0;
1201 {
1202 unique_fd fd(open(cow_image_path.c_str(), O_RDONLY | O_CLOEXEC));
1203 if (fd < 0) {
1204 PLOG(ERROR) << "Failed to open " << cow_image_name;
1205 return MergeFailureCode::OpenCowConsistencyCheck;
1206 }
1207
1208 CowReader reader;
1209 if (!reader.Parse(std::move(fd))) {
1210 LOG(ERROR) << "Failed to parse cow " << cow_image_path;
1211 return MergeFailureCode::ParseCowConsistencyCheck;
1212 }
1213
1214 for (auto iter = reader.GetOpIter(); !iter->Done(); iter->Next()) {
1215 if (!IsMetadataOp(iter->Get())) {
1216 num_ops++;
1217 }
1218 }
1219 }
1220
1221 // Second pass, try as hard as we can to get the actual number of blocks
1222 // the system thinks is merged.
1223 unique_fd fd(open(cow_image_path.c_str(), O_RDONLY | O_DIRECT | O_SYNC | O_CLOEXEC));
1224 if (fd < 0) {
1225 PLOG(ERROR) << "Failed to open direct " << cow_image_name;
1226 return MergeFailureCode::OpenCowDirectConsistencyCheck;
1227 }
1228
1229 void* addr;
1230 size_t page_size = getpagesize();
1231 if (posix_memalign(&addr, page_size, page_size) < 0) {
1232 PLOG(ERROR) << "posix_memalign with page size " << page_size;
1233 return MergeFailureCode::MemAlignConsistencyCheck;
1234 }
1235
1236 // COWs are always at least 2MB, this is guaranteed in snapshot creation.
1237 std::unique_ptr<void, decltype(&::free)> buffer(addr, ::free);
1238 if (!android::base::ReadFully(fd, buffer.get(), page_size)) {
1239 PLOG(ERROR) << "Direct read failed " << cow_image_name;
1240 return MergeFailureCode::DirectReadConsistencyCheck;
1241 }
1242
1243 auto header = reinterpret_cast<CowHeader*>(buffer.get());
1244 if (header->num_merge_ops != num_ops) {
1245 LOG(ERROR) << "COW consistency check failed, expected " << num_ops << " to be merged, "
1246 << "but " << header->num_merge_ops << " were actually recorded.";
1247 LOG(ERROR) << "Aborting merge progress for snapshot " << name
1248 << ", will try again next boot";
1249 return MergeFailureCode::WrongMergeCountConsistencyCheck;
1250 }
1251
1252 return MergeFailureCode::Ok;
1253 }
1254
MergeSecondPhaseSnapshots(LockedFile * lock)1255 MergeFailureCode SnapshotManager::MergeSecondPhaseSnapshots(LockedFile* lock) {
1256 std::vector<std::string> snapshots;
1257 if (!ListSnapshots(lock, &snapshots)) {
1258 return MergeFailureCode::ListSnapshots;
1259 }
1260
1261 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
1262 CHECK(update_status.state() == UpdateState::Merging ||
1263 update_status.state() == UpdateState::MergeFailed);
1264 CHECK(update_status.merge_phase() == MergePhase::FIRST_PHASE);
1265
1266 update_status.set_state(UpdateState::Merging);
1267 update_status.set_merge_phase(MergePhase::SECOND_PHASE);
1268 if (!WriteSnapshotUpdateStatus(lock, update_status)) {
1269 return MergeFailureCode::WriteStatus;
1270 }
1271
1272 MergeFailureCode result = MergeFailureCode::Ok;
1273 for (const auto& snapshot : snapshots) {
1274 SnapshotStatus snapshot_status;
1275 if (!ReadSnapshotStatus(lock, snapshot, &snapshot_status)) {
1276 return MergeFailureCode::ReadStatus;
1277 }
1278 if (DecideMergePhase(snapshot_status) != MergePhase::SECOND_PHASE) {
1279 continue;
1280 }
1281 auto code = SwitchSnapshotToMerge(lock, snapshot);
1282 if (code != MergeFailureCode::Ok) {
1283 LOG(ERROR) << "Failed to switch snapshot to a second-phase merge target: " << snapshot;
1284 if (result == MergeFailureCode::Ok) {
1285 result = code;
1286 }
1287 }
1288 }
1289 return result;
1290 }
1291
GetSnapshotBootIndicatorPath()1292 std::string SnapshotManager::GetSnapshotBootIndicatorPath() {
1293 return metadata_dir_ + "/" + android::base::Basename(kBootIndicatorPath);
1294 }
1295
GetRollbackIndicatorPath()1296 std::string SnapshotManager::GetRollbackIndicatorPath() {
1297 return metadata_dir_ + "/" + android::base::Basename(kRollbackIndicatorPath);
1298 }
1299
GetForwardMergeIndicatorPath()1300 std::string SnapshotManager::GetForwardMergeIndicatorPath() {
1301 return metadata_dir_ + "/allow-forward-merge";
1302 }
1303
GetOldPartitionMetadataPath()1304 std::string SnapshotManager::GetOldPartitionMetadataPath() {
1305 return metadata_dir_ + "/old-partition-metadata";
1306 }
1307
AcknowledgeMergeSuccess(LockedFile * lock)1308 void SnapshotManager::AcknowledgeMergeSuccess(LockedFile* lock) {
1309 // It's not possible to remove update state in recovery, so write an
1310 // indicator that cleanup is needed on reboot. If a factory data reset
1311 // was requested, it doesn't matter, everything will get wiped anyway.
1312 // To make testing easier we consider a /data wipe as cleaned up.
1313 if (device_->IsRecovery()) {
1314 WriteUpdateState(lock, UpdateState::MergeCompleted);
1315 return;
1316 }
1317
1318 RemoveAllUpdateState(lock);
1319 }
1320
AcknowledgeMergeFailure(MergeFailureCode failure_code)1321 void SnapshotManager::AcknowledgeMergeFailure(MergeFailureCode failure_code) {
1322 // Log first, so worst case, we always have a record of why the calls below
1323 // were being made.
1324 LOG(ERROR) << "Merge could not be completed and will be marked as failed.";
1325
1326 auto lock = LockExclusive();
1327 if (!lock) return;
1328
1329 // Since we released the lock in between WaitForMerge and here, it's
1330 // possible (1) the merge successfully completed or (2) was already
1331 // marked as a failure. So make sure to check the state again, and
1332 // only mark as a failure if appropriate.
1333 UpdateState state = ReadUpdateState(lock.get());
1334 if (state != UpdateState::Merging && state != UpdateState::MergeNeedsReboot) {
1335 return;
1336 }
1337
1338 WriteUpdateState(lock.get(), UpdateState::MergeFailed, failure_code);
1339 }
1340
OnSnapshotMergeComplete(LockedFile * lock,const std::string & name,const SnapshotStatus & status)1341 bool SnapshotManager::OnSnapshotMergeComplete(LockedFile* lock, const std::string& name,
1342 const SnapshotStatus& status) {
1343 if (IsSnapshotDevice(name)) {
1344 // We are extra-cautious here, to avoid deleting the wrong table.
1345 std::string target_type;
1346 DmTargetSnapshot::Status dm_status;
1347 if (!QuerySnapshotStatus(name, &target_type, &dm_status)) {
1348 return false;
1349 }
1350 if (target_type != "snapshot-merge") {
1351 LOG(ERROR) << "Unexpected target type " << target_type
1352 << " for snapshot device: " << name;
1353 return false;
1354 }
1355 if (dm_status.sectors_allocated != dm_status.metadata_sectors) {
1356 LOG(ERROR) << "Merge is unexpectedly incomplete for device " << name;
1357 return false;
1358 }
1359 if (!CollapseSnapshotDevice(name, status)) {
1360 LOG(ERROR) << "Unable to collapse snapshot: " << name;
1361 return false;
1362 }
1363 // Note that collapsing is implicitly an Unmap, so we don't need to
1364 // unmap the snapshot.
1365 }
1366
1367 if (!DeleteSnapshot(lock, name)) {
1368 LOG(ERROR) << "Could not delete snapshot: " << name;
1369 return false;
1370 }
1371 return true;
1372 }
1373
CollapseSnapshotDevice(const std::string & name,const SnapshotStatus & status)1374 bool SnapshotManager::CollapseSnapshotDevice(const std::string& name,
1375 const SnapshotStatus& status) {
1376 auto& dm = DeviceMapper::Instance();
1377
1378 // Verify we have a snapshot-merge device.
1379 DeviceMapper::TargetInfo target;
1380 if (!GetSingleTarget(name, TableQuery::Table, &target)) {
1381 return false;
1382 }
1383 if (DeviceMapper::GetTargetType(target.spec) != "snapshot-merge") {
1384 // This should be impossible, it was checked earlier.
1385 LOG(ERROR) << "Snapshot device has invalid target type: " << name;
1386 return false;
1387 }
1388
1389 std::string base_device, cow_device;
1390 if (!DmTargetSnapshot::GetDevicesFromParams(target.data, &base_device, &cow_device)) {
1391 LOG(ERROR) << "Could not parse snapshot device " << name << " parameters: " << target.data;
1392 return false;
1393 }
1394
1395 uint64_t snapshot_sectors = status.snapshot_size() / kSectorSize;
1396 if (snapshot_sectors * kSectorSize != status.snapshot_size()) {
1397 LOG(ERROR) << "Snapshot " << name
1398 << " size is not sector aligned: " << status.snapshot_size();
1399 return false;
1400 }
1401
1402 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1403 // Create a DmTable that is identical to the base device.
1404 CreateLogicalPartitionParams base_device_params{
1405 .block_device = device_->GetSuperDevice(slot),
1406 .metadata_slot = slot,
1407 .partition_name = name,
1408 .partition_opener = &device_->GetPartitionOpener(),
1409 };
1410 DmTable table;
1411 if (!CreateDmTable(base_device_params, &table)) {
1412 LOG(ERROR) << "Could not create a DmTable for partition: " << name;
1413 return false;
1414 }
1415
1416 if (!dm.LoadTableAndActivate(name, table)) {
1417 return false;
1418 }
1419
1420 // Attempt to delete the snapshot device if one still exists. Nothing
1421 // should be depending on the device, and device-mapper should have
1422 // flushed remaining I/O. We could in theory replace with dm-zero (or
1423 // re-use the table above), but for now it's better to know why this
1424 // would fail.
1425 if (status.compression_enabled()) {
1426 UnmapDmUserDevice(name);
1427 }
1428 auto base_name = GetBaseDeviceName(name);
1429 if (!DeleteDeviceIfExists(base_name)) {
1430 LOG(ERROR) << "Unable to delete base device for snapshot: " << base_name;
1431 }
1432
1433 if (!DeleteDeviceIfExists(GetSourceDeviceName(name), 4000ms)) {
1434 LOG(ERROR) << "Unable to delete source device for snapshot: " << GetSourceDeviceName(name);
1435 }
1436
1437 return true;
1438 }
1439
HandleCancelledUpdate(LockedFile * lock,const std::function<bool ()> & before_cancel)1440 bool SnapshotManager::HandleCancelledUpdate(LockedFile* lock,
1441 const std::function<bool()>& before_cancel) {
1442 auto slot = GetCurrentSlot();
1443 if (slot == Slot::Unknown) {
1444 return false;
1445 }
1446
1447 // If all snapshots were reflashed, then cancel the entire update.
1448 if (AreAllSnapshotsCancelled(lock)) {
1449 LOG(WARNING) << "Detected re-flashing, cancelling unverified update.";
1450 return RemoveAllUpdateState(lock, before_cancel);
1451 }
1452
1453 // If update has been rolled back, then cancel the entire update.
1454 // Client (update_engine) is responsible for doing additional cleanup work on its own states
1455 // when ProcessUpdateState() returns UpdateState::Cancelled.
1456 auto current_slot = GetCurrentSlot();
1457 if (current_slot != Slot::Source) {
1458 LOG(INFO) << "Update state is being processed while booting at " << current_slot
1459 << " slot, taking no action.";
1460 return false;
1461 }
1462
1463 // current_slot == Source. Attempt to detect rollbacks.
1464 if (access(GetRollbackIndicatorPath().c_str(), F_OK) != 0) {
1465 // This unverified update is not attempted. Take no action.
1466 PLOG(INFO) << "Rollback indicator not detected. "
1467 << "Update state is being processed before reboot, taking no action.";
1468 return false;
1469 }
1470
1471 LOG(WARNING) << "Detected rollback, cancelling unverified update.";
1472 return RemoveAllUpdateState(lock, before_cancel);
1473 }
1474
PerformInitTransition(InitTransition transition,std::vector<std::string> * snapuserd_argv)1475 bool SnapshotManager::PerformInitTransition(InitTransition transition,
1476 std::vector<std::string>* snapuserd_argv) {
1477 LOG(INFO) << "Performing transition for snapuserd.";
1478
1479 // Don't use EnsureSnapuserdConnected() because this is called from init,
1480 // and attempting to do so will deadlock.
1481 if (!snapuserd_client_ && transition != InitTransition::SELINUX_DETACH) {
1482 snapuserd_client_ = SnapuserdClient::Connect(kSnapuserdSocket, 10s);
1483 if (!snapuserd_client_) {
1484 LOG(ERROR) << "Unable to connect to snapuserd";
1485 return false;
1486 }
1487 }
1488
1489 auto& dm = DeviceMapper::Instance();
1490
1491 auto lock = LockExclusive();
1492 if (!lock) return false;
1493
1494 std::vector<std::string> snapshots;
1495 if (!ListSnapshots(lock.get(), &snapshots)) {
1496 LOG(ERROR) << "Failed to list snapshots.";
1497 return false;
1498 }
1499
1500 size_t num_cows = 0;
1501 size_t ok_cows = 0;
1502 for (const auto& snapshot : snapshots) {
1503 std::string user_cow_name = GetDmUserCowName(snapshot);
1504 if (dm.GetState(user_cow_name) == DmDeviceState::INVALID) {
1505 continue;
1506 }
1507
1508 DeviceMapper::TargetInfo target;
1509 if (!GetSingleTarget(user_cow_name, TableQuery::Table, &target)) {
1510 continue;
1511 }
1512
1513 auto target_type = DeviceMapper::GetTargetType(target.spec);
1514 if (target_type != "user") {
1515 LOG(ERROR) << "Unexpected target type for " << user_cow_name << ": " << target_type;
1516 continue;
1517 }
1518
1519 num_cows++;
1520
1521 SnapshotStatus snapshot_status;
1522 if (!ReadSnapshotStatus(lock.get(), snapshot, &snapshot_status)) {
1523 LOG(ERROR) << "Unable to read snapshot status: " << snapshot;
1524 continue;
1525 }
1526
1527 auto misc_name = user_cow_name;
1528
1529 DmTable table;
1530 table.Emplace<DmTargetUser>(0, target.spec.length, misc_name);
1531 if (!dm.LoadTableAndActivate(user_cow_name, table)) {
1532 LOG(ERROR) << "Unable to swap tables for " << misc_name;
1533 continue;
1534 }
1535
1536 std::string source_device_name;
1537 if (snapshot_status.old_partition_size() > 0) {
1538 source_device_name = GetSourceDeviceName(snapshot);
1539 } else {
1540 source_device_name = GetBaseDeviceName(snapshot);
1541 }
1542
1543 std::string source_device;
1544 if (!dm.GetDmDevicePathByName(source_device_name, &source_device)) {
1545 LOG(ERROR) << "Could not get device path for " << GetSourceDeviceName(snapshot);
1546 continue;
1547 }
1548
1549 std::string cow_image_name = GetMappedCowDeviceName(snapshot, snapshot_status);
1550
1551 std::string cow_image_device;
1552 if (!dm.GetDmDevicePathByName(cow_image_name, &cow_image_device)) {
1553 LOG(ERROR) << "Could not get device path for " << cow_image_name;
1554 continue;
1555 }
1556
1557 // Wait for ueventd to acknowledge and create the control device node.
1558 std::string control_device = "/dev/dm-user/" + misc_name;
1559 if (!WaitForDevice(control_device, 10s)) {
1560 LOG(ERROR) << "dm-user control device no found: " << misc_name;
1561 continue;
1562 }
1563
1564 if (transition == InitTransition::SELINUX_DETACH) {
1565 auto message = misc_name + "," + cow_image_device + "," + source_device;
1566 snapuserd_argv->emplace_back(std::move(message));
1567
1568 // Do not attempt to connect to the new snapuserd yet, it hasn't
1569 // been started. We do however want to wait for the misc device
1570 // to have been created.
1571 ok_cows++;
1572 continue;
1573 }
1574
1575 uint64_t base_sectors =
1576 snapuserd_client_->InitDmUserCow(misc_name, cow_image_device, source_device);
1577 if (base_sectors == 0) {
1578 // Unrecoverable as metadata reads from cow device failed
1579 LOG(FATAL) << "Failed to retrieve base_sectors from Snapuserd";
1580 return false;
1581 }
1582
1583 CHECK(base_sectors <= target.spec.length);
1584
1585 if (!snapuserd_client_->AttachDmUser(misc_name)) {
1586 // This error is unrecoverable. We cannot proceed because reads to
1587 // the underlying device will fail.
1588 LOG(FATAL) << "Could not initialize snapuserd for " << user_cow_name;
1589 return false;
1590 }
1591
1592 ok_cows++;
1593 }
1594
1595 if (ok_cows != num_cows) {
1596 LOG(ERROR) << "Could not transition all snapuserd consumers.";
1597 return false;
1598 }
1599 return true;
1600 }
1601
ReadCurrentMetadata()1602 std::unique_ptr<LpMetadata> SnapshotManager::ReadCurrentMetadata() {
1603 const auto& opener = device_->GetPartitionOpener();
1604 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1605 auto super_device = device_->GetSuperDevice(slot);
1606 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
1607 if (!metadata) {
1608 LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
1609 return nullptr;
1610 }
1611 return metadata;
1612 }
1613
GetMetadataPartitionState(const LpMetadata & metadata,const std::string & name)1614 SnapshotManager::MetadataPartitionState SnapshotManager::GetMetadataPartitionState(
1615 const LpMetadata& metadata, const std::string& name) {
1616 auto partition = android::fs_mgr::FindPartition(metadata, name);
1617 if (!partition) return MetadataPartitionState::None;
1618 if (partition->attributes & LP_PARTITION_ATTR_UPDATED) {
1619 return MetadataPartitionState::Updated;
1620 }
1621 return MetadataPartitionState::Flashed;
1622 }
1623
AreAllSnapshotsCancelled(LockedFile * lock)1624 bool SnapshotManager::AreAllSnapshotsCancelled(LockedFile* lock) {
1625 std::vector<std::string> snapshots;
1626 if (!ListSnapshots(lock, &snapshots)) {
1627 LOG(WARNING) << "Failed to list snapshots to determine whether device has been flashed "
1628 << "after applying an update. Assuming no snapshots.";
1629 // Let HandleCancelledUpdate resets UpdateState.
1630 return true;
1631 }
1632
1633 std::map<std::string, bool> flashing_status;
1634
1635 if (!GetSnapshotFlashingStatus(lock, snapshots, &flashing_status)) {
1636 LOG(WARNING) << "Failed to determine whether partitions have been flashed. Not"
1637 << "removing update states.";
1638 return false;
1639 }
1640
1641 bool all_snapshots_cancelled = std::all_of(flashing_status.begin(), flashing_status.end(),
1642 [](const auto& pair) { return pair.second; });
1643
1644 if (all_snapshots_cancelled) {
1645 LOG(WARNING) << "All partitions are re-flashed after update, removing all update states.";
1646 }
1647 return all_snapshots_cancelled;
1648 }
1649
GetSnapshotFlashingStatus(LockedFile * lock,const std::vector<std::string> & snapshots,std::map<std::string,bool> * out)1650 bool SnapshotManager::GetSnapshotFlashingStatus(LockedFile* lock,
1651 const std::vector<std::string>& snapshots,
1652 std::map<std::string, bool>* out) {
1653 CHECK(lock);
1654
1655 auto source_slot_suffix = ReadUpdateSourceSlotSuffix();
1656 if (source_slot_suffix.empty()) {
1657 return false;
1658 }
1659 uint32_t source_slot = SlotNumberForSlotSuffix(source_slot_suffix);
1660 uint32_t target_slot = (source_slot == 0) ? 1 : 0;
1661
1662 // Attempt to detect re-flashing on each partition.
1663 // - If all partitions are re-flashed, we can proceed to cancel the whole update.
1664 // - If only some of the partitions are re-flashed, snapshots for re-flashed partitions are
1665 // deleted. Caller is responsible for merging the rest of the snapshots.
1666 // - If none of the partitions are re-flashed, caller is responsible for merging the snapshots.
1667 //
1668 // Note that we use target slot metadata, since if an OTA has been applied
1669 // to the target slot, we can detect the UPDATED flag. Any kind of flash
1670 // operation against dynamic partitions ensures that all copies of the
1671 // metadata are in sync, so flashing all partitions on the source slot will
1672 // remove the UPDATED flag on the target slot as well.
1673 const auto& opener = device_->GetPartitionOpener();
1674 auto super_device = device_->GetSuperDevice(target_slot);
1675 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, target_slot);
1676 if (!metadata) {
1677 return false;
1678 }
1679
1680 for (const auto& snapshot_name : snapshots) {
1681 if (GetMetadataPartitionState(*metadata, snapshot_name) ==
1682 MetadataPartitionState::Updated) {
1683 out->emplace(snapshot_name, false);
1684 } else {
1685 // Delete snapshots for partitions that are re-flashed after the update.
1686 LOG(WARNING) << "Detected re-flashing of partition " << snapshot_name << ".";
1687 out->emplace(snapshot_name, true);
1688 }
1689 }
1690 return true;
1691 }
1692
RemoveAllSnapshots(LockedFile * lock)1693 bool SnapshotManager::RemoveAllSnapshots(LockedFile* lock) {
1694 std::vector<std::string> snapshots;
1695 if (!ListSnapshots(lock, &snapshots)) {
1696 LOG(ERROR) << "Could not list snapshots";
1697 return false;
1698 }
1699
1700 std::map<std::string, bool> flashing_status;
1701 if (!GetSnapshotFlashingStatus(lock, snapshots, &flashing_status)) {
1702 LOG(WARNING) << "Failed to get flashing status";
1703 }
1704
1705 auto current_slot = GetCurrentSlot();
1706 bool ok = true;
1707 bool has_mapped_cow_images = false;
1708 for (const auto& name : snapshots) {
1709 // If booting off source slot, it is okay to unmap and delete all the snapshots.
1710 // If boot indicator is missing, update state is None or Initiated, so
1711 // it is also okay to unmap and delete all the snapshots.
1712 // If booting off target slot,
1713 // - should not unmap because:
1714 // - In Android mode, snapshots are not mapped, but
1715 // filesystems are mounting off dm-linear targets directly.
1716 // - In recovery mode, assume nothing is mapped, so it is optional to unmap.
1717 // - If partition is flashed or unknown, it is okay to delete snapshots.
1718 // Otherwise (UPDATED flag), only delete snapshots if they are not mapped
1719 // as dm-snapshot (for example, after merge completes).
1720 bool should_unmap = current_slot != Slot::Target;
1721 bool should_delete = ShouldDeleteSnapshot(flashing_status, current_slot, name);
1722 if (should_unmap && android::base::EndsWith(name, device_->GetSlotSuffix())) {
1723 // Something very unexpected has happened - we want to unmap this
1724 // snapshot, but it's on the wrong slot. We can't unmap an active
1725 // partition. If this is not really a snapshot, skip the unmap
1726 // step.
1727 auto& dm = DeviceMapper::Instance();
1728 if (dm.GetState(name) == DmDeviceState::INVALID || !IsSnapshotDevice(name)) {
1729 LOG(ERROR) << "Detected snapshot " << name << " on " << current_slot << " slot"
1730 << " for source partition; removing without unmap.";
1731 should_unmap = false;
1732 }
1733 }
1734
1735 bool partition_ok = true;
1736 if (should_unmap && !UnmapPartitionWithSnapshot(lock, name)) {
1737 partition_ok = false;
1738 }
1739 if (partition_ok && should_delete && !DeleteSnapshot(lock, name)) {
1740 partition_ok = false;
1741 }
1742
1743 if (!partition_ok) {
1744 // Remember whether or not we were able to unmap the cow image.
1745 auto cow_image_device = GetCowImageDeviceName(name);
1746 has_mapped_cow_images |=
1747 (EnsureImageManager() && images_->IsImageMapped(cow_image_device));
1748
1749 ok = false;
1750 }
1751 }
1752
1753 if (ok || !has_mapped_cow_images) {
1754 // Delete any image artifacts as a precaution, in case an update is
1755 // being cancelled due to some corrupted state in an lp_metadata file.
1756 // Note that we do not do this if some cow images are still mapped,
1757 // since we must not remove backing storage if it's in use.
1758 if (!EnsureImageManager() || !images_->RemoveAllImages()) {
1759 LOG(ERROR) << "Could not remove all snapshot artifacts";
1760 return false;
1761 }
1762 }
1763 return ok;
1764 }
1765
1766 // See comments in RemoveAllSnapshots().
ShouldDeleteSnapshot(const std::map<std::string,bool> & flashing_status,Slot current_slot,const std::string & name)1767 bool SnapshotManager::ShouldDeleteSnapshot(const std::map<std::string, bool>& flashing_status,
1768 Slot current_slot, const std::string& name) {
1769 if (current_slot != Slot::Target) {
1770 return true;
1771 }
1772 auto it = flashing_status.find(name);
1773 if (it == flashing_status.end()) {
1774 LOG(WARNING) << "Can't determine flashing status for " << name;
1775 return true;
1776 }
1777 if (it->second) {
1778 // partition flashed, okay to delete obsolete snapshots
1779 return true;
1780 }
1781 return !IsSnapshotDevice(name);
1782 }
1783
GetUpdateState(double * progress)1784 UpdateState SnapshotManager::GetUpdateState(double* progress) {
1785 // If we've never started an update, the state file won't exist.
1786 auto state_file = GetStateFilePath();
1787 if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
1788 return UpdateState::None;
1789 }
1790
1791 auto lock = LockShared();
1792 if (!lock) {
1793 return UpdateState::None;
1794 }
1795
1796 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock.get());
1797 auto state = update_status.state();
1798 if (progress == nullptr) {
1799 return state;
1800 }
1801
1802 if (state == UpdateState::MergeCompleted) {
1803 *progress = 100.0;
1804 return state;
1805 }
1806
1807 *progress = 0.0;
1808 if (state != UpdateState::Merging) {
1809 return state;
1810 }
1811
1812 // Sum all the snapshot states as if the system consists of a single huge
1813 // snapshots device, then compute the merge completion percentage of that
1814 // device.
1815 std::vector<std::string> snapshots;
1816 if (!ListSnapshots(lock.get(), &snapshots)) {
1817 LOG(ERROR) << "Could not list snapshots";
1818 return state;
1819 }
1820
1821 DmTargetSnapshot::Status fake_snapshots_status = {};
1822 for (const auto& snapshot : snapshots) {
1823 DmTargetSnapshot::Status current_status;
1824
1825 if (!IsSnapshotDevice(snapshot)) continue;
1826 if (!QuerySnapshotStatus(snapshot, nullptr, ¤t_status)) continue;
1827
1828 fake_snapshots_status.sectors_allocated += current_status.sectors_allocated;
1829 fake_snapshots_status.total_sectors += current_status.total_sectors;
1830 fake_snapshots_status.metadata_sectors += current_status.metadata_sectors;
1831 }
1832
1833 *progress = DmTargetSnapshot::MergePercent(fake_snapshots_status,
1834 update_status.sectors_allocated());
1835
1836 return state;
1837 }
1838
UpdateUsesCompression()1839 bool SnapshotManager::UpdateUsesCompression() {
1840 auto lock = LockShared();
1841 if (!lock) return false;
1842 return UpdateUsesCompression(lock.get());
1843 }
1844
UpdateUsesCompression(LockedFile * lock)1845 bool SnapshotManager::UpdateUsesCompression(LockedFile* lock) {
1846 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
1847 return update_status.compression_enabled();
1848 }
1849
ListSnapshots(LockedFile * lock,std::vector<std::string> * snapshots,const std::string & suffix)1850 bool SnapshotManager::ListSnapshots(LockedFile* lock, std::vector<std::string>* snapshots,
1851 const std::string& suffix) {
1852 CHECK(lock);
1853
1854 auto dir_path = metadata_dir_ + "/snapshots"s;
1855 std::unique_ptr<DIR, decltype(&closedir)> dir(opendir(dir_path.c_str()), closedir);
1856 if (!dir) {
1857 PLOG(ERROR) << "opendir failed: " << dir_path;
1858 return false;
1859 }
1860
1861 struct dirent* dp;
1862 while ((dp = readdir(dir.get())) != nullptr) {
1863 if (dp->d_type != DT_REG) continue;
1864
1865 std::string name(dp->d_name);
1866 if (!suffix.empty() && !android::base::EndsWith(name, suffix)) {
1867 continue;
1868 }
1869 snapshots->emplace_back(std::move(name));
1870 }
1871 return true;
1872 }
1873
IsSnapshotManagerNeeded()1874 bool SnapshotManager::IsSnapshotManagerNeeded() {
1875 return access(kBootIndicatorPath, F_OK) == 0;
1876 }
1877
GetGlobalRollbackIndicatorPath()1878 std::string SnapshotManager::GetGlobalRollbackIndicatorPath() {
1879 return kRollbackIndicatorPath;
1880 }
1881
NeedSnapshotsInFirstStageMount()1882 bool SnapshotManager::NeedSnapshotsInFirstStageMount() {
1883 // If we fail to read, we'll wind up using CreateLogicalPartitions, which
1884 // will create devices that look like the old slot, except with extra
1885 // content at the end of each device. This will confuse dm-verity, and
1886 // ultimately we'll fail to boot. Why not make it a fatal error and have
1887 // the reason be clearer? Because the indicator file still exists, and
1888 // if this was FATAL, reverting to the old slot would be broken.
1889 auto slot = GetCurrentSlot();
1890
1891 if (slot != Slot::Target) {
1892 if (slot == Slot::Source) {
1893 // Device is rebooting into the original slot, so mark this as a
1894 // rollback.
1895 auto path = GetRollbackIndicatorPath();
1896 if (!android::base::WriteStringToFile("1", path)) {
1897 PLOG(ERROR) << "Unable to write rollback indicator: " << path;
1898 } else {
1899 LOG(INFO) << "Rollback detected, writing rollback indicator to " << path;
1900 }
1901 }
1902 LOG(INFO) << "Not booting from new slot. Will not mount snapshots.";
1903 return false;
1904 }
1905
1906 // If we can't read the update state, it's unlikely anything else will
1907 // succeed, so this is a fatal error. We'll eventually exhaust boot
1908 // attempts and revert to the old slot.
1909 auto lock = LockShared();
1910 if (!lock) {
1911 LOG(FATAL) << "Could not read update state to determine snapshot status";
1912 return false;
1913 }
1914 switch (ReadUpdateState(lock.get())) {
1915 case UpdateState::Unverified:
1916 case UpdateState::Merging:
1917 case UpdateState::MergeFailed:
1918 return true;
1919 default:
1920 return false;
1921 }
1922 }
1923
CreateLogicalAndSnapshotPartitions(const std::string & super_device,const std::chrono::milliseconds & timeout_ms)1924 bool SnapshotManager::CreateLogicalAndSnapshotPartitions(
1925 const std::string& super_device, const std::chrono::milliseconds& timeout_ms) {
1926 LOG(INFO) << "Creating logical partitions with snapshots as needed";
1927
1928 auto lock = LockExclusive();
1929 if (!lock) return false;
1930
1931 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1932 return MapAllPartitions(lock.get(), super_device, slot, timeout_ms);
1933 }
1934
MapAllPartitions(LockedFile * lock,const std::string & super_device,uint32_t slot,const std::chrono::milliseconds & timeout_ms)1935 bool SnapshotManager::MapAllPartitions(LockedFile* lock, const std::string& super_device,
1936 uint32_t slot, const std::chrono::milliseconds& timeout_ms) {
1937 const auto& opener = device_->GetPartitionOpener();
1938 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
1939 if (!metadata) {
1940 LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
1941 return false;
1942 }
1943
1944 if (!EnsureImageManager()) {
1945 return false;
1946 }
1947
1948 for (const auto& partition : metadata->partitions) {
1949 if (GetPartitionGroupName(metadata->groups[partition.group_index]) == kCowGroupName) {
1950 LOG(INFO) << "Skip mapping partition " << GetPartitionName(partition) << " in group "
1951 << kCowGroupName;
1952 continue;
1953 }
1954
1955 CreateLogicalPartitionParams params = {
1956 .block_device = super_device,
1957 .metadata = metadata.get(),
1958 .partition = &partition,
1959 .partition_opener = &opener,
1960 .timeout_ms = timeout_ms,
1961 };
1962 if (!MapPartitionWithSnapshot(lock, std::move(params), SnapshotContext::Mount, nullptr)) {
1963 return false;
1964 }
1965 }
1966
1967 LOG(INFO) << "Created logical partitions with snapshot.";
1968 return true;
1969 }
1970
GetRemainingTime(const std::chrono::milliseconds & timeout,const std::chrono::time_point<std::chrono::steady_clock> & begin)1971 static std::chrono::milliseconds GetRemainingTime(
1972 const std::chrono::milliseconds& timeout,
1973 const std::chrono::time_point<std::chrono::steady_clock>& begin) {
1974 // If no timeout is specified, execute all commands without specifying any timeout.
1975 if (timeout.count() == 0) return std::chrono::milliseconds(0);
1976 auto passed_time = std::chrono::steady_clock::now() - begin;
1977 auto remaining_time = timeout - duration_cast<std::chrono::milliseconds>(passed_time);
1978 if (remaining_time.count() <= 0) {
1979 LOG(ERROR) << "MapPartitionWithSnapshot has reached timeout " << timeout.count() << "ms ("
1980 << remaining_time.count() << "ms remaining)";
1981 // Return min() instead of remaining_time here because 0 is treated as a special value for
1982 // no timeout, where the rest of the commands will still be executed.
1983 return std::chrono::milliseconds::min();
1984 }
1985 return remaining_time;
1986 }
1987
MapPartitionWithSnapshot(LockedFile * lock,CreateLogicalPartitionParams params,SnapshotContext context,SnapshotPaths * paths)1988 bool SnapshotManager::MapPartitionWithSnapshot(LockedFile* lock,
1989 CreateLogicalPartitionParams params,
1990 SnapshotContext context, SnapshotPaths* paths) {
1991 auto begin = std::chrono::steady_clock::now();
1992
1993 CHECK(lock);
1994
1995 if (params.GetPartitionName() != params.GetDeviceName()) {
1996 LOG(ERROR) << "Mapping snapshot with a different name is unsupported: partition_name = "
1997 << params.GetPartitionName() << ", device_name = " << params.GetDeviceName();
1998 return false;
1999 }
2000
2001 // Fill out fields in CreateLogicalPartitionParams so that we have more information (e.g. by
2002 // reading super partition metadata).
2003 CreateLogicalPartitionParams::OwnedData params_owned_data;
2004 if (!params.InitDefaults(¶ms_owned_data)) {
2005 return false;
2006 }
2007
2008 if (!params.partition->num_extents) {
2009 LOG(INFO) << "Skipping zero-length logical partition: " << params.GetPartitionName();
2010 return true; // leave path empty to indicate that nothing is mapped.
2011 }
2012
2013 // Determine if there is a live snapshot for the SnapshotStatus of the partition; i.e. if the
2014 // partition still has a snapshot that needs to be mapped. If no live snapshot or merge
2015 // completed, live_snapshot_status is set to nullopt.
2016 std::optional<SnapshotStatus> live_snapshot_status;
2017 do {
2018 if (!(params.partition->attributes & LP_PARTITION_ATTR_UPDATED)) {
2019 LOG(INFO) << "Detected re-flashing of partition, will skip snapshot: "
2020 << params.GetPartitionName();
2021 break;
2022 }
2023 auto file_path = GetSnapshotStatusFilePath(params.GetPartitionName());
2024 if (access(file_path.c_str(), F_OK) != 0) {
2025 if (errno != ENOENT) {
2026 PLOG(INFO) << "Can't map snapshot for " << params.GetPartitionName()
2027 << ": Can't access " << file_path;
2028 return false;
2029 }
2030 break;
2031 }
2032 live_snapshot_status = std::make_optional<SnapshotStatus>();
2033 if (!ReadSnapshotStatus(lock, params.GetPartitionName(), &*live_snapshot_status)) {
2034 return false;
2035 }
2036 // No live snapshot if merge is completed.
2037 if (live_snapshot_status->state() == SnapshotState::MERGE_COMPLETED) {
2038 live_snapshot_status.reset();
2039 }
2040
2041 if (live_snapshot_status->state() == SnapshotState::NONE ||
2042 live_snapshot_status->cow_partition_size() + live_snapshot_status->cow_file_size() ==
2043 0) {
2044 LOG(WARNING) << "Snapshot status for " << params.GetPartitionName()
2045 << " is invalid, ignoring: state = "
2046 << SnapshotState_Name(live_snapshot_status->state())
2047 << ", cow_partition_size = " << live_snapshot_status->cow_partition_size()
2048 << ", cow_file_size = " << live_snapshot_status->cow_file_size();
2049 live_snapshot_status.reset();
2050 }
2051 } while (0);
2052
2053 if (live_snapshot_status.has_value()) {
2054 // dm-snapshot requires the base device to be writable.
2055 params.force_writable = true;
2056 // Map the base device with a different name to avoid collision.
2057 params.device_name = GetBaseDeviceName(params.GetPartitionName());
2058 }
2059
2060 AutoDeviceList created_devices;
2061
2062 // Create the base device for the snapshot, or if there is no snapshot, the
2063 // device itself. This device consists of the real blocks in the super
2064 // partition that this logical partition occupies.
2065 auto& dm = DeviceMapper::Instance();
2066 std::string base_path;
2067 if (!CreateLogicalPartition(params, &base_path)) {
2068 LOG(ERROR) << "Could not create logical partition " << params.GetPartitionName()
2069 << " as device " << params.GetDeviceName();
2070 return false;
2071 }
2072 created_devices.EmplaceBack<AutoUnmapDevice>(&dm, params.GetDeviceName());
2073
2074 if (paths) {
2075 paths->target_device = base_path;
2076 }
2077
2078 if (!live_snapshot_status.has_value()) {
2079 created_devices.Release();
2080 return true;
2081 }
2082
2083 // We don't have ueventd in first-stage init, so use device major:minor
2084 // strings instead.
2085 std::string base_device;
2086 if (!dm.GetDeviceString(params.GetDeviceName(), &base_device)) {
2087 LOG(ERROR) << "Could not determine major/minor for: " << params.GetDeviceName();
2088 return false;
2089 }
2090
2091 auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
2092 if (remaining_time.count() < 0) return false;
2093
2094 std::string cow_name;
2095 CreateLogicalPartitionParams cow_params = params;
2096 cow_params.timeout_ms = remaining_time;
2097 if (!MapCowDevices(lock, cow_params, *live_snapshot_status, &created_devices, &cow_name)) {
2098 return false;
2099 }
2100 std::string cow_device;
2101 if (!GetMappedImageDeviceStringOrPath(cow_name, &cow_device)) {
2102 LOG(ERROR) << "Could not determine major/minor for: " << cow_name;
2103 return false;
2104 }
2105 if (paths) {
2106 paths->cow_device_name = cow_name;
2107 }
2108
2109 remaining_time = GetRemainingTime(params.timeout_ms, begin);
2110 if (remaining_time.count() < 0) return false;
2111
2112 if (context == SnapshotContext::Update && live_snapshot_status->compression_enabled()) {
2113 // Stop here, we can't run dm-user yet, the COW isn't built.
2114 created_devices.Release();
2115 return true;
2116 }
2117
2118 if (live_snapshot_status->compression_enabled()) {
2119 // Get the source device (eg the view of the partition from before it was resized).
2120 std::string source_device_path;
2121 if (live_snapshot_status->old_partition_size() > 0) {
2122 if (!MapSourceDevice(lock, params.GetPartitionName(), remaining_time,
2123 &source_device_path)) {
2124 LOG(ERROR) << "Could not map source device for: " << cow_name;
2125 return false;
2126 }
2127
2128 auto source_device = GetSourceDeviceName(params.GetPartitionName());
2129 created_devices.EmplaceBack<AutoUnmapDevice>(&dm, source_device);
2130 } else {
2131 source_device_path = base_path;
2132 }
2133
2134 if (!WaitForDevice(source_device_path, remaining_time)) {
2135 return false;
2136 }
2137
2138 std::string cow_path;
2139 if (!GetMappedImageDevicePath(cow_name, &cow_path)) {
2140 LOG(ERROR) << "Could not determine path for: " << cow_name;
2141 return false;
2142 }
2143 if (!WaitForDevice(cow_path, remaining_time)) {
2144 return false;
2145 }
2146
2147 auto name = GetDmUserCowName(params.GetPartitionName());
2148
2149 std::string new_cow_device;
2150 if (!MapDmUserCow(lock, name, cow_path, source_device_path, remaining_time,
2151 &new_cow_device)) {
2152 LOG(ERROR) << "Could not map dm-user device for partition "
2153 << params.GetPartitionName();
2154 return false;
2155 }
2156 created_devices.EmplaceBack<AutoUnmapDevice>(&dm, name);
2157
2158 remaining_time = GetRemainingTime(params.timeout_ms, begin);
2159 if (remaining_time.count() < 0) return false;
2160
2161 cow_device = new_cow_device;
2162 }
2163
2164 std::string path;
2165 if (!MapSnapshot(lock, params.GetPartitionName(), base_device, cow_device, remaining_time,
2166 &path)) {
2167 LOG(ERROR) << "Could not map snapshot for partition: " << params.GetPartitionName();
2168 return false;
2169 }
2170 // No need to add params.GetPartitionName() to created_devices since it is immediately released.
2171
2172 if (paths) {
2173 paths->snapshot_device = path;
2174 }
2175
2176 created_devices.Release();
2177
2178 LOG(INFO) << "Mapped " << params.GetPartitionName() << " as snapshot device at " << path;
2179 return true;
2180 }
2181
UnmapPartitionWithSnapshot(LockedFile * lock,const std::string & target_partition_name)2182 bool SnapshotManager::UnmapPartitionWithSnapshot(LockedFile* lock,
2183 const std::string& target_partition_name) {
2184 CHECK(lock);
2185
2186 if (!UnmapSnapshot(lock, target_partition_name)) {
2187 return false;
2188 }
2189
2190 if (!UnmapCowDevices(lock, target_partition_name)) {
2191 return false;
2192 }
2193
2194 auto base_name = GetBaseDeviceName(target_partition_name);
2195 if (!DeleteDeviceIfExists(base_name)) {
2196 LOG(ERROR) << "Cannot delete base device: " << base_name;
2197 return false;
2198 }
2199
2200 auto source_name = GetSourceDeviceName(target_partition_name);
2201 if (!DeleteDeviceIfExists(source_name)) {
2202 LOG(ERROR) << "Cannot delete source device: " << source_name;
2203 return false;
2204 }
2205
2206 LOG(INFO) << "Successfully unmapped snapshot " << target_partition_name;
2207
2208 return true;
2209 }
2210
MapCowDevices(LockedFile * lock,const CreateLogicalPartitionParams & params,const SnapshotStatus & snapshot_status,AutoDeviceList * created_devices,std::string * cow_name)2211 bool SnapshotManager::MapCowDevices(LockedFile* lock, const CreateLogicalPartitionParams& params,
2212 const SnapshotStatus& snapshot_status,
2213 AutoDeviceList* created_devices, std::string* cow_name) {
2214 CHECK(lock);
2215 CHECK(snapshot_status.cow_partition_size() + snapshot_status.cow_file_size() > 0);
2216 auto begin = std::chrono::steady_clock::now();
2217
2218 std::string partition_name = params.GetPartitionName();
2219 std::string cow_image_name = GetCowImageDeviceName(partition_name);
2220 *cow_name = GetCowName(partition_name);
2221
2222 auto& dm = DeviceMapper::Instance();
2223
2224 // Map COW image if necessary.
2225 if (snapshot_status.cow_file_size() > 0) {
2226 if (!EnsureImageManager()) return false;
2227 auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
2228 if (remaining_time.count() < 0) return false;
2229
2230 if (!MapCowImage(partition_name, remaining_time).has_value()) {
2231 LOG(ERROR) << "Could not map cow image for partition: " << partition_name;
2232 return false;
2233 }
2234 created_devices->EmplaceBack<AutoUnmapImage>(images_.get(), cow_image_name);
2235
2236 // If no COW partition exists, just return the image alone.
2237 if (snapshot_status.cow_partition_size() == 0) {
2238 *cow_name = std::move(cow_image_name);
2239 LOG(INFO) << "Mapped COW image for " << partition_name << " at " << *cow_name;
2240 return true;
2241 }
2242 }
2243
2244 auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
2245 if (remaining_time.count() < 0) return false;
2246
2247 CHECK(snapshot_status.cow_partition_size() > 0);
2248
2249 // Create the DmTable for the COW device. It is the DmTable of the COW partition plus
2250 // COW image device as the last extent.
2251 CreateLogicalPartitionParams cow_partition_params = params;
2252 cow_partition_params.partition = nullptr;
2253 cow_partition_params.partition_name = *cow_name;
2254 cow_partition_params.device_name.clear();
2255 DmTable table;
2256 if (!CreateDmTable(cow_partition_params, &table)) {
2257 return false;
2258 }
2259 // If the COW image exists, append it as the last extent.
2260 if (snapshot_status.cow_file_size() > 0) {
2261 std::string cow_image_device;
2262 if (!GetMappedImageDeviceStringOrPath(cow_image_name, &cow_image_device)) {
2263 LOG(ERROR) << "Cannot determine major/minor for: " << cow_image_name;
2264 return false;
2265 }
2266 auto cow_partition_sectors = snapshot_status.cow_partition_size() / kSectorSize;
2267 auto cow_image_sectors = snapshot_status.cow_file_size() / kSectorSize;
2268 table.Emplace<DmTargetLinear>(cow_partition_sectors, cow_image_sectors, cow_image_device,
2269 0);
2270 }
2271
2272 // We have created the DmTable now. Map it.
2273 std::string cow_path;
2274 if (!dm.CreateDevice(*cow_name, table, &cow_path, remaining_time)) {
2275 LOG(ERROR) << "Could not create COW device: " << *cow_name;
2276 return false;
2277 }
2278 created_devices->EmplaceBack<AutoUnmapDevice>(&dm, *cow_name);
2279 LOG(INFO) << "Mapped COW device for " << params.GetPartitionName() << " at " << cow_path;
2280 return true;
2281 }
2282
UnmapCowDevices(LockedFile * lock,const std::string & name)2283 bool SnapshotManager::UnmapCowDevices(LockedFile* lock, const std::string& name) {
2284 CHECK(lock);
2285 if (!EnsureImageManager()) return false;
2286
2287 if (UpdateUsesCompression(lock) && !UnmapDmUserDevice(name)) {
2288 return false;
2289 }
2290
2291 if (!DeleteDeviceIfExists(GetCowName(name), 4000ms)) {
2292 LOG(ERROR) << "Cannot unmap: " << GetCowName(name);
2293 return false;
2294 }
2295
2296 std::string cow_image_name = GetCowImageDeviceName(name);
2297 if (!images_->UnmapImageIfExists(cow_image_name)) {
2298 LOG(ERROR) << "Cannot unmap image " << cow_image_name;
2299 return false;
2300 }
2301 return true;
2302 }
2303
UnmapDmUserDevice(const std::string & snapshot_name)2304 bool SnapshotManager::UnmapDmUserDevice(const std::string& snapshot_name) {
2305 auto& dm = DeviceMapper::Instance();
2306
2307 auto dm_user_name = GetDmUserCowName(snapshot_name);
2308 if (dm.GetState(dm_user_name) == DmDeviceState::INVALID) {
2309 return true;
2310 }
2311
2312 if (!DeleteDeviceIfExists(dm_user_name)) {
2313 LOG(ERROR) << "Cannot unmap " << dm_user_name;
2314 return false;
2315 }
2316
2317 if (EnsureSnapuserdConnected()) {
2318 if (!snapuserd_client_->WaitForDeviceDelete(dm_user_name)) {
2319 LOG(ERROR) << "Failed to wait for " << dm_user_name << " control device to delete";
2320 return false;
2321 }
2322 }
2323
2324 // Ensure the control device is gone so we don't run into ABA problems.
2325 auto control_device = "/dev/dm-user/" + dm_user_name;
2326 if (!android::fs_mgr::WaitForFileDeleted(control_device, 10s)) {
2327 LOG(ERROR) << "Timed out waiting for " << control_device << " to unlink";
2328 return false;
2329 }
2330 return true;
2331 }
2332
MapAllSnapshots(const std::chrono::milliseconds & timeout_ms)2333 bool SnapshotManager::MapAllSnapshots(const std::chrono::milliseconds& timeout_ms) {
2334 auto lock = LockExclusive();
2335 if (!lock) return false;
2336
2337 auto state = ReadUpdateState(lock.get());
2338 if (state == UpdateState::Unverified) {
2339 if (GetCurrentSlot() == Slot::Target) {
2340 LOG(ERROR) << "Cannot call MapAllSnapshots when booting from the target slot.";
2341 return false;
2342 }
2343 } else if (state != UpdateState::Initiated) {
2344 LOG(ERROR) << "Cannot call MapAllSnapshots from update state: " << state;
2345 return false;
2346 }
2347
2348 std::vector<std::string> snapshots;
2349 if (!ListSnapshots(lock.get(), &snapshots)) {
2350 return false;
2351 }
2352
2353 const auto& opener = device_->GetPartitionOpener();
2354 auto slot_suffix = device_->GetOtherSlotSuffix();
2355 auto slot_number = SlotNumberForSlotSuffix(slot_suffix);
2356 auto super_device = device_->GetSuperDevice(slot_number);
2357 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot_number);
2358 if (!metadata) {
2359 LOG(ERROR) << "MapAllSnapshots could not read dynamic partition metadata for device: "
2360 << super_device;
2361 return false;
2362 }
2363
2364 for (const auto& snapshot : snapshots) {
2365 if (!UnmapPartitionWithSnapshot(lock.get(), snapshot)) {
2366 LOG(ERROR) << "MapAllSnapshots could not unmap snapshot: " << snapshot;
2367 return false;
2368 }
2369
2370 CreateLogicalPartitionParams params = {
2371 .block_device = super_device,
2372 .metadata = metadata.get(),
2373 .partition_name = snapshot,
2374 .partition_opener = &opener,
2375 .timeout_ms = timeout_ms,
2376 };
2377 if (!MapPartitionWithSnapshot(lock.get(), std::move(params), SnapshotContext::Mount,
2378 nullptr)) {
2379 LOG(ERROR) << "MapAllSnapshots failed to map: " << snapshot;
2380 return false;
2381 }
2382 }
2383
2384 LOG(INFO) << "MapAllSnapshots succeeded.";
2385 return true;
2386 }
2387
UnmapAllSnapshots()2388 bool SnapshotManager::UnmapAllSnapshots() {
2389 auto lock = LockExclusive();
2390 if (!lock) return false;
2391
2392 return UnmapAllSnapshots(lock.get());
2393 }
2394
UnmapAllSnapshots(LockedFile * lock)2395 bool SnapshotManager::UnmapAllSnapshots(LockedFile* lock) {
2396 std::vector<std::string> snapshots;
2397 if (!ListSnapshots(lock, &snapshots)) {
2398 return false;
2399 }
2400
2401 for (const auto& snapshot : snapshots) {
2402 if (!UnmapPartitionWithSnapshot(lock, snapshot)) {
2403 LOG(ERROR) << "Failed to unmap snapshot: " << snapshot;
2404 return false;
2405 }
2406 }
2407
2408 // Terminate the daemon and release the snapuserd_client_ object.
2409 // If we need to re-connect with the daemon, EnsureSnapuserdConnected()
2410 // will re-create the object and establish the socket connection.
2411 if (snapuserd_client_) {
2412 LOG(INFO) << "Shutdown snapuserd daemon";
2413 snapuserd_client_->DetachSnapuserd();
2414 snapuserd_client_->CloseConnection();
2415 snapuserd_client_ = nullptr;
2416 }
2417
2418 return true;
2419 }
2420
OpenFile(const std::string & file,int lock_flags)2421 auto SnapshotManager::OpenFile(const std::string& file, int lock_flags)
2422 -> std::unique_ptr<LockedFile> {
2423 unique_fd fd(open(file.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
2424 if (fd < 0) {
2425 PLOG(ERROR) << "Open failed: " << file;
2426 return nullptr;
2427 }
2428 if (lock_flags != 0 && TEMP_FAILURE_RETRY(flock(fd, lock_flags)) < 0) {
2429 PLOG(ERROR) << "Acquire flock failed: " << file;
2430 return nullptr;
2431 }
2432 // For simplicity, we want to CHECK that lock_mode == LOCK_EX, in some
2433 // calls, so strip extra flags.
2434 int lock_mode = lock_flags & (LOCK_EX | LOCK_SH);
2435 return std::make_unique<LockedFile>(file, std::move(fd), lock_mode);
2436 }
2437
~LockedFile()2438 SnapshotManager::LockedFile::~LockedFile() {
2439 if (TEMP_FAILURE_RETRY(flock(fd_, LOCK_UN)) < 0) {
2440 PLOG(ERROR) << "Failed to unlock file: " << path_;
2441 }
2442 }
2443
GetStateFilePath() const2444 std::string SnapshotManager::GetStateFilePath() const {
2445 return metadata_dir_ + "/state"s;
2446 }
2447
GetMergeStateFilePath() const2448 std::string SnapshotManager::GetMergeStateFilePath() const {
2449 return metadata_dir_ + "/merge_state"s;
2450 }
2451
GetLockPath() const2452 std::string SnapshotManager::GetLockPath() const {
2453 return metadata_dir_;
2454 }
2455
OpenLock(int lock_flags)2456 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::OpenLock(int lock_flags) {
2457 auto lock_file = GetLockPath();
2458 return OpenFile(lock_file, lock_flags);
2459 }
2460
LockShared()2461 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockShared() {
2462 return OpenLock(LOCK_SH);
2463 }
2464
LockExclusive()2465 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockExclusive() {
2466 return OpenLock(LOCK_EX);
2467 }
2468
UpdateStateFromString(const std::string & contents)2469 static UpdateState UpdateStateFromString(const std::string& contents) {
2470 if (contents.empty() || contents == "none") {
2471 return UpdateState::None;
2472 } else if (contents == "initiated") {
2473 return UpdateState::Initiated;
2474 } else if (contents == "unverified") {
2475 return UpdateState::Unverified;
2476 } else if (contents == "merging") {
2477 return UpdateState::Merging;
2478 } else if (contents == "merge-completed") {
2479 return UpdateState::MergeCompleted;
2480 } else if (contents == "merge-needs-reboot") {
2481 return UpdateState::MergeNeedsReboot;
2482 } else if (contents == "merge-failed") {
2483 return UpdateState::MergeFailed;
2484 } else if (contents == "cancelled") {
2485 return UpdateState::Cancelled;
2486 } else {
2487 LOG(ERROR) << "Unknown merge state in update state file: \"" << contents << "\"";
2488 return UpdateState::None;
2489 }
2490 }
2491
operator <<(std::ostream & os,UpdateState state)2492 std::ostream& operator<<(std::ostream& os, UpdateState state) {
2493 switch (state) {
2494 case UpdateState::None:
2495 return os << "none";
2496 case UpdateState::Initiated:
2497 return os << "initiated";
2498 case UpdateState::Unverified:
2499 return os << "unverified";
2500 case UpdateState::Merging:
2501 return os << "merging";
2502 case UpdateState::MergeCompleted:
2503 return os << "merge-completed";
2504 case UpdateState::MergeNeedsReboot:
2505 return os << "merge-needs-reboot";
2506 case UpdateState::MergeFailed:
2507 return os << "merge-failed";
2508 case UpdateState::Cancelled:
2509 return os << "cancelled";
2510 default:
2511 LOG(ERROR) << "Unknown update state: " << static_cast<uint32_t>(state);
2512 return os;
2513 }
2514 }
2515
ReadUpdateState(LockedFile * lock)2516 UpdateState SnapshotManager::ReadUpdateState(LockedFile* lock) {
2517 SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock);
2518 return status.state();
2519 }
2520
ReadSnapshotUpdateStatus(LockedFile * lock)2521 SnapshotUpdateStatus SnapshotManager::ReadSnapshotUpdateStatus(LockedFile* lock) {
2522 CHECK(lock);
2523
2524 SnapshotUpdateStatus status = {};
2525 std::string contents;
2526 if (!android::base::ReadFileToString(GetStateFilePath(), &contents)) {
2527 PLOG(ERROR) << "Read state file failed";
2528 status.set_state(UpdateState::None);
2529 return status;
2530 }
2531
2532 if (!status.ParseFromString(contents)) {
2533 LOG(WARNING) << "Unable to parse state file as SnapshotUpdateStatus, using the old format";
2534
2535 // Try to rollback to legacy file to support devices that are
2536 // currently using the old file format.
2537 // TODO(b/147409432)
2538 status.set_state(UpdateStateFromString(contents));
2539 }
2540
2541 return status;
2542 }
2543
WriteUpdateState(LockedFile * lock,UpdateState state,MergeFailureCode failure_code)2544 bool SnapshotManager::WriteUpdateState(LockedFile* lock, UpdateState state,
2545 MergeFailureCode failure_code) {
2546 SnapshotUpdateStatus status;
2547 status.set_state(state);
2548
2549 switch (state) {
2550 case UpdateState::MergeFailed:
2551 status.set_merge_failure_code(failure_code);
2552 break;
2553 case UpdateState::Initiated:
2554 status.set_source_build_fingerprint(
2555 android::base::GetProperty("ro.build.fingerprint", ""));
2556 break;
2557 default:
2558 break;
2559 }
2560
2561 // If we're transitioning between two valid states (eg, we're not beginning
2562 // or ending an OTA), then make sure to propagate the compression bit and
2563 // build fingerprint.
2564 if (!(state == UpdateState::Initiated || state == UpdateState::None)) {
2565 SnapshotUpdateStatus old_status = ReadSnapshotUpdateStatus(lock);
2566 status.set_compression_enabled(old_status.compression_enabled());
2567 status.set_source_build_fingerprint(old_status.source_build_fingerprint());
2568 status.set_merge_phase(old_status.merge_phase());
2569 }
2570 return WriteSnapshotUpdateStatus(lock, status);
2571 }
2572
WriteSnapshotUpdateStatus(LockedFile * lock,const SnapshotUpdateStatus & status)2573 bool SnapshotManager::WriteSnapshotUpdateStatus(LockedFile* lock,
2574 const SnapshotUpdateStatus& status) {
2575 CHECK(lock);
2576 CHECK(lock->lock_mode() == LOCK_EX);
2577
2578 std::string contents;
2579 if (!status.SerializeToString(&contents)) {
2580 LOG(ERROR) << "Unable to serialize SnapshotUpdateStatus.";
2581 return false;
2582 }
2583
2584 #ifdef LIBSNAPSHOT_USE_HAL
2585 auto merge_status = MergeStatus::UNKNOWN;
2586 switch (status.state()) {
2587 // The needs-reboot and completed cases imply that /data and /metadata
2588 // can be safely wiped, so we don't report a merge status.
2589 case UpdateState::None:
2590 case UpdateState::MergeNeedsReboot:
2591 case UpdateState::MergeCompleted:
2592 case UpdateState::Initiated:
2593 merge_status = MergeStatus::NONE;
2594 break;
2595 case UpdateState::Unverified:
2596 merge_status = MergeStatus::SNAPSHOTTED;
2597 break;
2598 case UpdateState::Merging:
2599 case UpdateState::MergeFailed:
2600 merge_status = MergeStatus::MERGING;
2601 break;
2602 default:
2603 // Note that Cancelled flows to here - it is never written, since
2604 // it only communicates a transient state to the caller.
2605 LOG(ERROR) << "Unexpected update status: " << status.state();
2606 break;
2607 }
2608
2609 bool set_before_write =
2610 merge_status == MergeStatus::SNAPSHOTTED || merge_status == MergeStatus::MERGING;
2611 if (set_before_write && !device_->SetBootControlMergeStatus(merge_status)) {
2612 return false;
2613 }
2614 #endif
2615
2616 if (!WriteStringToFileAtomic(contents, GetStateFilePath())) {
2617 PLOG(ERROR) << "Could not write to state file";
2618 return false;
2619 }
2620
2621 #ifdef LIBSNAPSHOT_USE_HAL
2622 if (!set_before_write && !device_->SetBootControlMergeStatus(merge_status)) {
2623 return false;
2624 }
2625 #endif
2626 return true;
2627 }
2628
GetSnapshotStatusFilePath(const std::string & name)2629 std::string SnapshotManager::GetSnapshotStatusFilePath(const std::string& name) {
2630 auto file = metadata_dir_ + "/snapshots/"s + name;
2631 return file;
2632 }
2633
ReadSnapshotStatus(LockedFile * lock,const std::string & name,SnapshotStatus * status)2634 bool SnapshotManager::ReadSnapshotStatus(LockedFile* lock, const std::string& name,
2635 SnapshotStatus* status) {
2636 CHECK(lock);
2637 auto path = GetSnapshotStatusFilePath(name);
2638
2639 unique_fd fd(open(path.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
2640 if (fd < 0) {
2641 PLOG(ERROR) << "Open failed: " << path;
2642 return false;
2643 }
2644
2645 if (!status->ParseFromFileDescriptor(fd.get())) {
2646 PLOG(ERROR) << "Unable to parse " << path << " as SnapshotStatus";
2647 return false;
2648 }
2649
2650 if (status->name() != name) {
2651 LOG(WARNING) << "Found snapshot status named " << status->name() << " in " << path;
2652 status->set_name(name);
2653 }
2654
2655 return true;
2656 }
2657
WriteSnapshotStatus(LockedFile * lock,const SnapshotStatus & status)2658 bool SnapshotManager::WriteSnapshotStatus(LockedFile* lock, const SnapshotStatus& status) {
2659 // The caller must take an exclusive lock to modify snapshots.
2660 CHECK(lock);
2661 CHECK(lock->lock_mode() == LOCK_EX);
2662 CHECK(!status.name().empty());
2663
2664 auto path = GetSnapshotStatusFilePath(status.name());
2665
2666 std::string content;
2667 if (!status.SerializeToString(&content)) {
2668 LOG(ERROR) << "Unable to serialize SnapshotStatus for " << status.name();
2669 return false;
2670 }
2671
2672 if (!WriteStringToFileAtomic(content, path)) {
2673 PLOG(ERROR) << "Unable to write SnapshotStatus to " << path;
2674 return false;
2675 }
2676
2677 return true;
2678 }
2679
EnsureImageManager()2680 bool SnapshotManager::EnsureImageManager() {
2681 if (images_) return true;
2682
2683 images_ = device_->OpenImageManager();
2684 if (!images_) {
2685 LOG(ERROR) << "Could not open ImageManager";
2686 return false;
2687 }
2688 return true;
2689 }
2690
EnsureSnapuserdConnected()2691 bool SnapshotManager::EnsureSnapuserdConnected() {
2692 if (snapuserd_client_) {
2693 return true;
2694 }
2695
2696 if (!use_first_stage_snapuserd_ && !EnsureSnapuserdStarted()) {
2697 return false;
2698 }
2699
2700 snapuserd_client_ = SnapuserdClient::Connect(kSnapuserdSocket, 10s);
2701 if (!snapuserd_client_) {
2702 LOG(ERROR) << "Unable to connect to snapuserd";
2703 return false;
2704 }
2705 return true;
2706 }
2707
UnmapAndDeleteCowPartition(MetadataBuilder * current_metadata)2708 void SnapshotManager::UnmapAndDeleteCowPartition(MetadataBuilder* current_metadata) {
2709 std::vector<std::string> to_delete;
2710 for (auto* existing_cow_partition : current_metadata->ListPartitionsInGroup(kCowGroupName)) {
2711 if (!DeleteDeviceIfExists(existing_cow_partition->name())) {
2712 LOG(WARNING) << existing_cow_partition->name()
2713 << " cannot be unmapped and its space cannot be reclaimed";
2714 continue;
2715 }
2716 to_delete.push_back(existing_cow_partition->name());
2717 }
2718 for (const auto& name : to_delete) {
2719 current_metadata->RemovePartition(name);
2720 }
2721 }
2722
AddRequiredSpace(Return orig,const std::map<std::string,SnapshotStatus> & all_snapshot_status)2723 static Return AddRequiredSpace(Return orig,
2724 const std::map<std::string, SnapshotStatus>& all_snapshot_status) {
2725 if (orig.error_code() != Return::ErrorCode::NO_SPACE) {
2726 return orig;
2727 }
2728 uint64_t sum = 0;
2729 for (auto&& [name, status] : all_snapshot_status) {
2730 sum += status.cow_file_size();
2731 }
2732 return Return::NoSpace(sum);
2733 }
2734
CreateUpdateSnapshots(const DeltaArchiveManifest & manifest)2735 Return SnapshotManager::CreateUpdateSnapshots(const DeltaArchiveManifest& manifest) {
2736 auto lock = LockExclusive();
2737 if (!lock) return Return::Error();
2738
2739 auto update_state = ReadUpdateState(lock.get());
2740 if (update_state != UpdateState::Initiated) {
2741 LOG(ERROR) << "Cannot create update snapshots in state " << update_state;
2742 return Return::Error();
2743 }
2744
2745 // TODO(b/134949511): remove this check. Right now, with overlayfs mounted, the scratch
2746 // partition takes up a big chunk of space in super, causing COW images to be created on
2747 // retrofit Virtual A/B devices.
2748 if (device_->IsOverlayfsSetup()) {
2749 LOG(ERROR) << "Cannot create update snapshots with overlayfs setup. Run `adb enable-verity`"
2750 << ", reboot, then try again.";
2751 return Return::Error();
2752 }
2753
2754 const auto& opener = device_->GetPartitionOpener();
2755 auto current_suffix = device_->GetSlotSuffix();
2756 uint32_t current_slot = SlotNumberForSlotSuffix(current_suffix);
2757 auto target_suffix = device_->GetOtherSlotSuffix();
2758 uint32_t target_slot = SlotNumberForSlotSuffix(target_suffix);
2759 auto current_super = device_->GetSuperDevice(current_slot);
2760
2761 auto current_metadata = MetadataBuilder::New(opener, current_super, current_slot);
2762 if (current_metadata == nullptr) {
2763 LOG(ERROR) << "Cannot create metadata builder.";
2764 return Return::Error();
2765 }
2766
2767 auto target_metadata =
2768 MetadataBuilder::NewForUpdate(opener, current_super, current_slot, target_slot);
2769 if (target_metadata == nullptr) {
2770 LOG(ERROR) << "Cannot create target metadata builder.";
2771 return Return::Error();
2772 }
2773
2774 // Delete partitions with target suffix in |current_metadata|. Otherwise,
2775 // partition_cow_creator recognizes these left-over partitions as used space.
2776 for (const auto& group_name : current_metadata->ListGroups()) {
2777 if (android::base::EndsWith(group_name, target_suffix)) {
2778 current_metadata->RemoveGroupAndPartitions(group_name);
2779 }
2780 }
2781
2782 SnapshotMetadataUpdater metadata_updater(target_metadata.get(), target_slot, manifest);
2783 if (!metadata_updater.Update()) {
2784 LOG(ERROR) << "Cannot calculate new metadata.";
2785 return Return::Error();
2786 }
2787
2788 // Delete previous COW partitions in current_metadata so that PartitionCowCreator marks those as
2789 // free regions.
2790 UnmapAndDeleteCowPartition(current_metadata.get());
2791
2792 // Check that all these metadata is not retrofit dynamic partitions. Snapshots on
2793 // devices with retrofit dynamic partitions does not make sense.
2794 // This ensures that current_metadata->GetFreeRegions() uses the same device
2795 // indices as target_metadata (i.e. 0 -> "super").
2796 // This is also assumed in MapCowDevices() call below.
2797 CHECK(current_metadata->GetBlockDevicePartitionName(0) == LP_METADATA_DEFAULT_PARTITION_NAME &&
2798 target_metadata->GetBlockDevicePartitionName(0) == LP_METADATA_DEFAULT_PARTITION_NAME);
2799
2800 std::map<std::string, SnapshotStatus> all_snapshot_status;
2801
2802 // In case of error, automatically delete devices that are created along the way.
2803 // Note that "lock" is destroyed after "created_devices", so it is safe to use |lock| for
2804 // these devices.
2805 AutoDeviceList created_devices;
2806
2807 const auto& dap_metadata = manifest.dynamic_partition_metadata();
2808 CowOptions options;
2809 CowWriter writer(options);
2810 bool cow_format_support = true;
2811 if (dap_metadata.cow_version() < writer.GetCowVersion()) {
2812 cow_format_support = false;
2813 }
2814
2815 LOG(INFO) << " dap_metadata.cow_version(): " << dap_metadata.cow_version()
2816 << " writer.GetCowVersion(): " << writer.GetCowVersion();
2817
2818 bool use_compression = IsCompressionEnabled() && dap_metadata.vabc_enabled() &&
2819 !device_->IsRecovery() && cow_format_support;
2820
2821 std::string compression_algorithm;
2822 if (use_compression) {
2823 compression_algorithm = dap_metadata.vabc_compression_param();
2824 if (compression_algorithm.empty()) {
2825 // Older OTAs don't set an explicit compression type, so default to gz.
2826 compression_algorithm = "gz";
2827 }
2828 } else {
2829 compression_algorithm = "none";
2830 }
2831
2832 PartitionCowCreator cow_creator{
2833 .target_metadata = target_metadata.get(),
2834 .target_suffix = target_suffix,
2835 .target_partition = nullptr,
2836 .current_metadata = current_metadata.get(),
2837 .current_suffix = current_suffix,
2838 .update = nullptr,
2839 .extra_extents = {},
2840 .compression_enabled = use_compression,
2841 .compression_algorithm = compression_algorithm,
2842 };
2843
2844 auto ret = CreateUpdateSnapshotsInternal(lock.get(), manifest, &cow_creator, &created_devices,
2845 &all_snapshot_status);
2846 if (!ret.is_ok()) return ret;
2847
2848 auto exported_target_metadata = target_metadata->Export();
2849 if (exported_target_metadata == nullptr) {
2850 LOG(ERROR) << "Cannot export target metadata";
2851 return Return::Error();
2852 }
2853
2854 ret = InitializeUpdateSnapshots(lock.get(), target_metadata.get(),
2855 exported_target_metadata.get(), target_suffix,
2856 all_snapshot_status);
2857 if (!ret.is_ok()) return ret;
2858
2859 if (!UpdatePartitionTable(opener, device_->GetSuperDevice(target_slot),
2860 *exported_target_metadata, target_slot)) {
2861 LOG(ERROR) << "Cannot write target metadata";
2862 return Return::Error();
2863 }
2864
2865 // If compression is enabled, we need to retain a copy of the old metadata
2866 // so we can access original blocks in case they are moved around. We do
2867 // not want to rely on the old super metadata slot because we don't
2868 // guarantee its validity after the slot switch is successful.
2869 if (cow_creator.compression_enabled) {
2870 auto metadata = current_metadata->Export();
2871 if (!metadata) {
2872 LOG(ERROR) << "Could not export current metadata";
2873 return Return::Error();
2874 }
2875
2876 auto path = GetOldPartitionMetadataPath();
2877 if (!android::fs_mgr::WriteToImageFile(path, *metadata.get())) {
2878 LOG(ERROR) << "Cannot write old metadata to " << path;
2879 return Return::Error();
2880 }
2881 }
2882
2883 SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
2884 status.set_state(update_state);
2885 status.set_compression_enabled(cow_creator.compression_enabled);
2886 if (!WriteSnapshotUpdateStatus(lock.get(), status)) {
2887 LOG(ERROR) << "Unable to write new update state";
2888 return Return::Error();
2889 }
2890
2891 created_devices.Release();
2892 LOG(INFO) << "Successfully created all snapshots for target slot " << target_suffix;
2893
2894 return Return::Ok();
2895 }
2896
CreateUpdateSnapshotsInternal(LockedFile * lock,const DeltaArchiveManifest & manifest,PartitionCowCreator * cow_creator,AutoDeviceList * created_devices,std::map<std::string,SnapshotStatus> * all_snapshot_status)2897 Return SnapshotManager::CreateUpdateSnapshotsInternal(
2898 LockedFile* lock, const DeltaArchiveManifest& manifest, PartitionCowCreator* cow_creator,
2899 AutoDeviceList* created_devices,
2900 std::map<std::string, SnapshotStatus>* all_snapshot_status) {
2901 CHECK(lock);
2902
2903 auto* target_metadata = cow_creator->target_metadata;
2904 const auto& target_suffix = cow_creator->target_suffix;
2905
2906 if (!target_metadata->AddGroup(kCowGroupName, 0)) {
2907 LOG(ERROR) << "Cannot add group " << kCowGroupName;
2908 return Return::Error();
2909 }
2910
2911 std::map<std::string, const PartitionUpdate*> partition_map;
2912 std::map<std::string, std::vector<Extent>> extra_extents_map;
2913 for (const auto& partition_update : manifest.partitions()) {
2914 auto suffixed_name = partition_update.partition_name() + target_suffix;
2915 auto&& [it, inserted] = partition_map.emplace(suffixed_name, &partition_update);
2916 if (!inserted) {
2917 LOG(ERROR) << "Duplicated partition " << partition_update.partition_name()
2918 << " in update manifest.";
2919 return Return::Error();
2920 }
2921
2922 auto& extra_extents = extra_extents_map[suffixed_name];
2923 if (partition_update.has_hash_tree_extent()) {
2924 extra_extents.push_back(partition_update.hash_tree_extent());
2925 }
2926 if (partition_update.has_fec_extent()) {
2927 extra_extents.push_back(partition_update.fec_extent());
2928 }
2929 }
2930
2931 for (auto* target_partition : ListPartitionsWithSuffix(target_metadata, target_suffix)) {
2932 cow_creator->target_partition = target_partition;
2933 cow_creator->update = nullptr;
2934 auto iter = partition_map.find(target_partition->name());
2935 if (iter != partition_map.end()) {
2936 cow_creator->update = iter->second;
2937 } else {
2938 LOG(INFO) << target_partition->name()
2939 << " isn't included in the payload, skipping the cow creation.";
2940 continue;
2941 }
2942
2943 cow_creator->extra_extents.clear();
2944 auto extra_extents_it = extra_extents_map.find(target_partition->name());
2945 if (extra_extents_it != extra_extents_map.end()) {
2946 cow_creator->extra_extents = std::move(extra_extents_it->second);
2947 }
2948
2949 // Compute the device sizes for the partition.
2950 auto cow_creator_ret = cow_creator->Run();
2951 if (!cow_creator_ret.has_value()) {
2952 LOG(ERROR) << "PartitionCowCreator returned no value for " << target_partition->name();
2953 return Return::Error();
2954 }
2955
2956 LOG(INFO) << "For partition " << target_partition->name()
2957 << ", device size = " << cow_creator_ret->snapshot_status.device_size()
2958 << ", snapshot size = " << cow_creator_ret->snapshot_status.snapshot_size()
2959 << ", cow partition size = "
2960 << cow_creator_ret->snapshot_status.cow_partition_size()
2961 << ", cow file size = " << cow_creator_ret->snapshot_status.cow_file_size();
2962
2963 // Delete any existing snapshot before re-creating one.
2964 if (!DeleteSnapshot(lock, target_partition->name())) {
2965 LOG(ERROR) << "Cannot delete existing snapshot before creating a new one for partition "
2966 << target_partition->name();
2967 return Return::Error();
2968 }
2969
2970 // It is possible that the whole partition uses free space in super, and snapshot / COW
2971 // would not be needed. In this case, skip the partition.
2972 bool needs_snapshot = cow_creator_ret->snapshot_status.snapshot_size() > 0;
2973 bool needs_cow = (cow_creator_ret->snapshot_status.cow_partition_size() +
2974 cow_creator_ret->snapshot_status.cow_file_size()) > 0;
2975 CHECK(needs_snapshot == needs_cow);
2976
2977 if (!needs_snapshot) {
2978 LOG(INFO) << "Skip creating snapshot for partition " << target_partition->name()
2979 << "because nothing needs to be snapshotted.";
2980 continue;
2981 }
2982
2983 // Find the original partition size.
2984 auto name = target_partition->name();
2985 auto old_partition_name =
2986 name.substr(0, name.size() - target_suffix.size()) + cow_creator->current_suffix;
2987 auto old_partition = cow_creator->current_metadata->FindPartition(old_partition_name);
2988 if (old_partition) {
2989 cow_creator_ret->snapshot_status.set_old_partition_size(old_partition->size());
2990 }
2991
2992 // Store these device sizes to snapshot status file.
2993 if (!CreateSnapshot(lock, cow_creator, &cow_creator_ret->snapshot_status)) {
2994 return Return::Error();
2995 }
2996 created_devices->EmplaceBack<AutoDeleteSnapshot>(this, lock, target_partition->name());
2997
2998 // Create the COW partition. That is, use any remaining free space in super partition before
2999 // creating the COW images.
3000 if (cow_creator_ret->snapshot_status.cow_partition_size() > 0) {
3001 CHECK(cow_creator_ret->snapshot_status.cow_partition_size() % kSectorSize == 0)
3002 << "cow_partition_size == "
3003 << cow_creator_ret->snapshot_status.cow_partition_size()
3004 << " is not a multiple of sector size " << kSectorSize;
3005 auto cow_partition = target_metadata->AddPartition(GetCowName(target_partition->name()),
3006 kCowGroupName, 0 /* flags */);
3007 if (cow_partition == nullptr) {
3008 return Return::Error();
3009 }
3010
3011 if (!target_metadata->ResizePartition(
3012 cow_partition, cow_creator_ret->snapshot_status.cow_partition_size(),
3013 cow_creator_ret->cow_partition_usable_regions)) {
3014 LOG(ERROR) << "Cannot create COW partition on metadata with size "
3015 << cow_creator_ret->snapshot_status.cow_partition_size();
3016 return Return::Error();
3017 }
3018 // Only the in-memory target_metadata is modified; nothing to clean up if there is an
3019 // error in the future.
3020 }
3021
3022 all_snapshot_status->emplace(target_partition->name(),
3023 std::move(cow_creator_ret->snapshot_status));
3024
3025 LOG(INFO) << "Successfully created snapshot partition for " << target_partition->name();
3026 }
3027
3028 LOG(INFO) << "Allocating CoW images.";
3029
3030 for (auto&& [name, snapshot_status] : *all_snapshot_status) {
3031 // Create the backing COW image if necessary.
3032 if (snapshot_status.cow_file_size() > 0) {
3033 auto ret = CreateCowImage(lock, name);
3034 if (!ret.is_ok()) return AddRequiredSpace(ret, *all_snapshot_status);
3035 }
3036
3037 LOG(INFO) << "Successfully created snapshot for " << name;
3038 }
3039
3040 return Return::Ok();
3041 }
3042
InitializeUpdateSnapshots(LockedFile * lock,MetadataBuilder * target_metadata,const LpMetadata * exported_target_metadata,const std::string & target_suffix,const std::map<std::string,SnapshotStatus> & all_snapshot_status)3043 Return SnapshotManager::InitializeUpdateSnapshots(
3044 LockedFile* lock, MetadataBuilder* target_metadata,
3045 const LpMetadata* exported_target_metadata, const std::string& target_suffix,
3046 const std::map<std::string, SnapshotStatus>& all_snapshot_status) {
3047 CHECK(lock);
3048
3049 CreateLogicalPartitionParams cow_params{
3050 .block_device = LP_METADATA_DEFAULT_PARTITION_NAME,
3051 .metadata = exported_target_metadata,
3052 .timeout_ms = std::chrono::milliseconds::max(),
3053 .partition_opener = &device_->GetPartitionOpener(),
3054 };
3055 for (auto* target_partition : ListPartitionsWithSuffix(target_metadata, target_suffix)) {
3056 AutoDeviceList created_devices_for_cow;
3057
3058 if (!UnmapPartitionWithSnapshot(lock, target_partition->name())) {
3059 LOG(ERROR) << "Cannot unmap existing COW devices before re-mapping them for zero-fill: "
3060 << target_partition->name();
3061 return Return::Error();
3062 }
3063
3064 auto it = all_snapshot_status.find(target_partition->name());
3065 if (it == all_snapshot_status.end()) continue;
3066 cow_params.partition_name = target_partition->name();
3067 std::string cow_name;
3068 if (!MapCowDevices(lock, cow_params, it->second, &created_devices_for_cow, &cow_name)) {
3069 return Return::Error();
3070 }
3071
3072 std::string cow_path;
3073 if (!images_->GetMappedImageDevice(cow_name, &cow_path)) {
3074 LOG(ERROR) << "Cannot determine path for " << cow_name;
3075 return Return::Error();
3076 }
3077
3078 if (it->second.compression_enabled()) {
3079 unique_fd fd(open(cow_path.c_str(), O_RDWR | O_CLOEXEC));
3080 if (fd < 0) {
3081 PLOG(ERROR) << "open " << cow_path << " failed for snapshot "
3082 << cow_params.partition_name;
3083 return Return::Error();
3084 }
3085
3086 CowOptions options;
3087 if (device()->IsTestDevice()) {
3088 options.scratch_space = false;
3089 }
3090 options.compression = it->second.compression_algorithm();
3091
3092 CowWriter writer(options);
3093 if (!writer.Initialize(fd) || !writer.Finalize()) {
3094 LOG(ERROR) << "Could not initialize COW device for " << target_partition->name();
3095 return Return::Error();
3096 }
3097 } else {
3098 auto ret = InitializeKernelCow(cow_path);
3099 if (!ret.is_ok()) {
3100 LOG(ERROR) << "Can't zero-fill COW device for " << target_partition->name() << ": "
3101 << cow_path;
3102 return AddRequiredSpace(ret, all_snapshot_status);
3103 }
3104 }
3105 // Let destructor of created_devices_for_cow to unmap the COW devices.
3106 };
3107 return Return::Ok();
3108 }
3109
MapUpdateSnapshot(const CreateLogicalPartitionParams & params,std::string * snapshot_path)3110 bool SnapshotManager::MapUpdateSnapshot(const CreateLogicalPartitionParams& params,
3111 std::string* snapshot_path) {
3112 auto lock = LockShared();
3113 if (!lock) return false;
3114 if (!UnmapPartitionWithSnapshot(lock.get(), params.GetPartitionName())) {
3115 LOG(ERROR) << "Cannot unmap existing snapshot before re-mapping it: "
3116 << params.GetPartitionName();
3117 return false;
3118 }
3119
3120 SnapshotStatus status;
3121 if (!ReadSnapshotStatus(lock.get(), params.GetPartitionName(), &status)) {
3122 return false;
3123 }
3124 if (status.compression_enabled()) {
3125 LOG(ERROR) << "Cannot use MapUpdateSnapshot with compressed snapshots";
3126 return false;
3127 }
3128
3129 SnapshotPaths paths;
3130 if (!MapPartitionWithSnapshot(lock.get(), params, SnapshotContext::Update, &paths)) {
3131 return false;
3132 }
3133
3134 if (!paths.snapshot_device.empty()) {
3135 *snapshot_path = paths.snapshot_device;
3136 } else {
3137 *snapshot_path = paths.target_device;
3138 }
3139 DCHECK(!snapshot_path->empty());
3140 return true;
3141 }
3142
OpenSnapshotWriter(const android::fs_mgr::CreateLogicalPartitionParams & params,const std::optional<std::string> & source_device)3143 std::unique_ptr<ISnapshotWriter> SnapshotManager::OpenSnapshotWriter(
3144 const android::fs_mgr::CreateLogicalPartitionParams& params,
3145 const std::optional<std::string>& source_device) {
3146 #if defined(LIBSNAPSHOT_NO_COW_WRITE)
3147 (void)params;
3148 (void)source_device;
3149
3150 LOG(ERROR) << "Snapshots cannot be written in first-stage init or recovery";
3151 return nullptr;
3152 #else
3153 // First unmap any existing mapping.
3154 auto lock = LockShared();
3155 if (!lock) return nullptr;
3156 if (!UnmapPartitionWithSnapshot(lock.get(), params.GetPartitionName())) {
3157 LOG(ERROR) << "Cannot unmap existing snapshot before re-mapping it: "
3158 << params.GetPartitionName();
3159 return nullptr;
3160 }
3161
3162 SnapshotPaths paths;
3163 if (!MapPartitionWithSnapshot(lock.get(), params, SnapshotContext::Update, &paths)) {
3164 return nullptr;
3165 }
3166
3167 SnapshotStatus status;
3168 if (!paths.cow_device_name.empty()) {
3169 if (!ReadSnapshotStatus(lock.get(), params.GetPartitionName(), &status)) {
3170 return nullptr;
3171 }
3172 } else {
3173 // Currently, partition_cow_creator always creates snapshots. The
3174 // reason is that if partition X shrinks while partition Y grows, we
3175 // cannot bindly write to the newly freed extents in X. This would
3176 // make the old slot unusable. So, the entire size of the target
3177 // partition is currently considered snapshottable.
3178 LOG(ERROR) << "No snapshot available for partition " << params.GetPartitionName();
3179 return nullptr;
3180 }
3181
3182 if (status.compression_enabled()) {
3183 return OpenCompressedSnapshotWriter(lock.get(), source_device, params.GetPartitionName(),
3184 status, paths);
3185 }
3186 return OpenKernelSnapshotWriter(lock.get(), source_device, params.GetPartitionName(), status,
3187 paths);
3188 #endif
3189 }
3190
3191 #if !defined(LIBSNAPSHOT_NO_COW_WRITE)
3192 std::unique_ptr<ISnapshotWriter> SnapshotManager::OpenCompressedSnapshotWriter(
3193 LockedFile* lock, const std::optional<std::string>& source_device,
3194 [[maybe_unused]] const std::string& partition_name, const SnapshotStatus& status,
3195 const SnapshotPaths& paths) {
3196 CHECK(lock);
3197
3198 CowOptions cow_options;
3199 cow_options.compression = status.compression_algorithm();
3200 cow_options.max_blocks = {status.device_size() / cow_options.block_size};
3201 // Disable scratch space for vts tests
3202 if (device()->IsTestDevice()) {
3203 cow_options.scratch_space = false;
3204 }
3205
3206 // Currently we don't support partial snapshots, since partition_cow_creator
3207 // never creates this scenario.
3208 CHECK(status.snapshot_size() == status.device_size());
3209
3210 auto writer = std::make_unique<CompressedSnapshotWriter>(cow_options);
3211 if (source_device) {
3212 writer->SetSourceDevice(*source_device);
3213 }
3214
3215 std::string cow_path;
3216 if (!GetMappedImageDevicePath(paths.cow_device_name, &cow_path)) {
3217 LOG(ERROR) << "Could not determine path for " << paths.cow_device_name;
3218 return nullptr;
3219 }
3220
3221 unique_fd cow_fd(open(cow_path.c_str(), O_RDWR | O_CLOEXEC));
3222 if (cow_fd < 0) {
3223 PLOG(ERROR) << "OpenCompressedSnapshotWriter: open " << cow_path;
3224 return nullptr;
3225 }
3226 if (!writer->SetCowDevice(std::move(cow_fd))) {
3227 LOG(ERROR) << "Could not create COW writer from " << cow_path;
3228 return nullptr;
3229 }
3230
3231 return writer;
3232 }
3233
3234 std::unique_ptr<ISnapshotWriter> SnapshotManager::OpenKernelSnapshotWriter(
3235 LockedFile* lock, const std::optional<std::string>& source_device,
3236 [[maybe_unused]] const std::string& partition_name, const SnapshotStatus& status,
3237 const SnapshotPaths& paths) {
3238 CHECK(lock);
3239
3240 CowOptions cow_options;
3241 cow_options.max_blocks = {status.device_size() / cow_options.block_size};
3242
3243 auto writer = std::make_unique<OnlineKernelSnapshotWriter>(cow_options);
3244
3245 std::string path = paths.snapshot_device.empty() ? paths.target_device : paths.snapshot_device;
3246 unique_fd fd(open(path.c_str(), O_RDWR | O_CLOEXEC));
3247 if (fd < 0) {
3248 PLOG(ERROR) << "open failed: " << path;
3249 return nullptr;
3250 }
3251
3252 if (source_device) {
3253 writer->SetSourceDevice(*source_device);
3254 }
3255
3256 uint64_t cow_size = status.cow_partition_size() + status.cow_file_size();
3257 writer->SetSnapshotDevice(std::move(fd), cow_size);
3258
3259 return writer;
3260 }
3261 #endif // !defined(LIBSNAPSHOT_NO_COW_WRITE)
3262
UnmapUpdateSnapshot(const std::string & target_partition_name)3263 bool SnapshotManager::UnmapUpdateSnapshot(const std::string& target_partition_name) {
3264 auto lock = LockShared();
3265 if (!lock) return false;
3266 return UnmapPartitionWithSnapshot(lock.get(), target_partition_name);
3267 }
3268
UnmapAllPartitionsInRecovery()3269 bool SnapshotManager::UnmapAllPartitionsInRecovery() {
3270 auto lock = LockExclusive();
3271 if (!lock) return false;
3272
3273 const auto& opener = device_->GetPartitionOpener();
3274 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3275 auto super_device = device_->GetSuperDevice(slot);
3276 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
3277 if (!metadata) {
3278 LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
3279 return false;
3280 }
3281
3282 bool ok = true;
3283 for (const auto& partition : metadata->partitions) {
3284 auto partition_name = GetPartitionName(partition);
3285 ok &= UnmapPartitionWithSnapshot(lock.get(), partition_name);
3286 }
3287 return ok;
3288 }
3289
operator <<(std::ostream & os,SnapshotManager::Slot slot)3290 std::ostream& operator<<(std::ostream& os, SnapshotManager::Slot slot) {
3291 switch (slot) {
3292 case SnapshotManager::Slot::Unknown:
3293 return os << "unknown";
3294 case SnapshotManager::Slot::Source:
3295 return os << "source";
3296 case SnapshotManager::Slot::Target:
3297 return os << "target";
3298 }
3299 }
3300
Dump(std::ostream & os)3301 bool SnapshotManager::Dump(std::ostream& os) {
3302 // Don't actually lock. Dump() is for debugging purposes only, so it is okay
3303 // if it is racy.
3304 auto file = OpenLock(0 /* lock flag */);
3305 if (!file) return false;
3306
3307 std::stringstream ss;
3308
3309 auto update_status = ReadSnapshotUpdateStatus(file.get());
3310
3311 ss << "Update state: " << ReadUpdateState(file.get()) << std::endl;
3312 ss << "Compression: " << update_status.compression_enabled() << std::endl;
3313 ss << "Current slot: " << device_->GetSlotSuffix() << std::endl;
3314 ss << "Boot indicator: booting from " << GetCurrentSlot() << " slot" << std::endl;
3315 ss << "Rollback indicator: "
3316 << (access(GetRollbackIndicatorPath().c_str(), F_OK) == 0 ? "exists" : strerror(errno))
3317 << std::endl;
3318 ss << "Forward merge indicator: "
3319 << (access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0 ? "exists" : strerror(errno))
3320 << std::endl;
3321 ss << "Source build fingerprint: " << update_status.source_build_fingerprint() << std::endl;
3322
3323 bool ok = true;
3324 std::vector<std::string> snapshots;
3325 if (!ListSnapshots(file.get(), &snapshots)) {
3326 LOG(ERROR) << "Could not list snapshots";
3327 snapshots.clear();
3328 ok = false;
3329 }
3330 for (const auto& name : snapshots) {
3331 ss << "Snapshot: " << name << std::endl;
3332 SnapshotStatus status;
3333 if (!ReadSnapshotStatus(file.get(), name, &status)) {
3334 ok = false;
3335 continue;
3336 }
3337 ss << " state: " << SnapshotState_Name(status.state()) << std::endl;
3338 ss << " device size (bytes): " << status.device_size() << std::endl;
3339 ss << " snapshot size (bytes): " << status.snapshot_size() << std::endl;
3340 ss << " cow partition size (bytes): " << status.cow_partition_size() << std::endl;
3341 ss << " cow file size (bytes): " << status.cow_file_size() << std::endl;
3342 ss << " allocated sectors: " << status.sectors_allocated() << std::endl;
3343 ss << " metadata sectors: " << status.metadata_sectors() << std::endl;
3344 ss << " compression: " << status.compression_algorithm() << std::endl;
3345 }
3346 os << ss.rdbuf();
3347 return ok;
3348 }
3349
EnsureMetadataMounted()3350 std::unique_ptr<AutoDevice> SnapshotManager::EnsureMetadataMounted() {
3351 if (!device_->IsRecovery()) {
3352 // No need to mount anything in recovery.
3353 LOG(INFO) << "EnsureMetadataMounted does nothing in Android mode.";
3354 return std::unique_ptr<AutoUnmountDevice>(new AutoUnmountDevice());
3355 }
3356 auto ret = AutoUnmountDevice::New(device_->GetMetadataDir());
3357 if (ret == nullptr) return nullptr;
3358
3359 // In rescue mode, it is possible to erase and format metadata, but /metadata/ota is not
3360 // created to execute snapshot updates. Hence, subsequent calls is likely to fail because
3361 // Lock*() fails. By failing early and returning nullptr here, update_engine_sideload can
3362 // treat this case as if /metadata is not mounted.
3363 if (!LockShared()) {
3364 LOG(WARNING) << "/metadata is mounted, but errors occur when acquiring a shared lock. "
3365 "Subsequent calls to SnapshotManager will fail. Unmounting /metadata now.";
3366 return nullptr;
3367 }
3368 return ret;
3369 }
3370
HandleImminentDataWipe(const std::function<void ()> & callback)3371 bool SnapshotManager::HandleImminentDataWipe(const std::function<void()>& callback) {
3372 if (!device_->IsRecovery()) {
3373 LOG(ERROR) << "Data wipes are only allowed in recovery.";
3374 return false;
3375 }
3376
3377 auto mount = EnsureMetadataMounted();
3378 if (!mount || !mount->HasDevice()) {
3379 // We allow the wipe to continue, because if we can't mount /metadata,
3380 // it is unlikely the device would have booted anyway. If there is no
3381 // metadata partition, then the device predates Virtual A/B.
3382 return true;
3383 }
3384
3385 // Check this early, so we don't accidentally start trying to populate
3386 // the state file in recovery. Note we don't call GetUpdateState since
3387 // we want errors in acquiring the lock to be propagated, instead of
3388 // returning UpdateState::None.
3389 auto state_file = GetStateFilePath();
3390 if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
3391 return true;
3392 }
3393
3394 auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3395 auto super_path = device_->GetSuperDevice(slot_number);
3396 if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
3397 LOG(ERROR) << "Unable to map partitions to complete merge.";
3398 return false;
3399 }
3400
3401 auto process_callback = [&]() -> bool {
3402 if (callback) {
3403 callback();
3404 }
3405 return true;
3406 };
3407
3408 in_factory_data_reset_ = true;
3409 UpdateState state =
3410 ProcessUpdateStateOnDataWipe(true /* allow_forward_merge */, process_callback);
3411 in_factory_data_reset_ = false;
3412
3413 if (state == UpdateState::MergeFailed) {
3414 return false;
3415 }
3416
3417 // Nothing should be depending on partitions now, so unmap them all.
3418 if (!UnmapAllPartitionsInRecovery()) {
3419 LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
3420 }
3421
3422 if (state != UpdateState::None) {
3423 auto lock = LockExclusive();
3424 if (!lock) return false;
3425
3426 // Zap the update state so the bootloader doesn't think we're still
3427 // merging. It's okay if this fails, it's informative only at this
3428 // point.
3429 WriteUpdateState(lock.get(), UpdateState::None);
3430 }
3431 return true;
3432 }
3433
FinishMergeInRecovery()3434 bool SnapshotManager::FinishMergeInRecovery() {
3435 if (!device_->IsRecovery()) {
3436 LOG(ERROR) << "Data wipes are only allowed in recovery.";
3437 return false;
3438 }
3439
3440 auto mount = EnsureMetadataMounted();
3441 if (!mount || !mount->HasDevice()) {
3442 return false;
3443 }
3444
3445 auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3446 auto super_path = device_->GetSuperDevice(slot_number);
3447 if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
3448 LOG(ERROR) << "Unable to map partitions to complete merge.";
3449 return false;
3450 }
3451
3452 UpdateState state = ProcessUpdateState();
3453 if (state != UpdateState::MergeCompleted) {
3454 LOG(ERROR) << "Merge returned unexpected status: " << state;
3455 return false;
3456 }
3457
3458 // Nothing should be depending on partitions now, so unmap them all.
3459 if (!UnmapAllPartitionsInRecovery()) {
3460 LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
3461 }
3462 return true;
3463 }
3464
ProcessUpdateStateOnDataWipe(bool allow_forward_merge,const std::function<bool ()> & callback)3465 UpdateState SnapshotManager::ProcessUpdateStateOnDataWipe(bool allow_forward_merge,
3466 const std::function<bool()>& callback) {
3467 auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3468 UpdateState state = ProcessUpdateState(callback);
3469 LOG(INFO) << "Update state in recovery: " << state;
3470 switch (state) {
3471 case UpdateState::MergeFailed:
3472 LOG(ERROR) << "Unrecoverable merge failure detected.";
3473 return state;
3474 case UpdateState::Unverified: {
3475 // If an OTA was just applied but has not yet started merging:
3476 //
3477 // - if forward merge is allowed, initiate merge and call
3478 // ProcessUpdateState again.
3479 //
3480 // - if forward merge is not allowed, we
3481 // have no choice but to revert slots, because the current slot will
3482 // immediately become unbootable. Rather than wait for the device
3483 // to reboot N times until a rollback, we proactively disable the
3484 // new slot instead.
3485 //
3486 // Since the rollback is inevitable, we don't treat a HAL failure
3487 // as an error here.
3488 auto slot = GetCurrentSlot();
3489 if (slot == Slot::Target) {
3490 if (allow_forward_merge &&
3491 access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0) {
3492 LOG(INFO) << "Forward merge allowed, initiating merge now.";
3493
3494 if (!InitiateMerge()) {
3495 LOG(ERROR) << "Failed to initiate merge on data wipe.";
3496 return UpdateState::MergeFailed;
3497 }
3498 return ProcessUpdateStateOnDataWipe(false /* allow_forward_merge */, callback);
3499 }
3500
3501 LOG(ERROR) << "Reverting to old slot since update will be deleted.";
3502 device_->SetSlotAsUnbootable(slot_number);
3503 } else {
3504 LOG(INFO) << "Booting from " << slot << " slot, no action is taken.";
3505 }
3506 break;
3507 }
3508 case UpdateState::MergeNeedsReboot:
3509 // We shouldn't get here, because nothing is depending on
3510 // logical partitions.
3511 LOG(ERROR) << "Unexpected merge-needs-reboot state in recovery.";
3512 break;
3513 default:
3514 break;
3515 }
3516 return state;
3517 }
3518
EnsureNoOverflowSnapshot(LockedFile * lock)3519 bool SnapshotManager::EnsureNoOverflowSnapshot(LockedFile* lock) {
3520 CHECK(lock);
3521
3522 std::vector<std::string> snapshots;
3523 if (!ListSnapshots(lock, &snapshots)) {
3524 LOG(ERROR) << "Could not list snapshots.";
3525 return false;
3526 }
3527
3528 auto& dm = DeviceMapper::Instance();
3529 for (const auto& snapshot : snapshots) {
3530 SnapshotStatus status;
3531 if (!ReadSnapshotStatus(lock, snapshot, &status)) {
3532 return false;
3533 }
3534 if (status.compression_enabled()) {
3535 continue;
3536 }
3537
3538 std::vector<DeviceMapper::TargetInfo> targets;
3539 if (!dm.GetTableStatus(snapshot, &targets)) {
3540 LOG(ERROR) << "Could not read snapshot device table: " << snapshot;
3541 return false;
3542 }
3543 if (targets.size() != 1) {
3544 LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << snapshot
3545 << ", size = " << targets.size();
3546 return false;
3547 }
3548 if (targets[0].IsOverflowSnapshot()) {
3549 LOG(ERROR) << "Detected overflow in snapshot " << snapshot
3550 << ", CoW device size computation is wrong!";
3551 return false;
3552 }
3553 }
3554
3555 return true;
3556 }
3557
RecoveryCreateSnapshotDevices()3558 CreateResult SnapshotManager::RecoveryCreateSnapshotDevices() {
3559 if (!device_->IsRecovery()) {
3560 LOG(ERROR) << __func__ << " is only allowed in recovery.";
3561 return CreateResult::NOT_CREATED;
3562 }
3563
3564 auto mount = EnsureMetadataMounted();
3565 if (!mount || !mount->HasDevice()) {
3566 LOG(ERROR) << "Couldn't mount Metadata.";
3567 return CreateResult::NOT_CREATED;
3568 }
3569 return RecoveryCreateSnapshotDevices(mount);
3570 }
3571
RecoveryCreateSnapshotDevices(const std::unique_ptr<AutoDevice> & metadata_device)3572 CreateResult SnapshotManager::RecoveryCreateSnapshotDevices(
3573 const std::unique_ptr<AutoDevice>& metadata_device) {
3574 if (!device_->IsRecovery()) {
3575 LOG(ERROR) << __func__ << " is only allowed in recovery.";
3576 return CreateResult::NOT_CREATED;
3577 }
3578
3579 if (metadata_device == nullptr || !metadata_device->HasDevice()) {
3580 LOG(ERROR) << "Metadata not mounted.";
3581 return CreateResult::NOT_CREATED;
3582 }
3583
3584 auto state_file = GetStateFilePath();
3585 if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
3586 LOG(ERROR) << "Couldn't access state file.";
3587 return CreateResult::NOT_CREATED;
3588 }
3589
3590 if (!NeedSnapshotsInFirstStageMount()) {
3591 return CreateResult::NOT_CREATED;
3592 }
3593
3594 auto slot_suffix = device_->GetOtherSlotSuffix();
3595 auto slot_number = SlotNumberForSlotSuffix(slot_suffix);
3596 auto super_path = device_->GetSuperDevice(slot_number);
3597 if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
3598 LOG(ERROR) << "Unable to map partitions.";
3599 return CreateResult::ERROR;
3600 }
3601 return CreateResult::CREATED;
3602 }
3603
UpdateForwardMergeIndicator(bool wipe)3604 bool SnapshotManager::UpdateForwardMergeIndicator(bool wipe) {
3605 auto path = GetForwardMergeIndicatorPath();
3606
3607 if (!wipe) {
3608 LOG(INFO) << "Wipe is not scheduled. Deleting forward merge indicator.";
3609 return RemoveFileIfExists(path);
3610 }
3611
3612 // TODO(b/152094219): Don't forward merge if no CoW file is allocated.
3613
3614 LOG(INFO) << "Wipe will be scheduled. Allowing forward merge of snapshots.";
3615 if (!android::base::WriteStringToFile("1", path)) {
3616 PLOG(ERROR) << "Unable to write forward merge indicator: " << path;
3617 return false;
3618 }
3619
3620 return true;
3621 }
3622
GetSnapshotMergeStatsInstance()3623 ISnapshotMergeStats* SnapshotManager::GetSnapshotMergeStatsInstance() {
3624 return SnapshotMergeStats::GetInstance(*this);
3625 }
3626
3627 // This is only to be used in recovery or normal Android (not first-stage init).
3628 // We don't guarantee dm paths are available in first-stage init, because ueventd
3629 // isn't running yet.
GetMappedImageDevicePath(const std::string & device_name,std::string * device_path)3630 bool SnapshotManager::GetMappedImageDevicePath(const std::string& device_name,
3631 std::string* device_path) {
3632 auto& dm = DeviceMapper::Instance();
3633
3634 // Try getting the device string if it is a device mapper device.
3635 if (dm.GetState(device_name) != DmDeviceState::INVALID) {
3636 return dm.GetDmDevicePathByName(device_name, device_path);
3637 }
3638
3639 // Otherwise, get path from IImageManager.
3640 return images_->GetMappedImageDevice(device_name, device_path);
3641 }
3642
GetMappedImageDeviceStringOrPath(const std::string & device_name,std::string * device_string_or_mapped_path)3643 bool SnapshotManager::GetMappedImageDeviceStringOrPath(const std::string& device_name,
3644 std::string* device_string_or_mapped_path) {
3645 auto& dm = DeviceMapper::Instance();
3646 // Try getting the device string if it is a device mapper device.
3647 if (dm.GetState(device_name) != DmDeviceState::INVALID) {
3648 return dm.GetDeviceString(device_name, device_string_or_mapped_path);
3649 }
3650
3651 // Otherwise, get path from IImageManager.
3652 if (!images_->GetMappedImageDevice(device_name, device_string_or_mapped_path)) {
3653 return false;
3654 }
3655
3656 LOG(WARNING) << "Calling GetMappedImageDevice with local image manager; device "
3657 << (device_string_or_mapped_path ? *device_string_or_mapped_path : "(nullptr)")
3658 << "may not be available in first stage init! ";
3659 return true;
3660 }
3661
WaitForDevice(const std::string & device,std::chrono::milliseconds timeout_ms)3662 bool SnapshotManager::WaitForDevice(const std::string& device,
3663 std::chrono::milliseconds timeout_ms) {
3664 if (!android::base::StartsWith(device, "/")) {
3665 return true;
3666 }
3667
3668 // In first-stage init, we rely on init setting a callback which can
3669 // regenerate uevents and populate /dev for us.
3670 if (uevent_regen_callback_) {
3671 if (!uevent_regen_callback_(device)) {
3672 LOG(ERROR) << "Failed to find device after regenerating uevents: " << device;
3673 return false;
3674 }
3675 return true;
3676 }
3677
3678 // Otherwise, the only kind of device we need to wait for is a dm-user
3679 // misc device. Normal calls to DeviceMapper::CreateDevice() guarantee
3680 // the path has been created.
3681 if (!android::base::StartsWith(device, "/dev/dm-user/")) {
3682 return true;
3683 }
3684
3685 if (timeout_ms.count() == 0) {
3686 LOG(ERROR) << "No timeout was specified to wait for device: " << device;
3687 return false;
3688 }
3689 if (!android::fs_mgr::WaitForFile(device, timeout_ms)) {
3690 LOG(ERROR) << "Timed out waiting for device to appear: " << device;
3691 return false;
3692 }
3693 return true;
3694 }
3695
IsSnapuserdRequired()3696 bool SnapshotManager::IsSnapuserdRequired() {
3697 auto lock = LockExclusive();
3698 if (!lock) return false;
3699
3700 auto status = ReadSnapshotUpdateStatus(lock.get());
3701 return status.state() != UpdateState::None && status.compression_enabled();
3702 }
3703
DetachSnapuserdForSelinux(std::vector<std::string> * snapuserd_argv)3704 bool SnapshotManager::DetachSnapuserdForSelinux(std::vector<std::string>* snapuserd_argv) {
3705 return PerformInitTransition(InitTransition::SELINUX_DETACH, snapuserd_argv);
3706 }
3707
PerformSecondStageInitTransition()3708 bool SnapshotManager::PerformSecondStageInitTransition() {
3709 return PerformInitTransition(InitTransition::SECOND_STAGE);
3710 }
3711
ReadOldPartitionMetadata(LockedFile * lock)3712 const LpMetadata* SnapshotManager::ReadOldPartitionMetadata(LockedFile* lock) {
3713 CHECK(lock);
3714
3715 if (!old_partition_metadata_) {
3716 auto path = GetOldPartitionMetadataPath();
3717 old_partition_metadata_ = android::fs_mgr::ReadFromImageFile(path);
3718 if (!old_partition_metadata_) {
3719 LOG(ERROR) << "Could not read old partition metadata from " << path;
3720 return nullptr;
3721 }
3722 }
3723 return old_partition_metadata_.get();
3724 }
3725
DecideMergePhase(const SnapshotStatus & status)3726 MergePhase SnapshotManager::DecideMergePhase(const SnapshotStatus& status) {
3727 if (status.compression_enabled() && status.device_size() < status.old_partition_size()) {
3728 return MergePhase::FIRST_PHASE;
3729 }
3730 return MergePhase::SECOND_PHASE;
3731 }
3732
UpdateCowStats(ISnapshotMergeStats * stats)3733 void SnapshotManager::UpdateCowStats(ISnapshotMergeStats* stats) {
3734 auto lock = LockExclusive();
3735 if (!lock) return;
3736
3737 std::vector<std::string> snapshots;
3738 if (!ListSnapshots(lock.get(), &snapshots, GetSnapshotSlotSuffix())) {
3739 LOG(ERROR) << "Could not list snapshots";
3740 return;
3741 }
3742
3743 uint64_t cow_file_size = 0;
3744 uint64_t total_cow_size = 0;
3745 uint64_t estimated_cow_size = 0;
3746 for (const auto& snapshot : snapshots) {
3747 SnapshotStatus status;
3748 if (!ReadSnapshotStatus(lock.get(), snapshot, &status)) {
3749 return;
3750 }
3751
3752 cow_file_size += status.cow_file_size();
3753 total_cow_size += status.cow_file_size() + status.cow_partition_size();
3754 estimated_cow_size += status.estimated_cow_size();
3755 }
3756
3757 stats->set_cow_file_size(cow_file_size);
3758 stats->set_total_cow_size_bytes(total_cow_size);
3759 stats->set_estimated_cow_size_bytes(estimated_cow_size);
3760 }
3761
DeleteDeviceIfExists(const std::string & name,const std::chrono::milliseconds & timeout_ms)3762 bool SnapshotManager::DeleteDeviceIfExists(const std::string& name,
3763 const std::chrono::milliseconds& timeout_ms) {
3764 auto& dm = DeviceMapper::Instance();
3765 auto start = std::chrono::steady_clock::now();
3766 while (true) {
3767 if (dm.DeleteDeviceIfExists(name)) {
3768 return true;
3769 }
3770 auto now = std::chrono::steady_clock::now();
3771 auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(now - start);
3772 if (elapsed >= timeout_ms) {
3773 break;
3774 }
3775 std::this_thread::sleep_for(400ms);
3776 }
3777
3778 // Try to diagnose why this failed. First get the actual device path.
3779 std::string full_path;
3780 if (!dm.GetDmDevicePathByName(name, &full_path)) {
3781 LOG(ERROR) << "Unable to diagnose DM_DEV_REMOVE failure.";
3782 return false;
3783 }
3784
3785 // Check for child dm-devices.
3786 std::string block_name = android::base::Basename(full_path);
3787 std::string sysfs_holders = "/sys/class/block/" + block_name + "/holders";
3788
3789 std::error_code ec;
3790 std::filesystem::directory_iterator dir_iter(sysfs_holders, ec);
3791 if (auto begin = std::filesystem::begin(dir_iter); begin != std::filesystem::end(dir_iter)) {
3792 LOG(ERROR) << "Child device-mapper device still mapped: " << begin->path();
3793 return false;
3794 }
3795
3796 // Check for mounted partitions.
3797 android::fs_mgr::Fstab fstab;
3798 android::fs_mgr::ReadFstabFromFile("/proc/mounts", &fstab);
3799 for (const auto& entry : fstab) {
3800 if (android::base::Basename(entry.blk_device) == block_name) {
3801 LOG(ERROR) << "Partition still mounted: " << entry.mount_point;
3802 return false;
3803 }
3804 }
3805
3806 // Check for detached mounted partitions.
3807 for (const auto& fs : std::filesystem::directory_iterator("/sys/fs", ec)) {
3808 std::string fs_type = android::base::Basename(fs.path().c_str());
3809 if (!(fs_type == "ext4" || fs_type == "f2fs")) {
3810 continue;
3811 }
3812
3813 std::string path = fs.path().c_str() + "/"s + block_name;
3814 if (access(path.c_str(), F_OK) == 0) {
3815 LOG(ERROR) << "Block device was lazily unmounted and is still in-use: " << full_path
3816 << "; possibly open file descriptor or attached loop device.";
3817 return false;
3818 }
3819 }
3820
3821 LOG(ERROR) << "Device-mapper device " << name << "(" << full_path << ")"
3822 << " still in use."
3823 << " Probably a file descriptor was leaked or held open, or a loop device is"
3824 << " attached.";
3825 return false;
3826 }
3827
ReadMergeFailureCode()3828 MergeFailureCode SnapshotManager::ReadMergeFailureCode() {
3829 auto lock = LockExclusive();
3830 if (!lock) return MergeFailureCode::AcquireLock;
3831
3832 SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
3833 if (status.state() != UpdateState::MergeFailed) {
3834 return MergeFailureCode::Ok;
3835 }
3836 return status.merge_failure_code();
3837 }
3838
ReadSourceBuildFingerprint()3839 std::string SnapshotManager::ReadSourceBuildFingerprint() {
3840 auto lock = LockExclusive();
3841 if (!lock) return {};
3842
3843 SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
3844 return status.source_build_fingerprint();
3845 }
3846
3847 } // namespace snapshot
3848 } // namespace android
3849