1 //
2 // Copyright (C) 2020 The Android Open Source Project
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include <stdio.h>
17 #include <sys/stat.h>
18 #include <sys/types.h>
19 #include <unistd.h>
20 
21 #include <iostream>
22 #include <memory>
23 #include <string>
24 #include <unordered_map>
25 #include <unordered_set>
26 
27 #include <android-base/file.h>
28 #include <android-base/logging.h>
29 #include <android-base/strings.h>
30 #include <android-base/unique_fd.h>
31 #include <gflags/gflags.h>
32 #include <libsnapshot/cow_writer.h>
33 #include <openssl/sha.h>
34 #include <sparse/sparse.h>
35 #include <ziparchive/zip_archive.h>
36 
37 DEFINE_string(source_tf, "", "Source target files (dir or zip file)");
38 DEFINE_string(ota_tf, "", "Target files of the build for an OTA");
39 DEFINE_string(compression, "gz", "Compression (options: none, gz, brotli)");
40 
41 namespace android {
42 namespace snapshot {
43 
44 using android::base::borrowed_fd;
45 using android::base::unique_fd;
46 
47 static constexpr size_t kBlockSize = 4096;
48 
MyLogger(android::base::LogId,android::base::LogSeverity severity,const char *,const char *,unsigned int,const char * message)49 void MyLogger(android::base::LogId, android::base::LogSeverity severity, const char*, const char*,
50               unsigned int, const char* message) {
51     if (severity == android::base::ERROR) {
52         fprintf(stderr, "%s\n", message);
53     } else {
54         fprintf(stdout, "%s\n", message);
55     }
56 }
57 
58 class TargetFilesPackage final {
59   public:
60     explicit TargetFilesPackage(const std::string& path);
61 
62     bool Open();
63     bool HasFile(const std::string& path);
64     std::unordered_set<std::string> GetDynamicPartitionNames();
65     unique_fd OpenFile(const std::string& path);
66     unique_fd OpenImage(const std::string& path);
67 
68   private:
69     std::string path_;
70     unique_fd fd_;
71     std::unique_ptr<ZipArchive, decltype(&CloseArchive)> zip_;
72 };
73 
TargetFilesPackage(const std::string & path)74 TargetFilesPackage::TargetFilesPackage(const std::string& path)
75     : path_(path), zip_(nullptr, &CloseArchive) {}
76 
Open()77 bool TargetFilesPackage::Open() {
78     fd_.reset(open(path_.c_str(), O_RDONLY));
79     if (fd_ < 0) {
80         PLOG(ERROR) << "open failed: " << path_;
81         return false;
82     }
83 
84     struct stat s;
85     if (fstat(fd_.get(), &s) < 0) {
86         PLOG(ERROR) << "fstat failed: " << path_;
87         return false;
88     }
89     if (S_ISDIR(s.st_mode)) {
90         return true;
91     }
92 
93     // Otherwise, assume it's a zip file.
94     ZipArchiveHandle handle;
95     if (OpenArchiveFd(fd_.get(), path_.c_str(), &handle, false)) {
96         LOG(ERROR) << "Could not open " << path_ << " as a zip archive.";
97         return false;
98     }
99     zip_.reset(handle);
100     return true;
101 }
102 
HasFile(const std::string & path)103 bool TargetFilesPackage::HasFile(const std::string& path) {
104     if (zip_) {
105         ZipEntry64 entry;
106         return !FindEntry(zip_.get(), path, &entry);
107     }
108 
109     auto full_path = path_ + "/" + path;
110     return access(full_path.c_str(), F_OK) == 0;
111 }
112 
OpenFile(const std::string & path)113 unique_fd TargetFilesPackage::OpenFile(const std::string& path) {
114     if (!zip_) {
115         auto full_path = path_ + "/" + path;
116         unique_fd fd(open(full_path.c_str(), O_RDONLY));
117         if (fd < 0) {
118             PLOG(ERROR) << "open failed: " << full_path;
119             return {};
120         }
121         return fd;
122     }
123 
124     ZipEntry64 entry;
125     if (FindEntry(zip_.get(), path, &entry)) {
126         LOG(ERROR) << path << " not found in archive: " << path_;
127         return {};
128     }
129 
130     TemporaryFile temp;
131     if (temp.fd < 0) {
132         PLOG(ERROR) << "mkstemp failed";
133         return {};
134     }
135 
136     LOG(INFO) << "Extracting " << path << " from " << path_ << " ...";
137     if (ExtractEntryToFile(zip_.get(), &entry, temp.fd)) {
138         LOG(ERROR) << "could not extract " << path << " from " << path_;
139         return {};
140     }
141     if (lseek(temp.fd, 0, SEEK_SET) < 0) {
142         PLOG(ERROR) << "lseek failed";
143         return {};
144     }
145     return unique_fd{temp.release()};
146 }
147 
OpenImage(const std::string & path)148 unique_fd TargetFilesPackage::OpenImage(const std::string& path) {
149     auto fd = OpenFile(path);
150     if (fd < 0) {
151         return {};
152     }
153 
154     LOG(INFO) << "Unsparsing " << path << " ...";
155     std::unique_ptr<struct sparse_file, decltype(&sparse_file_destroy)> s(
156             sparse_file_import(fd.get(), false, false), &sparse_file_destroy);
157     if (!s) {
158         return fd;
159     }
160 
161     TemporaryFile temp;
162     if (temp.fd < 0) {
163         PLOG(ERROR) << "mkstemp failed";
164         return {};
165     }
166     if (sparse_file_write(s.get(), temp.fd, false, false, false) < 0) {
167         LOG(ERROR) << "sparse_file_write failed";
168         return {};
169     }
170     if (lseek(temp.fd, 0, SEEK_SET) < 0) {
171         PLOG(ERROR) << "lseek failed";
172         return {};
173     }
174 
175     fd.reset(temp.release());
176     return fd;
177 }
178 
GetDynamicPartitionNames()179 std::unordered_set<std::string> TargetFilesPackage::GetDynamicPartitionNames() {
180     auto fd = OpenFile("META/misc_info.txt");
181     if (fd < 0) {
182         return {};
183     }
184 
185     std::string contents;
186     if (!android::base::ReadFdToString(fd, &contents)) {
187         PLOG(ERROR) << "read failed";
188         return {};
189     }
190 
191     std::unordered_set<std::string> set;
192 
193     auto lines = android::base::Split(contents, "\n");
194     for (const auto& line : lines) {
195         auto parts = android::base::Split(line, "=");
196         if (parts.size() == 2 && parts[0] == "dynamic_partition_list") {
197             auto partitions = android::base::Split(parts[1], " ");
198             for (const auto& name : partitions) {
199                 if (!name.empty()) {
200                     set.emplace(name);
201                 }
202             }
203             break;
204         }
205     }
206     return set;
207 }
208 
209 class NonAbEstimator final {
210   public:
NonAbEstimator(const std::string & ota_tf_path,const std::string & source_tf_path)211     NonAbEstimator(const std::string& ota_tf_path, const std::string& source_tf_path)
212         : ota_tf_path_(ota_tf_path), source_tf_path_(source_tf_path) {}
213 
214     bool Run();
215 
216   private:
217     bool OpenPackages();
218     bool AnalyzePartition(const std::string& partition_name);
219     std::unordered_map<std::string, uint64_t> GetBlockMap(borrowed_fd fd);
220 
221     std::string ota_tf_path_;
222     std::string source_tf_path_;
223     std::unique_ptr<TargetFilesPackage> ota_tf_;
224     std::unique_ptr<TargetFilesPackage> source_tf_;
225     uint64_t size_ = 0;
226 };
227 
Run()228 bool NonAbEstimator::Run() {
229     if (!OpenPackages()) {
230         return false;
231     }
232 
233     auto partitions = ota_tf_->GetDynamicPartitionNames();
234     if (partitions.empty()) {
235         LOG(ERROR) << "No dynamic partitions found in META/misc_info.txt";
236         return false;
237     }
238     for (const auto& partition : partitions) {
239         if (!AnalyzePartition(partition)) {
240             return false;
241         }
242     }
243 
244     int64_t size_in_mb = int64_t(double(size_) / 1024.0 / 1024.0);
245 
246     std::cout << "Estimated COW size: " << size_ << " (" << size_in_mb << "MiB)\n";
247     return true;
248 }
249 
OpenPackages()250 bool NonAbEstimator::OpenPackages() {
251     ota_tf_ = std::make_unique<TargetFilesPackage>(ota_tf_path_);
252     if (!ota_tf_->Open()) {
253         return false;
254     }
255     if (!source_tf_path_.empty()) {
256         source_tf_ = std::make_unique<TargetFilesPackage>(source_tf_path_);
257         if (!source_tf_->Open()) {
258             return false;
259         }
260     }
261     return true;
262 }
263 
SHA256(const std::string & input)264 static std::string SHA256(const std::string& input) {
265     std::string hash(32, '\0');
266     SHA256_CTX c;
267     SHA256_Init(&c);
268     SHA256_Update(&c, input.data(), input.size());
269     SHA256_Final(reinterpret_cast<unsigned char*>(hash.data()), &c);
270     return hash;
271 }
272 
AnalyzePartition(const std::string & partition_name)273 bool NonAbEstimator::AnalyzePartition(const std::string& partition_name) {
274     auto path = "IMAGES/" + partition_name + ".img";
275     auto fd = ota_tf_->OpenImage(path);
276     if (fd < 0) {
277         return false;
278     }
279 
280     unique_fd source_fd;
281     uint64_t source_size = 0;
282     std::unordered_map<std::string, uint64_t> source_blocks;
283     if (source_tf_) {
284         auto dap = source_tf_->GetDynamicPartitionNames();
285 
286         source_fd = source_tf_->OpenImage(path);
287         if (source_fd >= 0) {
288             struct stat s;
289             if (fstat(source_fd.get(), &s)) {
290                 PLOG(ERROR) << "fstat failed";
291                 return false;
292             }
293             source_size = s.st_size;
294 
295             std::cout << "Hashing blocks for " << partition_name << "...\n";
296             source_blocks = GetBlockMap(source_fd);
297             if (source_blocks.empty()) {
298                 LOG(ERROR) << "Could not build a block map for source partition: "
299                            << partition_name;
300                 return false;
301             }
302         } else {
303             if (dap.count(partition_name)) {
304                 return false;
305             }
306             LOG(ERROR) << "Warning: " << partition_name
307                        << " has no incremental diff since it's not in the source image.";
308         }
309     }
310 
311     TemporaryFile cow;
312     if (cow.fd < 0) {
313         PLOG(ERROR) << "mkstemp failed";
314         return false;
315     }
316 
317     CowOptions options;
318     options.block_size = kBlockSize;
319     options.compression = FLAGS_compression;
320 
321     auto writer = std::make_unique<CowWriter>(options);
322     if (!writer->Initialize(borrowed_fd{cow.fd})) {
323         LOG(ERROR) << "Could not initialize COW writer";
324         return false;
325     }
326 
327     LOG(INFO) << "Analyzing " << partition_name << " ...";
328 
329     std::string zeroes(kBlockSize, '\0');
330     std::string chunk(kBlockSize, '\0');
331     std::string src_chunk(kBlockSize, '\0');
332     uint64_t next_block_number = 0;
333     while (true) {
334         if (!android::base::ReadFully(fd, chunk.data(), chunk.size())) {
335             if (errno) {
336                 PLOG(ERROR) << "read failed";
337                 return false;
338             }
339             break;
340         }
341 
342         uint64_t block_number = next_block_number++;
343         if (chunk == zeroes) {
344             if (!writer->AddZeroBlocks(block_number, 1)) {
345                 LOG(ERROR) << "Could not add zero block";
346                 return false;
347             }
348             continue;
349         }
350 
351         uint64_t source_offset = block_number * kBlockSize;
352         if (source_fd >= 0 && source_offset <= source_size) {
353             off64_t offset = block_number * kBlockSize;
354             if (android::base::ReadFullyAtOffset(source_fd, src_chunk.data(), src_chunk.size(),
355                                                  offset)) {
356                 if (chunk == src_chunk) {
357                     continue;
358                 }
359             } else if (errno) {
360                 PLOG(ERROR) << "pread failed";
361                 return false;
362             }
363         }
364 
365         auto hash = SHA256(chunk);
366         if (auto iter = source_blocks.find(hash); iter != source_blocks.end()) {
367             if (!writer->AddCopy(block_number, iter->second)) {
368                 return false;
369             }
370             continue;
371         }
372 
373         if (!writer->AddRawBlocks(block_number, chunk.data(), chunk.size())) {
374             return false;
375         }
376     }
377 
378     if (!writer->Finalize()) {
379         return false;
380     }
381 
382     struct stat s;
383     if (fstat(cow.fd, &s) < 0) {
384         PLOG(ERROR) << "fstat failed";
385         return false;
386     }
387 
388     size_ += s.st_size;
389     return true;
390 }
391 
GetBlockMap(borrowed_fd fd)392 std::unordered_map<std::string, uint64_t> NonAbEstimator::GetBlockMap(borrowed_fd fd) {
393     std::string chunk(kBlockSize, '\0');
394 
395     std::unordered_map<std::string, uint64_t> block_map;
396     uint64_t block_number = 0;
397     while (true) {
398         if (!android::base::ReadFully(fd, chunk.data(), chunk.size())) {
399             if (errno) {
400                 PLOG(ERROR) << "read failed";
401                 return {};
402             }
403             break;
404         }
405         auto hash = SHA256(chunk);
406         block_map[hash] = block_number;
407         block_number++;
408     }
409     return block_map;
410 }
411 
412 }  // namespace snapshot
413 }  // namespace android
414 
415 using namespace android::snapshot;
416 
main(int argc,char ** argv)417 int main(int argc, char** argv) {
418     android::base::InitLogging(argv, android::snapshot::MyLogger);
419     gflags::SetUsageMessage("Estimate VAB disk usage from Non A/B builds");
420     gflags::ParseCommandLineFlags(&argc, &argv, false);
421 
422     if (FLAGS_ota_tf.empty()) {
423         std::cerr << "Must specify -ota_tf on the command-line." << std::endl;
424         return 1;
425     }
426 
427     NonAbEstimator estimator(FLAGS_ota_tf, FLAGS_source_tf);
428     if (!estimator.Run()) {
429         return 1;
430     }
431     return 0;
432 }
433