1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <inttypes.h>
18 #include <libgen.h>
19 #include <signal.h>
20 #include <sys/mman.h>
21 #include <sys/prctl.h>
22 #include <sys/utsname.h>
23 #include <time.h>
24 #include <unistd.h>
25 #include <filesystem>
26 #include <optional>
27 #include <set>
28 #include <string>
29 #include <unordered_map>
30 #include <unordered_set>
31 #include <vector>
32 
33 #include <android-base/file.h>
34 #include <android-base/logging.h>
35 #include <android-base/parseint.h>
36 #include <android-base/scopeguard.h>
37 #include <android-base/stringprintf.h>
38 #include <android-base/strings.h>
39 #include <android-base/unique_fd.h>
40 
41 #pragma clang diagnostic push
42 #pragma clang diagnostic ignored "-Wunused-parameter"
43 #include <llvm/Support/MemoryBuffer.h>
44 #pragma clang diagnostic pop
45 
46 #if defined(__ANDROID__)
47 #include <android-base/properties.h>
48 #endif
49 #include <unwindstack/Error.h>
50 
51 #include "CallChainJoiner.h"
52 #include "ETMRecorder.h"
53 #include "IOEventLoop.h"
54 #include "JITDebugReader.h"
55 #include "MapRecordReader.h"
56 #include "OfflineUnwinder.h"
57 #include "ProbeEvents.h"
58 #include "RecordFilter.h"
59 #include "cmd_record_impl.h"
60 #include "command.h"
61 #include "environment.h"
62 #include "event_selection_set.h"
63 #include "event_type.h"
64 #include "kallsyms.h"
65 #include "read_apk.h"
66 #include "read_elf.h"
67 #include "read_symbol_map.h"
68 #include "record.h"
69 #include "record_file.h"
70 #include "thread_tree.h"
71 #include "tracing.h"
72 #include "utils.h"
73 #include "workload.h"
74 
75 namespace simpleperf {
76 namespace {
77 
using android::base::ParseUint;
using android::base::Realpath;

// Event type recorded when the user passes no -e/--group option (see
// RecordCommand::PrepareRecording()).
static std::string default_measured_event_type = "cpu-cycles";

// Maps the branch filter names accepted by -j to PERF_SAMPLE_BRANCH_* bits.
static std::unordered_map<std::string, uint64_t> branch_sampling_type_map = {
    {"u", PERF_SAMPLE_BRANCH_USER},
    {"k", PERF_SAMPLE_BRANCH_KERNEL},
    {"any", PERF_SAMPLE_BRANCH_ANY},
    {"any_call", PERF_SAMPLE_BRANCH_ANY_CALL},
    {"any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN},
    {"ind_call", PERF_SAMPLE_BRANCH_IND_CALL},
};

// Maps the clock names accepted by --clockid to CLOCK_* ids ("perf" is handled
// separately since it isn't a POSIX clock id).
static std::unordered_map<std::string, int> clockid_map = {
    {"realtime", CLOCK_REALTIME},
    {"monotonic", CLOCK_MONOTONIC},
    {"monotonic_raw", CLOCK_MONOTONIC_RAW},
    {"boottime", CLOCK_BOOTTIME},
};

// The max size of records dumped by kernel is 65535, and dump stack size
// should be a multiply of 8, so MAX_DUMP_STACK_SIZE is 65528.
constexpr uint32_t MAX_DUMP_STACK_SIZE = 65528;

// The max allowed pages in mapped buffer is decided by rlimit(RLIMIT_MEMLOCK).
// Here 1024 is a desired value for pages in mapped buffer. If mapped
// successfully, the buffer size = 1024 * 4K (page size) = 4M.
constexpr size_t DESIRED_PAGES_IN_MAPPED_BUFFER = 1024;

// Cache size used by CallChainJoiner to cache call chains in memory.
constexpr size_t DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE = 8 * 1024 * 1024;

// Currently, the record buffer size in user-space is set to match the kernel buffer size on a
// 8 core system. For system-wide recording, it is 8K pages * 4K page_size * 8 cores = 256MB.
// For non system-wide recording, it is 1K pages * 4K page_size * 8 cores = 64MB.
static constexpr size_t kRecordBufferSize = 64 * 1024 * 1024;
static constexpr size_t kSystemWideRecordBufferSize = 256 * 1024 * 1024;

// Default per-cpu aux buffer size for ETM tracing; overridable by --aux-buffer-size.
static constexpr size_t kDefaultAuxBufferSize = 4 * 1024 * 1024;

// On Pixel 3, it takes about 1ms to enable ETM, and 16-40ms to disable ETM and copy 4M ETM data.
// So make default period to 100ms.
static constexpr double kDefaultEtmDataFlushPeriodInSec = 0.1;
122 
// Timestamps (from GetSystemClock()) marking the phases of a recording run,
// used to report where time was spent.
struct TimeStat {
  uint64_t prepare_recording_time = 0;  // entering Run()
  uint64_t start_recording_time = 0;    // after PrepareRecording() succeeds
  uint64_t stop_recording_time = 0;     // after the event loop exits
  uint64_t finish_recording_time = 0;   // after mapped buffers are fully drained
  uint64_t post_process_time = 0;       // after post processing completes
};
130 
// Implements `simpleperf record`: opens perf events for the selected targets,
// pumps sample records from the kernel's mapped buffers into a perf.data file,
// and post-processes the file (dwarf unwinding, callchain joining, feature
// sections). The long string below is the user-visible help text.
class RecordCommand : public Command {
 public:
  RecordCommand()
      : Command(
            "record", "record sampling info in perf.data",
            // clang-format off
"Usage: simpleperf record [options] [--] [command [command-args]]\n"
"       Gather sampling information of running [command]. And -a/-p/-t option\n"
"       can be used to change target of sampling information.\n"
"       The default options are: -e cpu-cycles -f 4000 -o perf.data.\n"
"Select monitored threads:\n"
"-a     System-wide collection. Use with --exclude-perf to exclude samples for\n"
"       simpleperf process.\n"
#if defined(__ANDROID__)
"--app package_name    Profile the process of an Android application.\n"
"                      On non-rooted devices, the app must be debuggable,\n"
"                      because we use run-as to switch to the app's context.\n"
#endif
"-p pid1,pid2,...       Record events on existing processes. Mutually exclusive\n"
"                       with -a.\n"
"-t tid1,tid2,... Record events on existing threads. Mutually exclusive with -a.\n"
"\n"
"Select monitored event types:\n"
"-e event1[:modifier1],event2[:modifier2],...\n"
"             Select a list of events to record. An event can be:\n"
"               1) an event name listed in `simpleperf list`;\n"
"               2) a raw PMU event in rN format. N is a hex number.\n"
"                  For example, r1b selects event number 0x1b.\n"
"               3) a kprobe event added by --kprobe option.\n"
"             Modifiers can be added to define how the event should be\n"
"             monitored. Possible modifiers are:\n"
"                u - monitor user space events only\n"
"                k - monitor kernel space events only\n"
"--group event1[:modifier],event2[:modifier2],...\n"
"             Similar to -e option. But events specified in the same --group\n"
"             option are monitored as a group, and scheduled in and out at the\n"
"             same time.\n"
"--trace-offcpu   Generate samples when threads are scheduled off cpu.\n"
"                 Similar to \"-c 1 -e sched:sched_switch\".\n"
"--kprobe kprobe_event1,kprobe_event2,...\n"
"             Add kprobe events during recording. The kprobe_event format is in\n"
"             Documentation/trace/kprobetrace.rst in the kernel. Examples:\n"
"               'p:myprobe do_sys_open $arg2:string'   - add event kprobes:myprobe\n"
"               'r:myretprobe do_sys_open $retval:s64' - add event kprobes:myretprobe\n"
"\n"
"Select monitoring options:\n"
"-f freq      Set event sample frequency. It means recording at most [freq]\n"
"             samples every second. For non-tracepoint events, the default\n"
"             option is -f 4000. A -f/-c option affects all event types\n"
"             following it until meeting another -f/-c option. For example,\n"
"             for \"-f 1000 cpu-cycles -c 1 -e sched:sched_switch\", cpu-cycles\n"
"             has sample freq 1000, sched:sched_switch event has sample period 1.\n"
"-c count     Set event sample period. It means recording one sample when\n"
"             [count] events happen. For tracepoint events, the default option\n"
"             is -c 1.\n"
"--call-graph fp | dwarf[,<dump_stack_size>]\n"
"             Enable call graph recording. Use frame pointer or dwarf debug\n"
"             frame as the method to parse call graph in stack.\n"
"             Default is dwarf,65528.\n"
"-g           Same as '--call-graph dwarf'.\n"
"--clockid clock_id      Generate timestamps of samples using selected clock.\n"
"                        Possible values are: realtime, monotonic,\n"
"                        monotonic_raw, boottime, perf. If supported, default\n"
"                        is monotonic, otherwise is perf.\n"
"--cpu cpu_item1,cpu_item2,...\n"
"             Collect samples only on the selected cpus. cpu_item can be cpu\n"
"             number like 1, or cpu range like 0-3.\n"
"--duration time_in_sec  Monitor for time_in_sec seconds instead of running\n"
"                        [command]. Here time_in_sec may be any positive\n"
"                        floating point number.\n"
"-j branch_filter1,branch_filter2,...\n"
"             Enable taken branch stack sampling. Each sample captures a series\n"
"             of consecutive taken branches.\n"
"             The following filters are defined:\n"
"                any: any type of branch\n"
"                any_call: any function call or system call\n"
"                any_ret: any function return or system call return\n"
"                ind_call: any indirect branch\n"
"                u: only when the branch target is at the user level\n"
"                k: only when the branch target is in the kernel\n"
"             This option requires at least one branch type among any, any_call,\n"
"             any_ret, ind_call.\n"
"-b           Enable taken branch stack sampling. Same as '-j any'.\n"
"-m mmap_pages   Set the size of the buffer used to receiving sample data from\n"
"                the kernel. It should be a power of 2. If not set, the max\n"
"                possible value <= 1024 will be used.\n"
"--aux-buffer-size <buffer_size>  Set aux buffer size, only used in cs-etm event type.\n"
"                                 Need to be power of 2 and page size aligned.\n"
"                                 Used memory size is (buffer_size * (cpu_count + 1).\n"
"                                 Default is 4M.\n"
"--no-inherit  Don't record created child threads/processes.\n"
"--cpu-percent <percent>  Set the max percent of cpu time used for recording.\n"
"                         percent is in range [1-100], default is 25.\n"
"--addr-filter filter_str1,filter_str2,...\n"
"                Provide address filters for cs-etm instruction tracing.\n"
"                filter_str accepts below formats:\n"
"                  'filter  <addr-range>'  -- trace instructions in a range\n"
"                  'start <addr>'          -- start tracing when ip is <addr>\n"
"                  'stop <addr>'           -- stop tracing when ip is <addr>\n"
"                <addr-range> accepts below formats:\n"
"                  <file_path>                            -- code sections in a binary file\n"
"                  <vaddr_start>-<vaddr_end>@<file_path>  -- part of a binary file\n"
"                  <kernel_addr_start>-<kernel_addr_end>  -- part of kernel space\n"
"                <addr> accepts below formats:\n"
"                  <vaddr>@<file_path>      -- virtual addr in a binary file\n"
"                  <kernel_addr>            -- a kernel address\n"
"                Examples:\n"
"                  'filter 0x456-0x480@/system/lib/libc.so'\n"
"                  'start 0x456@/system/lib/libc.so,stop 0x480@/system/lib/libc.so'\n"
"\n"
"--tp-filter filter_string    Set filter_string for the previous tracepoint event.\n"
"                             Format is in Documentation/trace/events.rst in the kernel.\n"
"                             An example: 'prev_comm != \"simpleperf\" && (prev_pid > 1)'.\n"
"\n"
"Dwarf unwinding options:\n"
"--post-unwind=(yes|no) If `--call-graph dwarf` option is used, then the user's\n"
"                       stack will be recorded in perf.data and unwound while\n"
"                       recording by default. Use --post-unwind=yes to switch\n"
"                       to unwind after recording.\n"
"--no-unwind   If `--call-graph dwarf` option is used, then the user's stack\n"
"              will be unwound by default. Use this option to disable the\n"
"              unwinding of the user's stack.\n"
"--no-callchain-joiner  If `--call-graph dwarf` option is used, then by default\n"
"                       callchain joiner is used to break the 64k stack limit\n"
"                       and build more complete call graphs. However, the built\n"
"                       call graphs may not be correct in all cases.\n"
"--callchain-joiner-min-matching-nodes count\n"
"               When callchain joiner is used, set the matched nodes needed to join\n"
"               callchains. The count should be >= 1. By default it is 1.\n"
"--no-cut-samples   Simpleperf uses a record buffer to cache records received from the kernel.\n"
"                   When the available space in the buffer reaches low level, it cuts part of\n"
"                   the stack data in samples. When the available space reaches critical level,\n"
"                   it drops all samples. This option makes simpleperf not cut samples when the\n"
"                   available space reaches low level.\n"
"--keep-failed-unwinding-result        Keep reasons for failed unwinding cases\n"
"--keep-failed-unwinding-debug-info    Keep debug info for failed unwinding cases\n"
"\n"
"Sample filter options:\n"
"--exclude-perf                Exclude samples for simpleperf process.\n"
RECORD_FILTER_OPTION_HELP_MSG
"\n"
"Recording file options:\n"
"--no-dump-kernel-symbols  Don't dump kernel symbols in perf.data. By default\n"
"                          kernel symbols will be dumped when needed.\n"
"--no-dump-symbols       Don't dump symbols in perf.data. By default symbols are\n"
"                        dumped in perf.data, to support reporting in another\n"
"                        environment.\n"
"-o record_file_name    Set record file name, default is perf.data.\n"
"--size-limit SIZE[K|M|G]      Stop recording after SIZE bytes of records.\n"
"                              Default is unlimited.\n"
"--symfs <dir>    Look for files with symbols relative to this directory.\n"
"                 This option is used to provide files with symbol table and\n"
"                 debug information, which are used for unwinding and dumping symbols.\n"
"--add-meta-info key=value     Add extra meta info, which will be stored in the recording file.\n"
"\n"
"Other options:\n"
"--exit-with-parent            Stop recording when the process starting\n"
"                              simpleperf dies.\n"
"--start_profiling_fd fd_no    After starting profiling, write \"STARTED\" to\n"
"                              <fd_no>, then close <fd_no>.\n"
"--stdio-controls-profiling    Use stdin/stdout to pause/resume profiling.\n"
#if defined(__ANDROID__)
"--in-app                      We are already running in the app's context.\n"
"--tracepoint-events file_name   Read tracepoint events from [file_name] instead of tracefs.\n"
#endif
#if 0
// Below options are only used internally and shouldn't be visible to the public.
"--out-fd <fd>    Write perf.data to a file descriptor.\n"
"--stop-signal-fd <fd>  Stop recording when fd is readable.\n"
#endif
            // clang-format on
            ),
        system_wide_collection_(false),
        branch_sampling_(0),
        fp_callchain_sampling_(false),
        dwarf_callchain_sampling_(false),
        dump_stack_size_in_dwarf_sampling_(MAX_DUMP_STACK_SIZE),
        unwind_dwarf_callchain_(true),
        post_unwind_(false),
        child_inherit_(true),
        duration_in_sec_(0),
        can_dump_kernel_symbols_(true),
        dump_symbols_(true),
        event_selection_set_(false),
        mmap_page_range_(std::make_pair(1, DESIRED_PAGES_IN_MAPPED_BUFFER)),
        record_filename_("perf.data"),
        sample_record_count_(0),
        lost_record_count_(0),
        in_app_context_(false),
        trace_offcpu_(false),
        exclude_kernel_callchain_(false),
        allow_callchain_joiner_(true),
        callchain_joiner_min_matching_nodes_(1u),
        last_record_timestamp_(0u),
        record_filter_(thread_tree_) {
    // If we run `adb shell simpleperf record xxx` and stop profiling by ctrl-c, adb closes
    // sockets connecting simpleperf. After that, simpleperf will receive SIGPIPE when writing
    // to stdout/stderr, which is a problem when we use '--app' option. So ignore SIGPIPE to
    // finish properly.
    signal(SIGPIPE, SIG_IGN);
  }

  // Entry point: parses args, then runs prepare/record/post-process phases.
  bool Run(const std::vector<std::string>& args);

 private:
  bool ParseOptions(const std::vector<std::string>& args, std::vector<std::string>* non_option_args,
                    ProbeEvents* probe_events);
  bool AdjustPerfEventLimit();
  bool PrepareRecording(Workload* workload);
  bool DoRecording(Workload* workload);
  bool PostProcessRecording(const std::vector<std::string>& args);
  // pre recording functions
  bool TraceOffCpu();
  bool SetEventSelectionFlags();
  bool CreateAndInitRecordFile();
  std::unique_ptr<RecordFileWriter> CreateRecordFile(const std::string& filename);
  bool DumpKernelSymbol();
  bool DumpTracingData();
  bool DumpMaps();
  bool DumpAuxTraceInfo();

  // recording functions
  bool ProcessRecord(Record* record);
  bool ShouldOmitRecord(Record* record);
  bool DumpMapsForRecord(Record* record);
  bool SaveRecordForPostUnwinding(Record* record);
  bool SaveRecordAfterUnwinding(Record* record);
  bool SaveRecordWithoutUnwinding(Record* record);
  bool ProcessJITDebugInfo(const std::vector<JITDebugInfo>& debug_info, bool sync_kernel_records);
  bool ProcessControlCmd(IOEventLoop* loop);
  void UpdateRecord(Record* record);
  bool UnwindRecord(SampleRecord& r);
  bool KeepFailedUnwindingResult(const SampleRecord& r, const std::vector<uint64_t>& ips,
                                 const std::vector<uint64_t>& sps);

  // post recording functions
  std::unique_ptr<RecordFileReader> MoveRecordFile(const std::string& old_filename);
  bool MergeMapRecords();
  bool PostUnwindRecords();
  bool JoinCallChains();
  bool DumpAdditionalFeatures(const std::vector<std::string>& args);
  bool DumpBuildIdFeature();
  bool DumpFileFeature();
  bool DumpMetaInfoFeature(bool kernel_symbols_available);
  bool DumpDebugUnwindFeature(const std::unordered_set<Dso*>& dso_set);
  void CollectHitFileInfo(const SampleRecord& r, std::unordered_set<Dso*>* dso_set);

  // Sample speed applied to the default event type (see PrepareRecording());
  // presumably also set while parsing -f/-c options — set outside this view.
  std::unique_ptr<SampleSpeed> sample_speed_;
  bool system_wide_collection_;
  // PERF_SAMPLE_BRANCH_* bits selected via -j/-b (see branch_sampling_type_map).
  uint64_t branch_sampling_;
  bool fp_callchain_sampling_;
  bool dwarf_callchain_sampling_;
  uint32_t dump_stack_size_in_dwarf_sampling_;
  bool unwind_dwarf_callchain_;
  bool post_unwind_;
  bool keep_failed_unwinding_result_ = false;
  bool keep_failed_unwinding_debug_info_ = false;
  std::unique_ptr<OfflineUnwinder> offline_unwinder_;
  bool child_inherit_;
  double duration_in_sec_;
  bool can_dump_kernel_symbols_;
  bool dump_symbols_;
  std::string clockid_;
  std::vector<int> cpus_;
  EventSelectionSet event_selection_set_;

  // Min/max pages for each kernel mapped buffer (-m option).
  std::pair<size_t, size_t> mmap_page_range_;
  size_t aux_buffer_size_ = kDefaultAuxBufferSize;

  ThreadTree thread_tree_;
  std::string record_filename_;
  android::base::unique_fd out_fd_;          // --out-fd: final destination for perf.data
  std::unique_ptr<RecordFileWriter> record_file_writer_;
  android::base::unique_fd stop_signal_fd_;  // --stop-signal-fd: readable => stop recording

  uint64_t sample_record_count_;
  uint64_t lost_record_count_;
  android::base::unique_fd start_profiling_fd_;  // --start_profiling_fd: gets "STARTED"
  bool stdio_controls_profiling_ = false;

  std::string app_package_name_;
  bool in_app_context_;
  bool trace_offcpu_;
  bool exclude_kernel_callchain_;
  uint64_t size_limit_in_bytes_ = 0;
  uint64_t max_sample_freq_ = DEFAULT_SAMPLE_FREQ_FOR_NONTRACEPOINT_EVENT;
  size_t cpu_time_max_percent_ = 25;

  // For CallChainJoiner
  bool allow_callchain_joiner_;
  size_t callchain_joiner_min_matching_nodes_;
  std::unique_ptr<CallChainJoiner> callchain_joiner_;
  bool allow_cutting_samples_ = true;

  std::unique_ptr<JITDebugReader> jit_debug_reader_;
  uint64_t last_record_timestamp_;  // used to insert Mmap2Records for JIT debug info
  TimeStat time_stat_;
  EventAttrWithId dumping_attr_id_;
  // In system wide recording, record if we have dumped map info for a process.
  std::unordered_set<pid_t> dumped_processes_;
  bool exclude_perf_ = false;
  RecordFilter record_filter_;

  std::optional<MapRecordReader> map_record_reader_;
  std::optional<MapRecordThread> map_record_thread_;

  // Extra key=value pairs from --add-meta-info, stored in the recording file.
  std::unordered_map<std::string, std::string> extra_meta_info_;
};
439 
Run(const std::vector<std::string> & args)440 bool RecordCommand::Run(const std::vector<std::string>& args) {
441   time_stat_.prepare_recording_time = GetSystemClock();
442   ScopedCurrentArch scoped_arch(GetMachineArch());
443 
444   if (!CheckPerfEventLimit()) {
445     return false;
446   }
447   AllowMoreOpenedFiles();
448 
449   std::vector<std::string> workload_args;
450   ProbeEvents probe_events;
451   auto clear_probe_events_guard = android::base::make_scope_guard([this, &probe_events] {
452     if (!probe_events.IsEmpty()) {
453       // probe events can be deleted only when no perf event file is using them.
454       event_selection_set_.CloseEventFiles();
455       probe_events.Clear();
456     }
457   });
458   if (!ParseOptions(args, &workload_args, &probe_events)) {
459     return false;
460   }
461   if (!AdjustPerfEventLimit()) {
462     return false;
463   }
464   std::unique_ptr<ScopedTempFiles> scoped_temp_files =
465       ScopedTempFiles::Create(android::base::Dirname(record_filename_));
466   if (!scoped_temp_files) {
467     PLOG(ERROR) << "Can't create output file in directory "
468                 << android::base::Dirname(record_filename_);
469     return false;
470   }
471   if (!app_package_name_.empty() && !in_app_context_) {
472     // Some users want to profile non debuggable apps on rooted devices. If we use run-as,
473     // it will be impossible when using --app. So don't switch to app's context when we are
474     // root.
475     if (!IsRoot()) {
476       return RunInAppContext(app_package_name_, "record", args, workload_args.size(),
477                              record_filename_, true);
478     }
479   }
480   std::unique_ptr<Workload> workload;
481   if (!workload_args.empty()) {
482     workload = Workload::CreateWorkload(workload_args);
483     if (workload == nullptr) {
484       return false;
485     }
486   }
487   if (!PrepareRecording(workload.get())) {
488     return false;
489   }
490   time_stat_.start_recording_time = GetSystemClock();
491   if (!DoRecording(workload.get())) {
492     return false;
493   }
494   return PostProcessRecording(args);
495 }
496 
// Sets up everything needed before recording starts: default event type,
// monitored targets, perf event files and mapped buffers, the output file,
// and all IOEventLoop event sources (signals, duration timer, stdio control,
// JIT debug info, periodic ETM flush). The numbered steps below are ordered;
// e.g. event files must be opened before they can be mmapped.
bool RecordCommand::PrepareRecording(Workload* workload) {
  // 1. Prepare in other modules.
  PrepareVdsoFile();

  // 2. Add default event type.
  if (event_selection_set_.empty()) {
    size_t group_id;
    if (!event_selection_set_.AddEventType(default_measured_event_type, &group_id)) {
      return false;
    }
    if (sample_speed_) {
      event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
    }
  }

  // 3. Process options before opening perf event files.
  exclude_kernel_callchain_ = event_selection_set_.ExcludeKernel();
  if (trace_offcpu_ && !TraceOffCpu()) {
    return false;
  }
  if (!SetEventSelectionFlags()) {
    return false;
  }
  if (unwind_dwarf_callchain_) {
    // Unwinding stats are only collected when failed unwinding results are kept.
    bool collect_stat = keep_failed_unwinding_result_;
    offline_unwinder_ = OfflineUnwinder::Create(collect_stat);
  }
  if (unwind_dwarf_callchain_ && allow_callchain_joiner_) {
    callchain_joiner_.reset(new CallChainJoiner(DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE,
                                                callchain_joiner_min_matching_nodes_, false));
  }

  // 4. Add monitored targets.
  bool need_to_check_targets = false;
  if (system_wide_collection_) {
    // tid -1 means all threads (system wide) for perf_event_open.
    event_selection_set_.AddMonitoredThreads({-1});
  } else if (!event_selection_set_.HasMonitoredTarget()) {
    if (workload != nullptr) {
      event_selection_set_.AddMonitoredProcesses({workload->GetPid()});
      // Only start counting when the child execs the real workload.
      event_selection_set_.SetEnableOnExec(true);
    } else if (!app_package_name_.empty()) {
      // If app process is not created, wait for it. This allows simpleperf starts before
      // app process. In this way, we can have a better support of app start-up time profiling.
      std::set<pid_t> pids = WaitForAppProcesses(app_package_name_);
      event_selection_set_.AddMonitoredProcesses(pids);
      need_to_check_targets = true;
    } else {
      LOG(ERROR) << "No threads to monitor. Try `simpleperf help record` for help";
      return false;
    }
  } else {
    need_to_check_targets = true;
  }
  // Profiling JITed/interpreted Java code is supported starting from Android P.
  // Also support profiling art interpreter on host.
  if (GetAndroidVersion() >= kAndroidVersionP || GetAndroidVersion() == 0) {
    // JIT symfiles are stored in temporary files, and are deleted after recording. But if
    // `-g --no-unwind` option is used, we want to keep symfiles to support unwinding in
    // the debug-unwind cmd.
    auto symfile_option = (dwarf_callchain_sampling_ && !unwind_dwarf_callchain_)
                              ? JITDebugReader::SymFileOption::kKeepSymFiles
                              : JITDebugReader::SymFileOption::kDropSymFiles;
    auto sync_option = (clockid_ == "monotonic") ? JITDebugReader::SyncOption::kSyncWithRecords
                                                 : JITDebugReader::SyncOption::kNoSync;
    jit_debug_reader_.reset(new JITDebugReader(record_filename_, symfile_option, sync_option));
    // To profile java code, need to dump maps containing vdex files, which are not executable.
    event_selection_set_.SetRecordNotExecutableMaps(true);
  }

  // 5. Open perf event files and create mapped buffers.
  if (!event_selection_set_.OpenEventFiles(cpus_)) {
    return false;
  }
  size_t record_buffer_size =
      system_wide_collection_ ? kSystemWideRecordBufferSize : kRecordBufferSize;
  if (!event_selection_set_.MmapEventFiles(mmap_page_range_.first, mmap_page_range_.second,
                                           aux_buffer_size_, record_buffer_size,
                                           allow_cutting_samples_, exclude_perf_)) {
    return false;
  }
  // Every record read from the mapped buffers goes through ProcessRecord().
  auto callback = std::bind(&RecordCommand::ProcessRecord, this, std::placeholders::_1);
  if (!event_selection_set_.PrepareToReadMmapEventData(callback)) {
    return false;
  }

  // 6. Create perf.data.
  if (!CreateAndInitRecordFile()) {
    return false;
  }

  // 7. Add read/signal/periodic Events.
  if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) {
    return false;
  }
  IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
  auto exit_loop_callback = [loop]() { return loop->ExitLoop(); };
  // SIGCHLD: workload exited; SIGINT/SIGTERM: user requested stop.
  if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM}, exit_loop_callback)) {
    return false;
  }

  // Only add an event for SIGHUP if we didn't inherit SIG_IGN (e.g. from nohup).
  if (!SignalIsIgnored(SIGHUP)) {
    if (!loop->AddSignalEvent(SIGHUP, exit_loop_callback)) {
      return false;
    }
  }
  if (stop_signal_fd_ != -1) {
    // --stop-signal-fd: stop recording when the fd becomes readable.
    if (!loop->AddReadEvent(stop_signal_fd_, exit_loop_callback)) {
      return false;
    }
  }

  if (duration_in_sec_ != 0) {
    // --duration: exit the loop after the requested time.
    if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_),
                                [loop]() { return loop->ExitLoop(); })) {
      return false;
    }
  }
  if (stdio_controls_profiling_) {
    // --stdio-controls-profiling: commands arrive on stdin (fd 0).
    if (!loop->AddReadEvent(0, [this, loop]() { return ProcessControlCmd(loop); })) {
      return false;
    }
  }
  if (jit_debug_reader_) {
    auto callback = [this](const std::vector<JITDebugInfo>& debug_info, bool sync_kernel_records) {
      return ProcessJITDebugInfo(debug_info, sync_kernel_records);
    };
    if (!jit_debug_reader_->RegisterDebugInfoCallback(loop, callback)) {
      return false;
    }
    if (!system_wide_collection_) {
      // Monitor the JIT debug info of every monitored process, including the
      // processes owning the monitored threads.
      std::set<pid_t> pids = event_selection_set_.GetMonitoredProcesses();
      for (pid_t tid : event_selection_set_.GetMonitoredThreads()) {
        pid_t pid;
        if (GetProcessForThread(tid, &pid)) {
          pids.insert(pid);
        }
      }
      for (pid_t pid : pids) {
        if (!jit_debug_reader_->MonitorProcess(pid)) {
          return false;
        }
      }
      if (!jit_debug_reader_->ReadAllProcesses()) {
        return false;
      }
    }
  }
  if (event_selection_set_.HasAuxTrace()) {
    // ETM data is dumped to kernel buffer only when there is no thread traced by ETM. It happens
    // either when all monitored threads are scheduled off cpu, or when all etm perf events are
    // disabled.
    // If ETM data isn't dumped to kernel buffer in time, overflow parts will be dropped. This
    // makes less than expected data, especially in system wide recording. So add a periodic event
    // to flush etm data by temporarily disable all perf events.
    auto etm_flush = [this]() {
      return event_selection_set_.SetEnableEvents(false) &&
             event_selection_set_.SetEnableEvents(true);
    };
    if (!loop->AddPeriodicEvent(SecondToTimeval(kDefaultEtmDataFlushPeriodInSec), etm_flush)) {
      return false;
    }
  }
  return true;
}
662 
DoRecording(Workload * workload)663 bool RecordCommand::DoRecording(Workload* workload) {
664   // Write records in mapped buffers of perf_event_files to output file while workload is running.
665   if (workload != nullptr && !workload->IsStarted() && !workload->Start()) {
666     return false;
667   }
668   if (start_profiling_fd_.get() != -1) {
669     if (!android::base::WriteStringToFd("STARTED", start_profiling_fd_)) {
670       PLOG(ERROR) << "failed to write to start_profiling_fd_";
671     }
672     start_profiling_fd_.reset();
673   }
674   if (stdio_controls_profiling_) {
675     printf("started\n");
676     fflush(stdout);
677   }
678   if (!event_selection_set_.GetIOEventLoop()->RunLoop()) {
679     return false;
680   }
681   time_stat_.stop_recording_time = GetSystemClock();
682   if (!event_selection_set_.FinishReadMmapEventData()) {
683     return false;
684   }
685   time_stat_.finish_recording_time = GetSystemClock();
686   return true;
687 }
688 
WriteRecordDataToOutFd(const std::string & in_filename,android::base::unique_fd out_fd)689 static bool WriteRecordDataToOutFd(const std::string& in_filename,
690                                    android::base::unique_fd out_fd) {
691   android::base::unique_fd in_fd(FileHelper::OpenReadOnly(in_filename));
692   if (in_fd == -1) {
693     PLOG(ERROR) << "Failed to open " << in_filename;
694     return false;
695   }
696   char buf[8192];
697   while (true) {
698     ssize_t n = TEMP_FAILURE_RETRY(read(in_fd, buf, sizeof(buf)));
699     if (n < 0) {
700       PLOG(ERROR) << "Failed to read " << in_filename;
701       return false;
702     }
703     if (n == 0) {
704       break;
705     }
706     if (!android::base::WriteFully(out_fd, buf, n)) {
707       PLOG(ERROR) << "Failed to write to out_fd";
708       return false;
709     }
710   }
711   unlink(in_filename.c_str());
712   return true;
713 }
714 
// Finish the recording session: merge/unwind/join the recorded data, write remaining feature
// sections, close (and optionally forward) the output file, then report brief statistics.
bool RecordCommand::PostProcessRecording(const std::vector<std::string>& args) {
  // 1. Merge map records dumped while recording by map record thread.
  if (map_record_thread_) {
    if (!map_record_thread_->Join() || !MergeMapRecords()) {
      return false;
    }
  }

  // 2. Post unwind dwarf callchain.
  if (unwind_dwarf_callchain_ && post_unwind_) {
    if (!PostUnwindRecords()) {
      return false;
    }
  }

  // 3. Optionally join Callchains.
  if (callchain_joiner_) {
    JoinCallChains();
  }

  // 4. Dump additional features, and close record file.
  if (!DumpAdditionalFeatures(args)) {
    return false;
  }
  if (!record_file_writer_->Close()) {
    return false;
  }
  // When --out-fd was given, stream the finished record file to that fd and unlink the file.
  if (out_fd_ != -1 && !WriteRecordDataToOutFd(record_filename_, std::move(out_fd_))) {
    return false;
  }
  time_stat_.post_process_time = GetSystemClock();

  // 5. Show brief record result.
  auto record_stat = event_selection_set_.GetRecordStat();
  if (event_selection_set_.HasAuxTrace()) {
    LOG(INFO) << "Aux data traced: " << record_stat.aux_data_size;
    if (record_stat.lost_aux_data_size != 0) {
      LOG(INFO) << "Aux data lost in user space: " << record_stat.lost_aux_data_size;
    }
  } else {
    std::string cut_samples;
    if (record_stat.cut_stack_samples > 0) {
      cut_samples = android::base::StringPrintf(" (cut %zu)", record_stat.cut_stack_samples);
    }
    lost_record_count_ += record_stat.lost_samples + record_stat.lost_non_samples;
    LOG(INFO) << "Samples recorded: " << sample_record_count_ << cut_samples
              << ". Samples lost: " << lost_record_count_ << ".";
    LOG(DEBUG) << "In user space, dropped " << record_stat.lost_samples << " samples, "
               << record_stat.lost_non_samples << " non samples, cut stack of "
               << record_stat.cut_stack_samples << " samples.";
    if (sample_record_count_ + lost_record_count_ != 0) {
      double lost_percent =
          static_cast<double>(lost_record_count_) / (lost_record_count_ + sample_record_count_);
      constexpr double LOST_PERCENT_WARNING_BAR = 0.1;
      // Warn when >= 10% of records were lost, and suggest knobs that reduce the loss.
      if (lost_percent >= LOST_PERCENT_WARNING_BAR) {
        LOG(WARNING) << "Lost " << (lost_percent * 100) << "% of samples, "
                     << "consider increasing mmap_pages(-m), "
                     << "or decreasing sample frequency(-f), "
                     << "or increasing sample period(-c).";
      }
    }
    if (callchain_joiner_) {
      callchain_joiner_->DumpStat();
    }
  }
  // Timing breakdown of the whole record command; timestamps are in ns, hence / 1e6 for ms.
  LOG(DEBUG) << "Prepare recording time "
             << (time_stat_.start_recording_time - time_stat_.prepare_recording_time) / 1e6
             << " ms, recording time "
             << (time_stat_.stop_recording_time - time_stat_.start_recording_time) / 1e6
             << " ms, stop recording time "
             << (time_stat_.finish_recording_time - time_stat_.stop_recording_time) / 1e6
             << " ms, post process time "
             << (time_stat_.post_process_time - time_stat_.finish_recording_time) / 1e6 << " ms.";
  return true;
}
790 
ParseOptions(const std::vector<std::string> & args,std::vector<std::string> * non_option_args,ProbeEvents * probe_events)791 bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
792                                  std::vector<std::string>* non_option_args,
793                                  ProbeEvents* probe_events) {
794   OptionValueMap options;
795   std::vector<std::pair<OptionName, OptionValue>> ordered_options;
796 
797   if (!PreprocessOptions(args, GetRecordCmdOptionFormats(), &options, &ordered_options,
798                          non_option_args)) {
799     return false;
800   }
801 
802   // Process options.
803   system_wide_collection_ = options.PullBoolValue("-a");
804 
805   for (const OptionValue& value : options.PullValues("--add-meta-info")) {
806     const std::string& s = *value.str_value;
807     auto split_pos = s.find('=');
808     if (split_pos == std::string::npos || split_pos == 0 || split_pos + 1 == s.size()) {
809       LOG(ERROR) << "invalid meta-info: " << s;
810       return false;
811     }
812     extra_meta_info_[s.substr(0, split_pos)] = s.substr(split_pos + 1);
813   }
814 
815   if (auto value = options.PullValue("--addr-filter"); value) {
816     auto filters = ParseAddrFilterOption(*value->str_value);
817     if (filters.empty()) {
818       return false;
819     }
820     event_selection_set_.SetAddrFilters(std::move(filters));
821   }
822 
823   if (auto value = options.PullValue("--app"); value) {
824     app_package_name_ = *value->str_value;
825   }
826 
827   if (auto value = options.PullValue("--aux-buffer-size"); value) {
828     uint64_t v = value->uint_value;
829     if (v > std::numeric_limits<size_t>::max() || !IsPowerOfTwo(v) || v % sysconf(_SC_PAGE_SIZE)) {
830       LOG(ERROR) << "invalid aux buffer size: " << v;
831       return false;
832     }
833     aux_buffer_size_ = static_cast<size_t>(v);
834   }
835 
836   if (options.PullValue("-b")) {
837     branch_sampling_ = branch_sampling_type_map["any"];
838   }
839 
840   if (!options.PullUintValue("--callchain-joiner-min-matching-nodes",
841                              &callchain_joiner_min_matching_nodes_, 1)) {
842     return false;
843   }
844 
845   if (auto value = options.PullValue("--clockid"); value) {
846     clockid_ = *value->str_value;
847     if (clockid_ != "perf") {
848       if (!IsSettingClockIdSupported()) {
849         LOG(ERROR) << "Setting clockid is not supported by the kernel.";
850         return false;
851       }
852       if (clockid_map.find(clockid_) == clockid_map.end()) {
853         LOG(ERROR) << "Invalid clockid: " << clockid_;
854         return false;
855       }
856     }
857   }
858 
859   if (auto value = options.PullValue("--cpu"); value) {
860     if (auto cpus = GetCpusFromString(*value->str_value); cpus) {
861       cpus_.assign(cpus->begin(), cpus->end());
862     } else {
863       return false;
864     }
865   }
866 
867   if (!options.PullUintValue("--cpu-percent", &cpu_time_max_percent_, 1, 100)) {
868     return false;
869   }
870 
871   if (!options.PullDoubleValue("--duration", &duration_in_sec_, 1e-9)) {
872     return false;
873   }
874 
875   exclude_perf_ = options.PullBoolValue("--exclude-perf");
876   if (!record_filter_.ParseOptions(options)) {
877     return false;
878   }
879 
880   if (options.PullValue("--exit-with-parent")) {
881     prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0);
882   }
883 
884   in_app_context_ = options.PullBoolValue("--in-app");
885 
886   for (const OptionValue& value : options.PullValues("-j")) {
887     std::vector<std::string> branch_sampling_types = android::base::Split(*value.str_value, ",");
888     for (auto& type : branch_sampling_types) {
889       auto it = branch_sampling_type_map.find(type);
890       if (it == branch_sampling_type_map.end()) {
891         LOG(ERROR) << "unrecognized branch sampling filter: " << type;
892         return false;
893       }
894       branch_sampling_ |= it->second;
895     }
896   }
897   keep_failed_unwinding_result_ = options.PullBoolValue("--keep-failed-unwinding-result");
898   keep_failed_unwinding_debug_info_ = options.PullBoolValue("--keep-failed-unwinding-debug-info");
899   if (keep_failed_unwinding_debug_info_) {
900     keep_failed_unwinding_result_ = true;
901   }
902 
903   for (const OptionValue& value : options.PullValues("--kprobe")) {
904     std::vector<std::string> cmds = android::base::Split(*value.str_value, ",");
905     for (const auto& cmd : cmds) {
906       if (!probe_events->AddKprobe(cmd)) {
907         return false;
908       }
909     }
910   }
911 
912   if (auto value = options.PullValue("-m"); value) {
913     if (!IsPowerOfTwo(value->uint_value) ||
914         value->uint_value > std::numeric_limits<size_t>::max()) {
915       LOG(ERROR) << "Invalid mmap_pages: '" << value->uint_value << "'";
916       return false;
917     }
918     mmap_page_range_.first = mmap_page_range_.second = value->uint_value;
919   }
920 
921   allow_callchain_joiner_ = !options.PullBoolValue("--no-callchain-joiner");
922   allow_cutting_samples_ = !options.PullBoolValue("--no-cut-samples");
923   can_dump_kernel_symbols_ = !options.PullBoolValue("--no-dump-kernel-symbols");
924   dump_symbols_ = !options.PullBoolValue("--no-dump-symbols");
925   child_inherit_ = !options.PullBoolValue("--no-inherit");
926   unwind_dwarf_callchain_ = !options.PullBoolValue("--no-unwind");
927 
928   if (auto value = options.PullValue("-o"); value) {
929     record_filename_ = *value->str_value;
930   }
931 
932   if (auto value = options.PullValue("--out-fd"); value) {
933     out_fd_.reset(static_cast<int>(value->uint_value));
934   }
935 
936   for (const OptionValue& value : options.PullValues("-p")) {
937     if (auto pids = GetTidsFromString(*value.str_value, true); pids) {
938       event_selection_set_.AddMonitoredProcesses(pids.value());
939     } else {
940       return false;
941     }
942   }
943 
944   // Use explicit if statements instead of logical operators to avoid short-circuit.
945   if (options.PullValue("--post-unwind")) {
946     post_unwind_ = true;
947   }
948   if (options.PullValue("--post-unwind=yes")) {
949     post_unwind_ = true;
950   }
951   if (options.PullValue("--post-unwind=no")) {
952     post_unwind_ = false;
953   }
954 
955   if (!options.PullUintValue("--size-limit", &size_limit_in_bytes_, 1)) {
956     return false;
957   }
958 
959   if (auto value = options.PullValue("--start_profiling_fd"); value) {
960     start_profiling_fd_.reset(static_cast<int>(value->uint_value));
961   }
962 
963   stdio_controls_profiling_ = options.PullBoolValue("--stdio-controls-profiling");
964 
965   if (auto value = options.PullValue("--stop-signal-fd"); value) {
966     stop_signal_fd_.reset(static_cast<int>(value->uint_value));
967   }
968 
969   if (auto value = options.PullValue("--symfs"); value) {
970     if (!Dso::SetSymFsDir(*value->str_value)) {
971       return false;
972     }
973   }
974 
975   for (const OptionValue& value : options.PullValues("-t")) {
976     if (auto tids = GetTidsFromString(*value.str_value, true); tids) {
977       event_selection_set_.AddMonitoredThreads(tids.value());
978     } else {
979       return false;
980     }
981   }
982 
983   trace_offcpu_ = options.PullBoolValue("--trace-offcpu");
984 
985   if (auto value = options.PullValue("--tracepoint-events"); value) {
986     if (!EventTypeManager::Instance().ReadTracepointsFromFile(*value->str_value)) {
987       return false;
988     }
989   }
990 
991   CHECK(options.values.empty());
992 
993   // Process ordered options.
994   std::vector<size_t> wait_setting_speed_event_groups;
995 
996   for (const auto& pair : ordered_options) {
997     const OptionName& name = pair.first;
998     const OptionValue& value = pair.second;
999 
1000     if (name == "-c" || name == "-f") {
1001       if (value.uint_value < 1) {
1002         LOG(ERROR) << "invalid " << name << ": " << value.uint_value;
1003         return false;
1004       }
1005       if (name == "-c") {
1006         sample_speed_.reset(new SampleSpeed(0, value.uint_value));
1007       } else {
1008         if (value.uint_value >= INT_MAX) {
1009           LOG(ERROR) << "sample freq can't be bigger than INT_MAX: " << value.uint_value;
1010           return false;
1011         }
1012         sample_speed_.reset(new SampleSpeed(value.uint_value, 0));
1013       }
1014 
1015       for (auto groud_id : wait_setting_speed_event_groups) {
1016         event_selection_set_.SetSampleSpeed(groud_id, *sample_speed_);
1017       }
1018       wait_setting_speed_event_groups.clear();
1019 
1020     } else if (name == "--call-graph") {
1021       std::vector<std::string> strs = android::base::Split(*value.str_value, ",");
1022       if (strs[0] == "fp") {
1023         fp_callchain_sampling_ = true;
1024         dwarf_callchain_sampling_ = false;
1025       } else if (strs[0] == "dwarf") {
1026         fp_callchain_sampling_ = false;
1027         dwarf_callchain_sampling_ = true;
1028         if (strs.size() > 1) {
1029           uint64_t size;
1030           if (!ParseUint(strs[1], &size)) {
1031             LOG(ERROR) << "invalid dump stack size in --call-graph option: " << strs[1];
1032             return false;
1033           }
1034           if ((size & 7) != 0) {
1035             LOG(ERROR) << "dump stack size " << size << " is not 8-byte aligned.";
1036             return false;
1037           }
1038           if (size >= MAX_DUMP_STACK_SIZE) {
1039             LOG(ERROR) << "dump stack size " << size << " is bigger than max allowed size "
1040                        << MAX_DUMP_STACK_SIZE << ".";
1041             return false;
1042           }
1043           dump_stack_size_in_dwarf_sampling_ = static_cast<uint32_t>(size);
1044         }
1045       }
1046 
1047     } else if (name == "-e") {
1048       std::vector<std::string> event_types = android::base::Split(*value.str_value, ",");
1049       for (auto& event_type : event_types) {
1050         if (probe_events->IsProbeEvent(event_type)) {
1051           if (!probe_events->CreateProbeEventIfNotExist(event_type)) {
1052             return false;
1053           }
1054         }
1055         size_t group_id;
1056         if (!event_selection_set_.AddEventType(event_type, &group_id)) {
1057           return false;
1058         }
1059         if (sample_speed_) {
1060           event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
1061         } else {
1062           wait_setting_speed_event_groups.push_back(group_id);
1063         }
1064       }
1065 
1066     } else if (name == "-g") {
1067       fp_callchain_sampling_ = false;
1068       dwarf_callchain_sampling_ = true;
1069     } else if (name == "--group") {
1070       std::vector<std::string> event_types = android::base::Split(*value.str_value, ",");
1071       for (const auto& event_type : event_types) {
1072         if (probe_events->IsProbeEvent(event_type)) {
1073           if (!probe_events->CreateProbeEventIfNotExist(event_type)) {
1074             return false;
1075           }
1076         }
1077       }
1078       size_t group_id;
1079       if (!event_selection_set_.AddEventGroup(event_types, &group_id)) {
1080         return false;
1081       }
1082       if (sample_speed_) {
1083         event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
1084       } else {
1085         wait_setting_speed_event_groups.push_back(group_id);
1086       }
1087 
1088     } else if (name == "--tp-filter") {
1089       if (!event_selection_set_.SetTracepointFilter(*value.str_value)) {
1090         return false;
1091       }
1092     } else {
1093       CHECK(false) << "unprocessed option: " << name;
1094     }
1095   }
1096 
1097   if (!dwarf_callchain_sampling_) {
1098     if (!unwind_dwarf_callchain_) {
1099       LOG(ERROR) << "--no-unwind is only used with `--call-graph dwarf` option.";
1100       return false;
1101     }
1102     unwind_dwarf_callchain_ = false;
1103   }
1104   if (post_unwind_) {
1105     if (!dwarf_callchain_sampling_ || !unwind_dwarf_callchain_) {
1106       post_unwind_ = false;
1107     }
1108   }
1109 
1110   if (fp_callchain_sampling_) {
1111     if (GetBuildArch() == ARCH_ARM) {
1112       LOG(WARNING) << "`--callgraph fp` option doesn't work well on arm architecture, "
1113                    << "consider using `-g` option or profiling on aarch64 architecture.";
1114     }
1115   }
1116 
1117   if (system_wide_collection_ && event_selection_set_.HasMonitoredTarget()) {
1118     LOG(ERROR) << "Record system wide and existing processes/threads can't be "
1119                   "used at the same time.";
1120     return false;
1121   }
1122 
1123   if (system_wide_collection_ && !IsRoot()) {
1124     LOG(ERROR) << "System wide profiling needs root privilege.";
1125     return false;
1126   }
1127 
1128   if (dump_symbols_ && can_dump_kernel_symbols_) {
1129     // No need to dump kernel symbols as we will dump all required symbols.
1130     can_dump_kernel_symbols_ = false;
1131   }
1132   if (clockid_.empty()) {
1133     clockid_ = IsSettingClockIdSupported() ? "monotonic" : "perf";
1134   }
1135 
1136   return true;
1137 }
1138 
AdjustPerfEventLimit()1139 bool RecordCommand::AdjustPerfEventLimit() {
1140   bool set_prop = false;
1141   // 1. Adjust max_sample_rate.
1142   uint64_t cur_max_freq;
1143   if (GetMaxSampleFrequency(&cur_max_freq) && cur_max_freq < max_sample_freq_ &&
1144       !SetMaxSampleFrequency(max_sample_freq_)) {
1145     set_prop = true;
1146   }
1147   // 2. Adjust perf_cpu_time_max_percent.
1148   size_t cur_percent;
1149   if (GetCpuTimeMaxPercent(&cur_percent) && cur_percent != cpu_time_max_percent_ &&
1150       !SetCpuTimeMaxPercent(cpu_time_max_percent_)) {
1151     set_prop = true;
1152   }
1153   // 3. Adjust perf_event_mlock_kb.
1154   long cpus = sysconf(_SC_NPROCESSORS_CONF);
1155   uint64_t mlock_kb = cpus * (mmap_page_range_.second + 1) * 4;
1156   if (event_selection_set_.HasAuxTrace()) {
1157     mlock_kb += cpus * aux_buffer_size_ / 1024;
1158   }
1159   uint64_t cur_mlock_kb;
1160   if (GetPerfEventMlockKb(&cur_mlock_kb) && cur_mlock_kb < mlock_kb &&
1161       !SetPerfEventMlockKb(mlock_kb)) {
1162     set_prop = true;
1163   }
1164 
1165   if (GetAndroidVersion() >= kAndroidVersionQ && set_prop && !in_app_context_) {
1166     return SetPerfEventLimits(std::max(max_sample_freq_, cur_max_freq), cpu_time_max_percent_,
1167                               std::max(mlock_kb, cur_mlock_kb));
1168   }
1169   return true;
1170 }
1171 
TraceOffCpu()1172 bool RecordCommand::TraceOffCpu() {
1173   if (FindEventTypeByName("sched:sched_switch") == nullptr) {
1174     LOG(ERROR) << "Can't trace off cpu because sched:sched_switch event is not available";
1175     return false;
1176   }
1177   for (auto& event_type : event_selection_set_.GetTracepointEvents()) {
1178     if (event_type->name == "sched:sched_switch") {
1179       LOG(ERROR) << "Trace offcpu can't be used together with sched:sched_switch event";
1180       return false;
1181     }
1182   }
1183   if (!IsDumpingRegsForTracepointEventsSupported()) {
1184     LOG(ERROR) << "Dumping regs for tracepoint events is not supported by the kernel";
1185     return false;
1186   }
1187   // --trace-offcpu option only works with one of the selected event types.
1188   std::set<std::string> accepted_events = {"cpu-cycles", "cpu-clock", "task-clock"};
1189   std::vector<const EventType*> events = event_selection_set_.GetEvents();
1190   if (events.size() != 1 || accepted_events.find(events[0]->name) == accepted_events.end()) {
1191     LOG(ERROR) << "--trace-offcpu option only works with one of events "
1192                << android::base::Join(accepted_events, ' ');
1193     return false;
1194   }
1195   return event_selection_set_.AddEventType("sched:sched_switch");
1196 }
1197 
SetEventSelectionFlags()1198 bool RecordCommand::SetEventSelectionFlags() {
1199   event_selection_set_.SampleIdAll();
1200   if (!event_selection_set_.SetBranchSampling(branch_sampling_)) {
1201     return false;
1202   }
1203   if (fp_callchain_sampling_) {
1204     event_selection_set_.EnableFpCallChainSampling();
1205   } else if (dwarf_callchain_sampling_) {
1206     if (!event_selection_set_.EnableDwarfCallChainSampling(dump_stack_size_in_dwarf_sampling_)) {
1207       return false;
1208     }
1209   }
1210   event_selection_set_.SetInherit(child_inherit_);
1211   if (clockid_ != "perf") {
1212     event_selection_set_.SetClockId(clockid_map[clockid_]);
1213   }
1214   return true;
1215 }
1216 
CreateAndInitRecordFile()1217 bool RecordCommand::CreateAndInitRecordFile() {
1218   record_file_writer_ = CreateRecordFile(record_filename_);
1219   if (record_file_writer_ == nullptr) {
1220     return false;
1221   }
1222   // Use first perf_event_attr and first event id to dump mmap and comm records.
1223   EventAttrWithId dumping_attr_id = event_selection_set_.GetEventAttrWithId()[0];
1224   map_record_reader_.emplace(*dumping_attr_id.attr, dumping_attr_id.ids[0],
1225                              event_selection_set_.RecordNotExecutableMaps());
1226   map_record_reader_->SetCallback([this](Record* r) { return ProcessRecord(r); });
1227 
1228   return DumpKernelSymbol() && DumpTracingData() && DumpMaps() && DumpAuxTraceInfo();
1229 }
1230 
CreateRecordFile(const std::string & filename)1231 std::unique_ptr<RecordFileWriter> RecordCommand::CreateRecordFile(const std::string& filename) {
1232   std::unique_ptr<RecordFileWriter> writer = RecordFileWriter::CreateInstance(filename);
1233   if (writer == nullptr) {
1234     return nullptr;
1235   }
1236 
1237   if (!writer->WriteAttrSection(event_selection_set_.GetEventAttrWithId())) {
1238     return nullptr;
1239   }
1240   return writer;
1241 }
1242 
DumpKernelSymbol()1243 bool RecordCommand::DumpKernelSymbol() {
1244   if (can_dump_kernel_symbols_) {
1245     if (event_selection_set_.NeedKernelSymbol()) {
1246       std::string kallsyms;
1247       if (!LoadKernelSymbols(&kallsyms)) {
1248         // Symbol loading may have failed due to the lack of permissions. This
1249         // is not fatal, the symbols will appear as "unknown".
1250         return true;
1251       }
1252       KernelSymbolRecord r(kallsyms);
1253       if (!ProcessRecord(&r)) {
1254         return false;
1255       }
1256     }
1257   }
1258   return true;
1259 }
1260 
DumpTracingData()1261 bool RecordCommand::DumpTracingData() {
1262   std::vector<const EventType*> tracepoint_event_types = event_selection_set_.GetTracepointEvents();
1263   if (tracepoint_event_types.empty() || !CanRecordRawData() || in_app_context_) {
1264     return true;  // No need to dump tracing data, or can't do it.
1265   }
1266   std::vector<char> tracing_data;
1267   if (!GetTracingData(tracepoint_event_types, &tracing_data)) {
1268     return false;
1269   }
1270   TracingDataRecord record(tracing_data);
1271   if (!ProcessRecord(&record)) {
1272     return false;
1273   }
1274   return true;
1275 }
1276 
DumpMaps()1277 bool RecordCommand::DumpMaps() {
1278   if (system_wide_collection_) {
1279     // For system wide recording:
1280     //   If not aux tracing, only dump kernel maps. Maps of a process is dumped when needed (the
1281     //   first time a sample hits that process).
1282     //   If aux tracing, we don't know which maps will be needed, so dump all process maps. To
1283     //   reduce pre recording time, we dump process maps in map record thread while recording.
1284     if (event_selection_set_.HasAuxTrace()) {
1285       map_record_thread_.emplace(*map_record_reader_);
1286       return true;
1287     }
1288     if (!event_selection_set_.ExcludeKernel()) {
1289       return map_record_reader_->ReadKernelMaps();
1290     }
1291     return true;
1292   }
1293   if (!event_selection_set_.ExcludeKernel() && !map_record_reader_->ReadKernelMaps()) {
1294     return false;
1295   }
1296   // Map from process id to a set of thread ids in that process.
1297   std::unordered_map<pid_t, std::unordered_set<pid_t>> process_map;
1298   for (pid_t pid : event_selection_set_.GetMonitoredProcesses()) {
1299     std::vector<pid_t> tids = GetThreadsInProcess(pid);
1300     process_map[pid].insert(tids.begin(), tids.end());
1301   }
1302   for (pid_t tid : event_selection_set_.GetMonitoredThreads()) {
1303     pid_t pid;
1304     if (GetProcessForThread(tid, &pid)) {
1305       process_map[pid].insert(tid);
1306     }
1307   }
1308 
1309   // Dump each process.
1310   for (const auto& [pid, tids] : process_map) {
1311     if (!map_record_reader_->ReadProcessMaps(pid, tids, 0)) {
1312       return false;
1313     }
1314   }
1315   return true;
1316 }
1317 
// Central sink for every record produced while recording. Enforces the output size limit,
// feeds records to the JIT debug reader, lazily dumps process maps in system wide mode,
// applies the sample filter, and finally routes the record to one of the Save* paths
// depending on the dwarf-unwinding mode.
bool RecordCommand::ProcessRecord(Record* record) {
  UpdateRecord(record);
  if (ShouldOmitRecord(record)) {
    return true;
  }
  if (size_limit_in_bytes_ > 0u) {
    // Stop the recording loop once the data section written so far exceeds --size-limit.
    if (size_limit_in_bytes_ < record_file_writer_->GetDataSectionSize()) {
      return event_selection_set_.GetIOEventLoop()->ExitLoop();
    }
  }
  if (jit_debug_reader_ && !jit_debug_reader_->UpdateRecord(record)) {
    return false;
  }
  // Track the newest timestamp seen; used when synthesizing records (e.g. JIT maps).
  last_record_timestamp_ = std::max(last_record_timestamp_, record->Timestamp());
  // In system wide recording, maps are dumped when they are needed by records.
  if (system_wide_collection_ && !DumpMapsForRecord(record)) {
    return false;
  }
  // Record filter check should go after DumpMapsForRecord(). Otherwise, process/thread name
  // filters don't work in system wide collection.
  if (record->type() == PERF_RECORD_SAMPLE) {
    if (!record_filter_.Check(static_cast<SampleRecord*>(record))) {
      return true;
    }
  }
  if (unwind_dwarf_callchain_) {
    if (post_unwind_) {
      return SaveRecordForPostUnwinding(record);
    }
    return SaveRecordAfterUnwinding(record);
  }
  return SaveRecordWithoutUnwinding(record);
}
1351 
DumpAuxTraceInfo()1352 bool RecordCommand::DumpAuxTraceInfo() {
1353   if (event_selection_set_.HasAuxTrace()) {
1354     AuxTraceInfoRecord auxtrace_info = ETMRecorder::GetInstance().CreateAuxTraceInfoRecord();
1355     return ProcessRecord(&auxtrace_info);
1356   }
1357   return true;
1358 }
1359 
// True when a (non-kernel) mmap record refers to a file that exists only in memory.
template <typename MmapRecordType>
bool MapOnlyExistInMemory(MmapRecordType* record) {
  if (record->InKernel()) {
    return false;
  }
  return MappedFileOnlyExistInMemory(record->filename);
}
1364 
ShouldOmitRecord(Record * record)1365 bool RecordCommand::ShouldOmitRecord(Record* record) {
1366   if (jit_debug_reader_) {
1367     // To profile jitted Java code, we need PROT_JIT_SYMFILE_MAP maps not overlapped by maps for
1368     // [anon:dalvik-jit-code-cache]. To profile interpreted Java code, we record maps that
1369     // are not executable. Some non-exec maps (like those for stack, heap) provide misleading map
1370     // entries for unwinding, as in http://b/77236599. So it is better to remove
1371     // dalvik-jit-code-cache and other maps that only exist in memory.
1372     switch (record->type()) {
1373       case PERF_RECORD_MMAP:
1374         return MapOnlyExistInMemory(static_cast<MmapRecord*>(record));
1375       case PERF_RECORD_MMAP2:
1376         return MapOnlyExistInMemory(static_cast<Mmap2Record*>(record));
1377     }
1378   }
1379   return false;
1380 }
1381 
DumpMapsForRecord(Record * record)1382 bool RecordCommand::DumpMapsForRecord(Record* record) {
1383   if (record->type() == PERF_RECORD_SAMPLE) {
1384     pid_t pid = static_cast<SampleRecord*>(record)->tid_data.pid;
1385     if (dumped_processes_.find(pid) == dumped_processes_.end()) {
1386       // Dump map info and all thread names for that process.
1387       if (!map_record_reader_->ReadProcessMaps(pid, last_record_timestamp_)) {
1388         return false;
1389       }
1390       dumped_processes_.insert(pid);
1391     }
1392   }
1393   return true;
1394 }
1395 
SaveRecordForPostUnwinding(Record * record)1396 bool RecordCommand::SaveRecordForPostUnwinding(Record* record) {
1397   if (!record_file_writer_->WriteRecord(*record)) {
1398     LOG(ERROR) << "If there isn't enough space for storing profiling data, consider using "
1399                << "--no-post-unwind option.";
1400     return false;
1401   }
1402   return true;
1403 }
1404 
SaveRecordAfterUnwinding(Record * record)1405 bool RecordCommand::SaveRecordAfterUnwinding(Record* record) {
1406   if (record->type() == PERF_RECORD_SAMPLE) {
1407     auto& r = *static_cast<SampleRecord*>(record);
1408     // AdjustCallChainGeneratedByKernel() should go before UnwindRecord(). Because we don't want
1409     // to adjust callchains generated by dwarf unwinder.
1410     r.AdjustCallChainGeneratedByKernel();
1411     if (!UnwindRecord(r)) {
1412       return false;
1413     }
1414     // ExcludeKernelCallChain() should go after UnwindRecord() to notice the generated user call
1415     // chain.
1416     if (r.InKernel() && exclude_kernel_callchain_ && !r.ExcludeKernelCallChain()) {
1417       // If current record contains no user callchain, skip it.
1418       return true;
1419     }
1420     sample_record_count_++;
1421   } else if (record->type() == PERF_RECORD_LOST) {
1422     lost_record_count_ += static_cast<LostRecord*>(record)->lost;
1423   } else {
1424     thread_tree_.Update(*record);
1425   }
1426   return record_file_writer_->WriteRecord(*record);
1427 }
1428 
SaveRecordWithoutUnwinding(Record * record)1429 bool RecordCommand::SaveRecordWithoutUnwinding(Record* record) {
1430   if (record->type() == PERF_RECORD_SAMPLE) {
1431     auto& r = *static_cast<SampleRecord*>(record);
1432     if (fp_callchain_sampling_ || dwarf_callchain_sampling_) {
1433       r.AdjustCallChainGeneratedByKernel();
1434     }
1435     if (r.InKernel() && exclude_kernel_callchain_ && !r.ExcludeKernelCallChain()) {
1436       // If current record contains no user callchain, skip it.
1437       return true;
1438     }
1439     sample_record_count_++;
1440   } else if (record->type() == PERF_RECORD_LOST) {
1441     lost_record_count_ += static_cast<LostRecord*>(record)->lost;
1442   }
1443   return record_file_writer_->WriteRecord(*record);
1444 }
1445 
// Turn JIT debug info (JIT code maps and extracted dex file maps) into synthesized Mmap2Records
// so later samples can be symbolized against them. When sync_kernel_records is set, also flush
// the kernel buffers so existing samples are processed before newer JIT maps arrive.
bool RecordCommand::ProcessJITDebugInfo(const std::vector<JITDebugInfo>& debug_info,
                                        bool sync_kernel_records) {
  // Reuse the first event's attr/id for the synthesized map records.
  EventAttrWithId attr_id = event_selection_set_.GetEventAttrWithId()[0];
  for (auto& info : debug_info) {
    if (info.type == JITDebugInfo::JIT_DEBUG_JIT_CODE) {
      // If the reader can sync with record timestamps, use the info's own timestamp; otherwise
      // fall back to the newest record timestamp seen so far.
      uint64_t timestamp =
          jit_debug_reader_->SyncWithRecords() ? info.timestamp : last_record_timestamp_;
      Mmap2Record record(*attr_id.attr, false, info.pid, info.pid, info.jit_code_addr,
                         info.jit_code_len, info.file_offset, map_flags::PROT_JIT_SYMFILE_MAP,
                         info.file_path, attr_id.ids[0], timestamp);
      if (!ProcessRecord(&record)) {
        return false;
      }
    } else {
      if (info.extracted_dex_file_map) {
        ThreadMmap& map = *info.extracted_dex_file_map;
        uint64_t timestamp =
            jit_debug_reader_->SyncWithRecords() ? info.timestamp : last_record_timestamp_;
        Mmap2Record record(*attr_id.attr, false, info.pid, info.pid, map.start_addr, map.len,
                           map.pgoff, map.prot, map.name, attr_id.ids[0], timestamp);
        if (!ProcessRecord(&record)) {
          return false;
        }
      }
      thread_tree_.AddDexFileOffset(info.file_path, info.dex_file_offset);
    }
  }
  // We want to let samples see the most recent JIT maps generated before them, but no JIT maps
  // generated after them. So process existing samples each time generating new JIT maps. We prefer
  // to process samples after processing JIT maps. Because some of the samples may hit the new JIT
  // maps, and we want to report them properly.
  if (sync_kernel_records && !event_selection_set_.SyncKernelBuffer()) {
    return false;
  }
  return true;
}
1482 
ProcessControlCmd(IOEventLoop * loop)1483 bool RecordCommand::ProcessControlCmd(IOEventLoop* loop) {
1484   char* line = nullptr;
1485   size_t line_length = 0;
1486   if (getline(&line, &line_length, stdin) == -1) {
1487     free(line);
1488     // When the simpleperf Java API destroys the simpleperf process, it also closes the stdin pipe.
1489     // So we may see EOF of stdin.
1490     return loop->ExitLoop();
1491   }
1492   std::string cmd = android::base::Trim(line);
1493   free(line);
1494   LOG(DEBUG) << "process control cmd: " << cmd;
1495   bool result = false;
1496   if (cmd == "pause") {
1497     result = event_selection_set_.SetEnableEvents(false);
1498   } else if (cmd == "resume") {
1499     result = event_selection_set_.SetEnableEvents(true);
1500   } else {
1501     LOG(ERROR) << "unknown control cmd: " << cmd;
1502   }
1503   printf("%s\n", result ? "ok" : "error");
1504   fflush(stdout);
1505   return result;
1506 }
1507 
// Rewrite a user-space mmap record whose backing file is embedded inside
// another file (ELF in APK, dex extracted in memory), so the report side shows
// the embedded entity's name even when the container isn't available there.
template <class RecordType>
void UpdateMmapRecordForEmbeddedPath(RecordType& r, bool has_prot, uint32_t prot) {
  if (r.InKernel()) {
    return;
  }
  std::string filename = r.filename;
  bool name_changed = false;
  // Some vdex files in map files are marked with deleted flag, but they exist in the file system.
  // It may be because a new file is used to replace the old one, but still worth to try.
  if (android::base::EndsWith(filename, " (deleted)")) {
    filename.resize(filename.size() - 10);  // strip the " (deleted)" suffix (10 chars)
    name_changed = true;
  }
  // Only mappings with a nonzero page offset can be an ELF embedded in an APK;
  // when prot is known (MMAP2), additionally require the mapping be executable.
  if (r.data->pgoff != 0 && (!has_prot || (prot & PROT_EXEC))) {
    // For the case of a shared library "foobar.so" embedded
    // inside an APK, we rewrite the original MMAP from
    // ["path.apk" offset=X] to ["path.apk!/foobar.so" offset=W]
    // so as to make the library name explicit. This update is
    // done here (as part of the record operation) as opposed to
    // on the host during the report, since we want to report
    // the correct library name even if the the APK in question
    // is not present on the host. The new offset W is
    // calculated to be with respect to the start of foobar.so,
    // not to the start of path.apk.
    EmbeddedElf* ee = ApkInspector::FindElfInApkByOffset(filename, r.data->pgoff);
    if (ee != nullptr) {
      // Compute new offset relative to start of elf in APK.
      auto data = *r.data;
      data.pgoff -= ee->entry_offset();
      r.SetDataAndFilename(data, GetUrlInApk(filename, ee->entry_name()));
      return;
    }
  }
  // Paths of dex files extracted into memory are rewritten to the
  // "<zip>!/<entry>" url form as well (see ParseExtractedInMemoryPath).
  std::string zip_path;
  std::string entry_name;
  if (ParseExtractedInMemoryPath(filename, &zip_path, &entry_name)) {
    filename = GetUrlInApk(zip_path, entry_name);
    name_changed = true;
  }
  if (name_changed) {
    auto data = *r.data;
    r.SetDataAndFilename(data, filename);
  }
}
1552 
UpdateRecord(Record * record)1553 void RecordCommand::UpdateRecord(Record* record) {
1554   if (record->type() == PERF_RECORD_MMAP) {
1555     UpdateMmapRecordForEmbeddedPath(*static_cast<MmapRecord*>(record), false, 0);
1556   } else if (record->type() == PERF_RECORD_MMAP2) {
1557     auto r = static_cast<Mmap2Record*>(record);
1558     UpdateMmapRecordForEmbeddedPath(*r, true, r->data->prot);
1559   } else if (record->type() == PERF_RECORD_COMM) {
1560     auto r = static_cast<CommRecord*>(record);
1561     if (r->data->pid == r->data->tid) {
1562       std::string s = GetCompleteProcessName(r->data->pid);
1563       if (!s.empty()) {
1564         r->SetCommandName(s);
1565       }
1566     }
1567   }
1568 }
1569 
// Offline-unwind a sample's user callchain from its register values and stack
// copy, replacing the raw register/stack data with the unwound IPs.
bool RecordCommand::UnwindRecord(SampleRecord& r) {
  // Only unwind samples carrying everything the offline unwinder needs: a
  // callchain, user registers (with a nonzero reg mask), and user stack data.
  if ((r.sample_type & PERF_SAMPLE_CALLCHAIN) && (r.sample_type & PERF_SAMPLE_REGS_USER) &&
      (r.regs_user_data.reg_mask != 0) && (r.sample_type & PERF_SAMPLE_STACK_USER) &&
      (r.GetValidStackSize() > 0)) {
    ThreadEntry* thread = thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
    RegSet regs(r.regs_user_data.abi, r.regs_user_data.reg_mask, r.regs_user_data.regs);
    std::vector<uint64_t> ips;
    std::vector<uint64_t> sps;
    if (!offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data,
                                            r.GetValidStackSize(), &ips, &sps)) {
      return false;
    }
    // The unwinding may fail if JIT debug info isn't the latest. In this case, read JIT debug info
    // from the process and retry unwinding.
    if (jit_debug_reader_ && !post_unwind_ &&
        offline_unwinder_->IsCallChainBrokenForIncompleteJITDebugInfo()) {
      jit_debug_reader_->ReadProcess(r.tid_data.pid);
      jit_debug_reader_->FlushDebugInfo(r.Timestamp());
      if (!offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data,
                                              r.GetValidStackSize(), &ips, &sps)) {
        return false;
      }
    }
    // Optionally keep the unwinder's failure details for offline debugging.
    if (keep_failed_unwinding_result_ && !KeepFailedUnwindingResult(r, ips, sps)) {
      return false;
    }
    // From here on the sample carries the unwound callchain instead of the raw
    // register/stack dump.
    r.ReplaceRegAndStackWithCallChain(ips);
    if (callchain_joiner_ &&
        !callchain_joiner_->AddCallChain(r.tid_data.pid, r.tid_data.tid,
                                         CallChainJoiner::ORIGINAL_OFFLINE, ips, sps)) {
      return false;
    }
  }
  return true;
}
1605 
KeepFailedUnwindingResult(const SampleRecord & r,const std::vector<uint64_t> & ips,const std::vector<uint64_t> & sps)1606 bool RecordCommand::KeepFailedUnwindingResult(const SampleRecord& r,
1607                                               const std::vector<uint64_t>& ips,
1608                                               const std::vector<uint64_t>& sps) {
1609   auto& result = offline_unwinder_->GetUnwindingResult();
1610   if (result.error_code != unwindstack::ERROR_NONE) {
1611     if (keep_failed_unwinding_debug_info_) {
1612       return record_file_writer_->WriteRecord(UnwindingResultRecord(
1613           r.time_data.time, result, r.regs_user_data, r.stack_user_data, ips, sps));
1614     }
1615     return record_file_writer_->WriteRecord(
1616         UnwindingResultRecord(r.time_data.time, result, {}, {}, {}, {}));
1617   }
1618   return true;
1619 }
1620 
MoveRecordFile(const std::string & old_filename)1621 std::unique_ptr<RecordFileReader> RecordCommand::MoveRecordFile(const std::string& old_filename) {
1622   if (!record_file_writer_->Close()) {
1623     return nullptr;
1624   }
1625   record_file_writer_.reset();
1626   {
1627     std::error_code ec;
1628     std::filesystem::rename(record_filename_, old_filename, ec);
1629     if (ec) {
1630       LOG(ERROR) << "Failed to rename: " << ec.message();
1631       return nullptr;
1632     }
1633   }
1634   record_file_writer_ = CreateRecordFile(record_filename_);
1635   if (!record_file_writer_) {
1636     return nullptr;
1637   }
1638   return RecordFileReader::CreateInstance(old_filename);
1639 }
1640 
MergeMapRecords()1641 bool RecordCommand::MergeMapRecords() {
1642   // 1. Move records from record_filename_ to a temporary file.
1643   auto tmp_file = ScopedTempFiles::CreateTempFile();
1644   auto reader = MoveRecordFile(tmp_file->path);
1645   if (!reader) {
1646     return false;
1647   }
1648 
1649   // 2. Copy map records from map record thread.
1650   auto callback = [this](Record* r) {
1651     UpdateRecord(r);
1652     if (ShouldOmitRecord(r)) {
1653       return true;
1654     }
1655     return record_file_writer_->WriteRecord(*r);
1656   };
1657   if (!map_record_thread_->ReadMapRecords(callback)) {
1658     return false;
1659   }
1660 
1661   // 3. Copy data section from the old recording file.
1662   std::vector<char> buf(64 * 1024);
1663   uint64_t offset = reader->FileHeader().data.offset;
1664   uint64_t left_size = reader->FileHeader().data.size;
1665   while (left_size > 0) {
1666     size_t nread = std::min<size_t>(left_size, buf.size());
1667     if (!reader->ReadAtOffset(offset, buf.data(), nread) ||
1668         !record_file_writer_->WriteData(buf.data(), nread)) {
1669       return false;
1670     }
1671     offset += nread;
1672     left_size -= nread;
1673   }
1674   return true;
1675 }
1676 
PostUnwindRecords()1677 bool RecordCommand::PostUnwindRecords() {
1678   auto tmp_file = ScopedTempFiles::CreateTempFile();
1679   auto reader = MoveRecordFile(tmp_file->path);
1680   if (!reader) {
1681     return false;
1682   }
1683   sample_record_count_ = 0;
1684   lost_record_count_ = 0;
1685   auto callback = [this](std::unique_ptr<Record> record) {
1686     return SaveRecordAfterUnwinding(record.get());
1687   };
1688   return reader->ReadDataSection(callback);
1689 }
1690 
// Replace each sample's user callchain with the joined callchain produced by
// the callchain joiner, rewriting the output file in place.
bool RecordCommand::JoinCallChains() {
  // 1. Prepare joined callchains.
  if (!callchain_joiner_->JoinCallChains()) {
    return false;
  }
  // 2. Move records from record_filename_ to a temporary file.
  auto tmp_file = ScopedTempFiles::CreateTempFile();
  auto reader = MoveRecordFile(tmp_file->path);
  if (!reader) {
    return false;
  }

  // 3. Read records from the temporary file, and write record with joined call chains back
  // to record_filename_.
  auto record_callback = [&](std::unique_ptr<Record> r) {
    // Non-sample records and samples without user callchains pass through unchanged.
    if (r->type() != PERF_RECORD_SAMPLE) {
      return record_file_writer_->WriteRecord(*r);
    }
    SampleRecord& sr = *static_cast<SampleRecord*>(r.get());
    if (!sr.HasUserCallChain()) {
      return record_file_writer_->WriteRecord(sr);
    }
    // The joiner is expected to return joined chains in the same order the
    // original chains were added, so the next chain must belong to this sample
    // (enforced by the CHECKs below).
    pid_t pid;
    pid_t tid;
    CallChainJoiner::ChainType type;
    std::vector<uint64_t> ips;
    std::vector<uint64_t> sps;
    if (!callchain_joiner_->GetNextCallChain(pid, tid, type, ips, sps)) {
      return false;
    }
    CHECK_EQ(type, CallChainJoiner::JOINED_OFFLINE);
    CHECK_EQ(pid, static_cast<pid_t>(sr.tid_data.pid));
    CHECK_EQ(tid, static_cast<pid_t>(sr.tid_data.tid));
    sr.UpdateUserCallChain(ips);
    return record_file_writer_->WriteRecord(sr);
  };
  return reader->ReadDataSection(record_callback);
}
1729 
LoadSymbolMapFile(int pid,const std::string & package,ThreadTree * thread_tree)1730 static void LoadSymbolMapFile(int pid, const std::string& package, ThreadTree* thread_tree) {
1731   // On Linux, symbol map files usually go to /tmp/perf-<pid>.map
1732   // On Android, there is no directory where any process can create files.
1733   // For now, use /data/local/tmp/perf-<pid>.map, which works for standalone programs,
1734   // and /data/data/<package>/perf-<pid>.map, which works for apps.
1735   auto path = package.empty()
1736                   ? android::base::StringPrintf("/data/local/tmp/perf-%d.map", pid)
1737                   : android::base::StringPrintf("/data/data/%s/perf-%d.map", package.c_str(), pid);
1738 
1739   auto symbols = ReadSymbolMapFromFile(path);
1740   if (!symbols.empty()) {
1741     thread_tree->AddSymbolsForProcess(pid, &symbols);
1742   }
1743 }
1744 
// Write the feature sections of the output file (build ids, file info, os
// release, arch, cmdline, meta info, and conditional features), after scanning
// the data section to learn which files/symbols were hit by samples.
bool RecordCommand::DumpAdditionalFeatures(const std::vector<std::string>& args) {
  // Read data section of perf.data to collect hit file information.
  thread_tree_.ClearThreadAndMap();
  bool kernel_symbols_available = false;
  std::string kallsyms;
  if (event_selection_set_.NeedKernelSymbol() && LoadKernelSymbols(&kallsyms)) {
    Dso::SetKallsyms(kallsyms);
    kernel_symbols_available = true;
  }
  std::unordered_set<int> loaded_symbol_maps;
  std::vector<uint64_t> auxtrace_offset;
  std::unordered_set<Dso*> debug_unwinding_files;
  bool failed_unwinding_sample = false;

  auto callback = [&](const Record* r) {
    thread_tree_.Update(*r);
    if (r->type() == PERF_RECORD_SAMPLE) {
      auto sample = reinterpret_cast<const SampleRecord*>(r);
      // Symbol map files are available after recording. Load one for the process.
      if (loaded_symbol_maps.insert(sample->tid_data.pid).second) {
        LoadSymbolMapFile(sample->tid_data.pid, app_package_name_, &thread_tree_);
      }
      // A sample that directly follows an unwinding-result record is the sample
      // whose unwinding failed; also collect its dsos for the debug-unwind feature.
      if (failed_unwinding_sample) {
        failed_unwinding_sample = false;
        CollectHitFileInfo(*sample, &debug_unwinding_files);
      } else {
        CollectHitFileInfo(*sample, nullptr);
      }
    } else if (r->type() == PERF_RECORD_AUXTRACE) {
      auto auxtrace = static_cast<const AuxTraceRecord*>(r);
      // NOTE(review): location.file_offset appears to point past the record, so
      // subtracting size() yields the record's start offset — confirm against
      // AuxTraceRecord's definition.
      auxtrace_offset.emplace_back(auxtrace->location.file_offset - auxtrace->size());
    } else if (r->type() == SIMPLE_PERF_RECORD_UNWINDING_RESULT) {
      failed_unwinding_sample = true;
    }
  };

  if (!record_file_writer_->ReadDataSection(callback)) {
    return false;
  }

  // Six features are written unconditionally below: build id, file, os release,
  // arch, cmdline and meta info. The count must match the writes exactly.
  size_t feature_count = 6;
  if (branch_sampling_) {
    feature_count++;
  }
  if (!auxtrace_offset.empty()) {
    feature_count++;
  }
  if (keep_failed_unwinding_debug_info_) {
    feature_count += 2;  // for the debug-unwind feature and its file data
  }
  if (!record_file_writer_->BeginWriteFeatures(feature_count)) {
    return false;
  }
  if (!DumpBuildIdFeature()) {
    return false;
  }
  if (!DumpFileFeature()) {
    return false;
  }
  utsname uname_buf;
  if (TEMP_FAILURE_RETRY(uname(&uname_buf)) != 0) {
    PLOG(ERROR) << "uname() failed";
    return false;
  }
  if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_OSRELEASE, uname_buf.release)) {
    return false;
  }
  if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_ARCH, uname_buf.machine)) {
    return false;
  }

  // Store the full command line used for this recording.
  std::string exec_path = android::base::GetExecutablePath();
  if (exec_path.empty()) exec_path = "simpleperf";
  std::vector<std::string> cmdline;
  cmdline.push_back(exec_path);
  cmdline.push_back("record");
  cmdline.insert(cmdline.end(), args.begin(), args.end());
  if (!record_file_writer_->WriteCmdlineFeature(cmdline)) {
    return false;
  }
  if (branch_sampling_ != 0 && !record_file_writer_->WriteBranchStackFeature()) {
    return false;
  }
  if (!DumpMetaInfoFeature(kernel_symbols_available)) {
    return false;
  }
  if (!auxtrace_offset.empty() && !record_file_writer_->WriteAuxTraceFeature(auxtrace_offset)) {
    return false;
  }
  if (keep_failed_unwinding_debug_info_ && !DumpDebugUnwindFeature(debug_unwinding_files)) {
    return false;
  }

  if (!record_file_writer_->EndWriteFeatures()) {
    return false;
  }
  return true;
}
1843 
// Write the build-id feature section: one BuildIdRecord per dso whose build id
// can be read, covering the kernel, kernel modules and ELF files.
bool RecordCommand::DumpBuildIdFeature() {
  std::vector<BuildIdRecord> build_id_records;
  BuildId build_id;
  std::vector<Dso*> dso_v = thread_tree_.GetAllDsos();
  for (Dso* dso : dso_v) {
    // For aux tracing, we don't know which binaries are traced.
    // So dump build ids for all binaries.
    if (!dso->HasDumpId() && !event_selection_set_.HasAuxTrace()) {
      continue;
    }
    if (dso->type() == DSO_KERNEL) {
      if (!GetKernelBuildId(&build_id)) {
        continue;
      }
      build_id_records.push_back(BuildIdRecord(true, UINT_MAX, build_id, dso->Path()));
    } else if (dso->type() == DSO_KERNEL_MODULE) {
      bool has_build_id = false;
      if (android::base::EndsWith(dso->Path(), ".ko")) {
        // Module backed by a .ko file on disk: read the build id from the file.
        has_build_id = GetBuildIdFromDsoPath(dso->Path(), &build_id);
      } else if (const std::string& path = dso->Path();
                 path.size() > 2 && path[0] == '[' && path.back() == ']') {
        // For kernel modules that we can't find the corresponding file, read build id from /sysfs.
        has_build_id = GetModuleBuildId(path.substr(1, path.size() - 2), &build_id);
      }
      if (has_build_id) {
        build_id_records.push_back(BuildIdRecord(true, UINT_MAX, build_id, dso->Path()));
      } else {
        LOG(DEBUG) << "Can't read build_id for module " << dso->Path();
      }
    } else if (dso->type() == DSO_ELF_FILE) {
      // Skip placeholder names that don't correspond to real files on disk.
      if (dso->Path() == DEFAULT_EXECNAME_FOR_THREAD_MMAP || dso->IsForJavaMethod()) {
        continue;
      }
      if (!GetBuildIdFromDsoPath(dso->Path(), &build_id)) {
        LOG(DEBUG) << "Can't read build_id from file " << dso->Path();
        continue;
      }
      build_id_records.push_back(BuildIdRecord(false, UINT_MAX, build_id, dso->Path()));
    }
  }
  if (!record_file_writer_->WriteBuildIdFeature(build_id_records)) {
    return false;
  }
  return true;
}
1889 
DumpFileFeature()1890 bool RecordCommand::DumpFileFeature() {
1891   std::vector<Dso*> dso_v = thread_tree_.GetAllDsos();
1892   // To parse ETM data for kernel modules, we need to dump memory address for kernel modules.
1893   if (event_selection_set_.HasAuxTrace() && !event_selection_set_.ExcludeKernel()) {
1894     for (Dso* dso : dso_v) {
1895       if (dso->type() == DSO_KERNEL_MODULE) {
1896         dso->CreateDumpId();
1897       }
1898     }
1899   }
1900   return record_file_writer_->WriteFileFeatures(dso_v);
1901 }
1902 
// Write the meta info feature section: key/value strings describing how this
// profile was recorded (simpleperf version, device info, clock, event types...).
bool RecordCommand::DumpMetaInfoFeature(bool kernel_symbols_available) {
  std::unordered_map<std::string, std::string> info_map = extra_meta_info_;
  info_map["simpleperf_version"] = GetSimpleperfVersion();
  info_map["system_wide_collection"] = system_wide_collection_ ? "true" : "false";
  info_map["trace_offcpu"] = trace_offcpu_ ? "true" : "false";
  // By storing event types information in perf.data, the readers of perf.data have the same
  // understanding of event types, even if they are on another machine.
  info_map["event_type_info"] = ScopedEventTypes::BuildString(event_selection_set_.GetEvents());
#if defined(__ANDROID__)
  // Device identification, read from system properties.
  info_map["product_props"] = android::base::StringPrintf(
      "%s:%s:%s", android::base::GetProperty("ro.product.manufacturer", "").c_str(),
      android::base::GetProperty("ro.product.model", "").c_str(),
      android::base::GetProperty("ro.product.name", "").c_str());
  info_map["android_version"] = android::base::GetProperty("ro.build.version.release", "");
  info_map["android_sdk_version"] = android::base::GetProperty("ro.build.version.sdk", "");
  info_map["android_build_type"] = android::base::GetProperty("ro.build.type", "");
  if (!app_package_name_.empty()) {
    info_map["app_package_name"] = app_package_name_;
    if (IsRoot()) {
      info_map["app_type"] = GetAppType(app_package_name_);
    }
  }
  if (event_selection_set_.HasAuxTrace()) {
    // used by --exclude-perf in cmd_inject.cpp
    info_map["recording_process"] = std::to_string(getpid());
  }
#endif
  info_map["clockid"] = clockid_;
  info_map["timestamp"] = std::to_string(time(nullptr));
  info_map["kernel_symbols_available"] = kernel_symbols_available ? "true" : "false";
  // When dwarf callchains were recorded but not unwound during recording, store
  // the unwinder's meta info so unwinding can happen later.
  if (dwarf_callchain_sampling_ && !unwind_dwarf_callchain_) {
    OfflineUnwinder::CollectMetaInfo(&info_map);
  }
  return record_file_writer_->WriteMetaInfoFeature(info_map);
}
1938 
DumpDebugUnwindFeature(const std::unordered_set<Dso * > & dso_set)1939 bool RecordCommand::DumpDebugUnwindFeature(const std::unordered_set<Dso*>& dso_set) {
1940   DebugUnwindFeature debug_unwind_feature;
1941   debug_unwind_feature.reserve(dso_set.size());
1942   for (const Dso* dso : dso_set) {
1943     if (dso->type() != DSO_ELF_FILE) {
1944       continue;
1945     }
1946     const std::string& filename = dso->GetDebugFilePath();
1947     std::unique_ptr<ElfFile> elf = ElfFile::Open(filename);
1948     if (elf) {
1949       llvm::MemoryBuffer* buffer = elf->GetMemoryBuffer();
1950       debug_unwind_feature.resize(debug_unwind_feature.size() + 1);
1951       auto& debug_unwind_file = debug_unwind_feature.back();
1952       debug_unwind_file.path = filename;
1953       debug_unwind_file.size = buffer->getBufferSize();
1954       if (!record_file_writer_->WriteFeature(PerfFileFormat::FEAT_DEBUG_UNWIND_FILE,
1955                                              buffer->getBufferStart(), buffer->getBufferSize())) {
1956         return false;
1957       }
1958     } else {
1959       LOG(WARNING) << "failed to keep " << filename << " in debug_unwind_feature section";
1960     }
1961   }
1962   return record_file_writer_->WriteDebugUnwindFeature(debug_unwind_feature);
1963 }
1964 
CollectHitFileInfo(const SampleRecord & r,std::unordered_set<Dso * > * dso_set)1965 void RecordCommand::CollectHitFileInfo(const SampleRecord& r, std::unordered_set<Dso*>* dso_set) {
1966   const ThreadEntry* thread = thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
1967   size_t kernel_ip_count;
1968   std::vector<uint64_t> ips = r.GetCallChain(&kernel_ip_count);
1969   for (size_t i = 0; i < ips.size(); i++) {
1970     const MapEntry* map = thread_tree_.FindMap(thread, ips[i], i < kernel_ip_count);
1971     Dso* dso = map->dso;
1972     if (dump_symbols_) {
1973       const Symbol* symbol = thread_tree_.FindSymbol(map, ips[i], nullptr, &dso);
1974       if (!symbol->HasDumpId()) {
1975         dso->CreateSymbolDumpId(symbol);
1976       }
1977     }
1978     if (!dso->HasDumpId() && dso->type() != DSO_UNKNOWN_FILE) {
1979       dso->CreateDumpId();
1980     }
1981     if (dso_set != nullptr) {
1982       dso_set->insert(dso);
1983     }
1984   }
1985 }
1986 
1987 }  // namespace
1988 
// If |p| starts with the literal |s|, advance |p| past it and return true;
// otherwise leave |p| unchanged and return false.
static bool ConsumeStr(const char*& p, const char* s) {
  size_t len = strlen(s);  // compute once instead of twice
  if (strncmp(p, s, len) == 0) {
    p += len;
    return true;
  }
  return false;
}
1996 
// Parse an unsigned integer (any base accepted by strtoull, e.g. 0x-prefixed
// hex) at |p| into *addr. On success advance |p| past the digits and return
// true; on failure |p| is left unchanged.
static bool ConsumeAddr(const char*& p, uint64_t* addr) {
  errno = 0;
  char* end = nullptr;
  *addr = strtoull(p, &end, 0);
  const bool parsed = (errno == 0) && (end != p);
  if (parsed) {
    p = end;
  }
  return parsed;
}
2007 
// To reduce function length, not all format errors are checked.
//
// Parse one ETM address-filter description into |filters|. Accepted forms
// (matched in order below):
//   start <kernel_addr>                      start <vaddr>@<file_path>
//   stop <kernel_addr>                       stop <vaddr>@<file_path>
//   filter <kernel_addr_start>-<kernel_addr_end>
//   filter <vaddr_start>-<vaddr_end>@<file_path>
//   filter <file_path>   (filters every executable segment of the file)
// Returns false if |s| matches none of them.
static bool ParseOneAddrFilter(const std::string& s, std::vector<AddrFilter>* filters) {
  // Every valid form has exactly two space-separated tokens.
  std::vector<std::string> args = android::base::Split(s, " ");
  if (args.size() != 2) {
    return false;
  }

  uint64_t addr1;
  uint64_t addr2;
  uint64_t off1;
  uint64_t off2;
  std::string path;

  if (auto p = s.data(); ConsumeStr(p, "start") && ConsumeAddr(p, &addr1)) {
    if (*p == '\0') {
      // start <kernel_addr>
      filters->emplace_back(AddrFilter::KERNEL_START, addr1, 0, "");
      return true;
    }
    if (ConsumeStr(p, "@") && *p != '\0') {
      // start <vaddr>@<file_path>
      // File-based filters store file offsets, so translate the virtual address.
      if (auto elf = ElfFile::Open(p); elf && elf->VaddrToOff(addr1, &off1) && Realpath(p, &path)) {
        filters->emplace_back(AddrFilter::FILE_START, off1, 0, path);
        return true;
      }
    }
  }
  if (auto p = s.data(); ConsumeStr(p, "stop") && ConsumeAddr(p, &addr1)) {
    if (*p == '\0') {
      // stop <kernel_addr>
      filters->emplace_back(AddrFilter::KERNEL_STOP, addr1, 0, "");
      return true;
    }
    if (ConsumeStr(p, "@") && *p != '\0') {
      // stop <vaddr>@<file_path>
      if (auto elf = ElfFile::Open(p); elf && elf->VaddrToOff(addr1, &off1) && Realpath(p, &path)) {
        filters->emplace_back(AddrFilter::FILE_STOP, off1, 0, path);
        return true;
      }
    }
  }
  if (auto p = s.data(); ConsumeStr(p, "filter") && ConsumeAddr(p, &addr1) && ConsumeStr(p, "-") &&
                         ConsumeAddr(p, &addr2)) {
    if (*p == '\0') {
      // filter <kernel_addr_start>-<kernel_addr_end>
      filters->emplace_back(AddrFilter::KERNEL_RANGE, addr1, addr2 - addr1, "");
      return true;
    }
    if (ConsumeStr(p, "@") && *p != '\0') {
      // filter <vaddr_start>-<vaddr_end>@<file_path>
      if (auto elf = ElfFile::Open(p); elf && elf->VaddrToOff(addr1, &off1) &&
                                       elf->VaddrToOff(addr2, &off2) && Realpath(p, &path)) {
        filters->emplace_back(AddrFilter::FILE_RANGE, off1, off2 - off1, path);
        return true;
      }
    }
  }
  if (auto p = s.data(); ConsumeStr(p, "filter") && *p != '\0') {
    // filter <file_path>
    // No address given: add a range filter for each executable segment.
    path = android::base::Trim(p);
    if (auto elf = ElfFile::Open(path); elf) {
      for (const ElfSegment& seg : elf->GetProgramHeader()) {
        if (seg.is_executable) {
          filters->emplace_back(AddrFilter::FILE_RANGE, seg.file_offset, seg.file_size, path);
        }
      }
      return true;
    }
  }
  return false;
}
2079 
ParseAddrFilterOption(const std::string & s)2080 std::vector<AddrFilter> ParseAddrFilterOption(const std::string& s) {
2081   std::vector<AddrFilter> filters;
2082   for (const auto& str : android::base::Split(s, ",")) {
2083     if (!ParseOneAddrFilter(str, &filters)) {
2084       LOG(ERROR) << "failed to parse addr filter: " << str;
2085       return {};
2086     }
2087   }
2088   return filters;
2089 }
2090 
RegisterRecordCommand()2091 void RegisterRecordCommand() {
2092   RegisterCommand("record", [] { return std::unique_ptr<Command>(new RecordCommand()); });
2093 }
2094 
2095 }  // namespace simpleperf
2096