1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <inttypes.h>
18 #include <libgen.h>
19 #include <signal.h>
20 #include <sys/mman.h>
21 #include <sys/prctl.h>
22 #include <sys/utsname.h>
23 #include <time.h>
24 #include <unistd.h>
25 #include <filesystem>
26 #include <optional>
27 #include <set>
28 #include <string>
29 #include <unordered_map>
30 #include <unordered_set>
31 #include <vector>
32
33 #include <android-base/file.h>
34 #include <android-base/logging.h>
35 #include <android-base/parseint.h>
36 #include <android-base/scopeguard.h>
37 #include <android-base/stringprintf.h>
38 #include <android-base/strings.h>
39 #include <android-base/unique_fd.h>
40
41 #pragma clang diagnostic push
42 #pragma clang diagnostic ignored "-Wunused-parameter"
43 #include <llvm/Support/MemoryBuffer.h>
44 #pragma clang diagnostic pop
45
46 #if defined(__ANDROID__)
47 #include <android-base/properties.h>
48 #endif
49 #include <unwindstack/Error.h>
50
51 #include "CallChainJoiner.h"
52 #include "ETMRecorder.h"
53 #include "IOEventLoop.h"
54 #include "JITDebugReader.h"
55 #include "MapRecordReader.h"
56 #include "OfflineUnwinder.h"
57 #include "ProbeEvents.h"
58 #include "RecordFilter.h"
59 #include "cmd_record_impl.h"
60 #include "command.h"
61 #include "environment.h"
62 #include "event_selection_set.h"
63 #include "event_type.h"
64 #include "kallsyms.h"
65 #include "read_apk.h"
66 #include "read_elf.h"
67 #include "read_symbol_map.h"
68 #include "record.h"
69 #include "record_file.h"
70 #include "thread_tree.h"
71 #include "tracing.h"
72 #include "utils.h"
73 #include "workload.h"
74
75 namespace simpleperf {
76 namespace {
77
using android::base::ParseUint;
using android::base::Realpath;

// Event type recorded when the user doesn't pass -e/--group.
static std::string default_measured_event_type = "cpu-cycles";

// Maps each -j/--branch filter name to the PERF_SAMPLE_BRANCH_* bit passed to
// perf_event_open().
static std::unordered_map<std::string, uint64_t> branch_sampling_type_map = {
    {"u", PERF_SAMPLE_BRANCH_USER},
    {"k", PERF_SAMPLE_BRANCH_KERNEL},
    {"any", PERF_SAMPLE_BRANCH_ANY},
    {"any_call", PERF_SAMPLE_BRANCH_ANY_CALL},
    {"any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN},
    {"ind_call", PERF_SAMPLE_BRANCH_IND_CALL},
};

// Maps each --clockid name to the clockid value passed to the kernel.
static std::unordered_map<std::string, int> clockid_map = {
    {"realtime", CLOCK_REALTIME},
    {"monotonic", CLOCK_MONOTONIC},
    {"monotonic_raw", CLOCK_MONOTONIC_RAW},
    {"boottime", CLOCK_BOOTTIME},
};

// The max size of records dumped by kernel is 65535, and dump stack size
// should be a multiple of 8, so MAX_DUMP_STACK_SIZE is 65528.
constexpr uint32_t MAX_DUMP_STACK_SIZE = 65528;

// The max allowed pages in mapped buffer is decided by rlimit(RLIMIT_MEMLOCK).
// Here 1024 is a desired value for pages in mapped buffer. If mapped
// successfully, the buffer size = 1024 * 4K (page size) = 4M.
constexpr size_t DESIRED_PAGES_IN_MAPPED_BUFFER = 1024;

// Cache size used by CallChainJoiner to cache call chains in memory.
constexpr size_t DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE = 8 * 1024 * 1024;

// Currently, the record buffer size in user-space is set to match the kernel buffer size on a
// 8 core system. For system-wide recording, it is 8K pages * 4K page_size * 8 cores = 256MB.
// For non system-wide recording, it is 1K pages * 4K page_size * 8 cores = 64MB.
static constexpr size_t kRecordBufferSize = 64 * 1024 * 1024;
static constexpr size_t kSystemWideRecordBufferSize = 256 * 1024 * 1024;

static constexpr size_t kDefaultAuxBufferSize = 4 * 1024 * 1024;

// On Pixel 3, it takes about 1ms to enable ETM, and 16-40ms to disable ETM and copy 4M ETM data.
// So make default period to 100ms.
static constexpr double kDefaultEtmDataFlushPeriodInSec = 0.1;

// Wall-clock timestamps (from GetSystemClock()) marking the boundaries of each
// recording phase; used to report phase durations after post-processing.
struct TimeStat {
  uint64_t prepare_recording_time = 0;
  uint64_t start_recording_time = 0;
  uint64_t stop_recording_time = 0;
  uint64_t finish_recording_time = 0;
  uint64_t post_process_time = 0;
};
130
131 class RecordCommand : public Command {
132 public:
RecordCommand()133 RecordCommand()
134 : Command(
135 "record", "record sampling info in perf.data",
136 // clang-format off
137 "Usage: simpleperf record [options] [--] [command [command-args]]\n"
138 " Gather sampling information of running [command]. And -a/-p/-t option\n"
139 " can be used to change target of sampling information.\n"
140 " The default options are: -e cpu-cycles -f 4000 -o perf.data.\n"
141 "Select monitored threads:\n"
142 "-a System-wide collection. Use with --exclude-perf to exclude samples for\n"
143 " simpleperf process.\n"
144 #if defined(__ANDROID__)
145 "--app package_name Profile the process of an Android application.\n"
146 " On non-rooted devices, the app must be debuggable,\n"
147 " because we use run-as to switch to the app's context.\n"
148 #endif
149 "-p pid1,pid2,... Record events on existing processes. Mutually exclusive\n"
150 " with -a.\n"
151 "-t tid1,tid2,... Record events on existing threads. Mutually exclusive with -a.\n"
152 "\n"
153 "Select monitored event types:\n"
154 "-e event1[:modifier1],event2[:modifier2],...\n"
155 " Select a list of events to record. An event can be:\n"
156 " 1) an event name listed in `simpleperf list`;\n"
157 " 2) a raw PMU event in rN format. N is a hex number.\n"
158 " For example, r1b selects event number 0x1b.\n"
159 " 3) a kprobe event added by --kprobe option.\n"
160 " Modifiers can be added to define how the event should be\n"
161 " monitored. Possible modifiers are:\n"
162 " u - monitor user space events only\n"
163 " k - monitor kernel space events only\n"
164 "--group event1[:modifier],event2[:modifier2],...\n"
165 " Similar to -e option. But events specified in the same --group\n"
166 " option are monitored as a group, and scheduled in and out at the\n"
167 " same time.\n"
168 "--trace-offcpu Generate samples when threads are scheduled off cpu.\n"
169 " Similar to \"-c 1 -e sched:sched_switch\".\n"
170 "--kprobe kprobe_event1,kprobe_event2,...\n"
171 " Add kprobe events during recording. The kprobe_event format is in\n"
172 " Documentation/trace/kprobetrace.rst in the kernel. Examples:\n"
173 " 'p:myprobe do_sys_open $arg2:string' - add event kprobes:myprobe\n"
174 " 'r:myretprobe do_sys_open $retval:s64' - add event kprobes:myretprobe\n"
175 "\n"
176 "Select monitoring options:\n"
177 "-f freq Set event sample frequency. It means recording at most [freq]\n"
178 " samples every second. For non-tracepoint events, the default\n"
179 " option is -f 4000. A -f/-c option affects all event types\n"
180 " following it until meeting another -f/-c option. For example,\n"
181 " for \"-f 1000 cpu-cycles -c 1 -e sched:sched_switch\", cpu-cycles\n"
182 " has sample freq 1000, sched:sched_switch event has sample period 1.\n"
183 "-c count Set event sample period. It means recording one sample when\n"
184 " [count] events happen. For tracepoint events, the default option\n"
185 " is -c 1.\n"
186 "--call-graph fp | dwarf[,<dump_stack_size>]\n"
187 " Enable call graph recording. Use frame pointer or dwarf debug\n"
188 " frame as the method to parse call graph in stack.\n"
189 " Default is dwarf,65528.\n"
190 "-g Same as '--call-graph dwarf'.\n"
191 "--clockid clock_id Generate timestamps of samples using selected clock.\n"
192 " Possible values are: realtime, monotonic,\n"
193 " monotonic_raw, boottime, perf. If supported, default\n"
194 " is monotonic, otherwise is perf.\n"
195 "--cpu cpu_item1,cpu_item2,...\n"
196 " Collect samples only on the selected cpus. cpu_item can be cpu\n"
197 " number like 1, or cpu range like 0-3.\n"
198 "--duration time_in_sec Monitor for time_in_sec seconds instead of running\n"
199 " [command]. Here time_in_sec may be any positive\n"
200 " floating point number.\n"
201 "-j branch_filter1,branch_filter2,...\n"
202 " Enable taken branch stack sampling. Each sample captures a series\n"
203 " of consecutive taken branches.\n"
204 " The following filters are defined:\n"
205 " any: any type of branch\n"
206 " any_call: any function call or system call\n"
207 " any_ret: any function return or system call return\n"
208 " ind_call: any indirect branch\n"
209 " u: only when the branch target is at the user level\n"
210 " k: only when the branch target is in the kernel\n"
211 " This option requires at least one branch type among any, any_call,\n"
212 " any_ret, ind_call.\n"
213 "-b Enable taken branch stack sampling. Same as '-j any'.\n"
214 "-m mmap_pages Set the size of the buffer used to receiving sample data from\n"
215 " the kernel. It should be a power of 2. If not set, the max\n"
216 " possible value <= 1024 will be used.\n"
217 "--aux-buffer-size <buffer_size> Set aux buffer size, only used in cs-etm event type.\n"
218 " Need to be power of 2 and page size aligned.\n"
219 " Used memory size is (buffer_size * (cpu_count + 1).\n"
220 " Default is 4M.\n"
221 "--no-inherit Don't record created child threads/processes.\n"
222 "--cpu-percent <percent> Set the max percent of cpu time used for recording.\n"
223 " percent is in range [1-100], default is 25.\n"
224 "--addr-filter filter_str1,filter_str2,...\n"
225 " Provide address filters for cs-etm instruction tracing.\n"
226 " filter_str accepts below formats:\n"
227 " 'filter <addr-range>' -- trace instructions in a range\n"
228 " 'start <addr>' -- start tracing when ip is <addr>\n"
229 " 'stop <addr>' -- stop tracing when ip is <addr>\n"
230 " <addr-range> accepts below formats:\n"
231 " <file_path> -- code sections in a binary file\n"
232 " <vaddr_start>-<vaddr_end>@<file_path> -- part of a binary file\n"
233 " <kernel_addr_start>-<kernel_addr_end> -- part of kernel space\n"
234 " <addr> accepts below formats:\n"
235 " <vaddr>@<file_path> -- virtual addr in a binary file\n"
236 " <kernel_addr> -- a kernel address\n"
237 " Examples:\n"
238 " 'filter 0x456-0x480@/system/lib/libc.so'\n"
239 " 'start 0x456@/system/lib/libc.so,stop 0x480@/system/lib/libc.so'\n"
240 "\n"
241 "--tp-filter filter_string Set filter_string for the previous tracepoint event.\n"
242 " Format is in Documentation/trace/events.rst in the kernel.\n"
243 " An example: 'prev_comm != \"simpleperf\" && (prev_pid > 1)'.\n"
244 "\n"
245 "Dwarf unwinding options:\n"
246 "--post-unwind=(yes|no) If `--call-graph dwarf` option is used, then the user's\n"
247 " stack will be recorded in perf.data and unwound while\n"
248 " recording by default. Use --post-unwind=yes to switch\n"
249 " to unwind after recording.\n"
250 "--no-unwind If `--call-graph dwarf` option is used, then the user's stack\n"
251 " will be unwound by default. Use this option to disable the\n"
252 " unwinding of the user's stack.\n"
253 "--no-callchain-joiner If `--call-graph dwarf` option is used, then by default\n"
254 " callchain joiner is used to break the 64k stack limit\n"
255 " and build more complete call graphs. However, the built\n"
256 " call graphs may not be correct in all cases.\n"
257 "--callchain-joiner-min-matching-nodes count\n"
258 " When callchain joiner is used, set the matched nodes needed to join\n"
259 " callchains. The count should be >= 1. By default it is 1.\n"
260 "--no-cut-samples Simpleperf uses a record buffer to cache records received from the kernel.\n"
261 " When the available space in the buffer reaches low level, it cuts part of\n"
262 " the stack data in samples. When the available space reaches critical level,\n"
263 " it drops all samples. This option makes simpleperf not cut samples when the\n"
264 " available space reaches low level.\n"
265 "--keep-failed-unwinding-result Keep reasons for failed unwinding cases\n"
266 "--keep-failed-unwinding-debug-info Keep debug info for failed unwinding cases\n"
267 "\n"
268 "Sample filter options:\n"
269 "--exclude-perf Exclude samples for simpleperf process.\n"
270 RECORD_FILTER_OPTION_HELP_MSG
271 "\n"
272 "Recording file options:\n"
273 "--no-dump-kernel-symbols Don't dump kernel symbols in perf.data. By default\n"
274 " kernel symbols will be dumped when needed.\n"
275 "--no-dump-symbols Don't dump symbols in perf.data. By default symbols are\n"
276 " dumped in perf.data, to support reporting in another\n"
277 " environment.\n"
278 "-o record_file_name Set record file name, default is perf.data.\n"
279 "--size-limit SIZE[K|M|G] Stop recording after SIZE bytes of records.\n"
280 " Default is unlimited.\n"
281 "--symfs <dir> Look for files with symbols relative to this directory.\n"
282 " This option is used to provide files with symbol table and\n"
283 " debug information, which are used for unwinding and dumping symbols.\n"
284 "--add-meta-info key=value Add extra meta info, which will be stored in the recording file.\n"
285 "\n"
286 "Other options:\n"
287 "--exit-with-parent Stop recording when the process starting\n"
288 " simpleperf dies.\n"
289 "--start_profiling_fd fd_no After starting profiling, write \"STARTED\" to\n"
290 " <fd_no>, then close <fd_no>.\n"
291 "--stdio-controls-profiling Use stdin/stdout to pause/resume profiling.\n"
292 #if defined(__ANDROID__)
293 "--in-app We are already running in the app's context.\n"
294 "--tracepoint-events file_name Read tracepoint events from [file_name] instead of tracefs.\n"
295 #endif
296 #if 0
297 // Below options are only used internally and shouldn't be visible to the public.
298 "--out-fd <fd> Write perf.data to a file descriptor.\n"
299 "--stop-signal-fd <fd> Stop recording when fd is readable.\n"
300 #endif
301 // clang-format on
302 ),
303 system_wide_collection_(false),
304 branch_sampling_(0),
305 fp_callchain_sampling_(false),
306 dwarf_callchain_sampling_(false),
307 dump_stack_size_in_dwarf_sampling_(MAX_DUMP_STACK_SIZE),
308 unwind_dwarf_callchain_(true),
309 post_unwind_(false),
310 child_inherit_(true),
311 duration_in_sec_(0),
312 can_dump_kernel_symbols_(true),
313 dump_symbols_(true),
314 event_selection_set_(false),
315 mmap_page_range_(std::make_pair(1, DESIRED_PAGES_IN_MAPPED_BUFFER)),
316 record_filename_("perf.data"),
317 sample_record_count_(0),
318 lost_record_count_(0),
319 in_app_context_(false),
320 trace_offcpu_(false),
321 exclude_kernel_callchain_(false),
322 allow_callchain_joiner_(true),
323 callchain_joiner_min_matching_nodes_(1u),
324 last_record_timestamp_(0u),
325 record_filter_(thread_tree_) {
326 // If we run `adb shell simpleperf record xxx` and stop profiling by ctrl-c, adb closes
327 // sockets connecting simpleperf. After that, simpleperf will receive SIGPIPE when writing
328 // to stdout/stderr, which is a problem when we use '--app' option. So ignore SIGPIPE to
329 // finish properly.
330 signal(SIGPIPE, SIG_IGN);
331 }
332
333 bool Run(const std::vector<std::string>& args);
334
335 private:
336 bool ParseOptions(const std::vector<std::string>& args, std::vector<std::string>* non_option_args,
337 ProbeEvents* probe_events);
338 bool AdjustPerfEventLimit();
339 bool PrepareRecording(Workload* workload);
340 bool DoRecording(Workload* workload);
341 bool PostProcessRecording(const std::vector<std::string>& args);
342 // pre recording functions
343 bool TraceOffCpu();
344 bool SetEventSelectionFlags();
345 bool CreateAndInitRecordFile();
346 std::unique_ptr<RecordFileWriter> CreateRecordFile(const std::string& filename);
347 bool DumpKernelSymbol();
348 bool DumpTracingData();
349 bool DumpMaps();
350 bool DumpAuxTraceInfo();
351
352 // recording functions
353 bool ProcessRecord(Record* record);
354 bool ShouldOmitRecord(Record* record);
355 bool DumpMapsForRecord(Record* record);
356 bool SaveRecordForPostUnwinding(Record* record);
357 bool SaveRecordAfterUnwinding(Record* record);
358 bool SaveRecordWithoutUnwinding(Record* record);
359 bool ProcessJITDebugInfo(const std::vector<JITDebugInfo>& debug_info, bool sync_kernel_records);
360 bool ProcessControlCmd(IOEventLoop* loop);
361 void UpdateRecord(Record* record);
362 bool UnwindRecord(SampleRecord& r);
363 bool KeepFailedUnwindingResult(const SampleRecord& r, const std::vector<uint64_t>& ips,
364 const std::vector<uint64_t>& sps);
365
366 // post recording functions
367 std::unique_ptr<RecordFileReader> MoveRecordFile(const std::string& old_filename);
368 bool MergeMapRecords();
369 bool PostUnwindRecords();
370 bool JoinCallChains();
371 bool DumpAdditionalFeatures(const std::vector<std::string>& args);
372 bool DumpBuildIdFeature();
373 bool DumpFileFeature();
374 bool DumpMetaInfoFeature(bool kernel_symbols_available);
375 bool DumpDebugUnwindFeature(const std::unordered_set<Dso*>& dso_set);
376 void CollectHitFileInfo(const SampleRecord& r, std::unordered_set<Dso*>* dso_set);
377
378 std::unique_ptr<SampleSpeed> sample_speed_;
379 bool system_wide_collection_;
380 uint64_t branch_sampling_;
381 bool fp_callchain_sampling_;
382 bool dwarf_callchain_sampling_;
383 uint32_t dump_stack_size_in_dwarf_sampling_;
384 bool unwind_dwarf_callchain_;
385 bool post_unwind_;
386 bool keep_failed_unwinding_result_ = false;
387 bool keep_failed_unwinding_debug_info_ = false;
388 std::unique_ptr<OfflineUnwinder> offline_unwinder_;
389 bool child_inherit_;
390 double duration_in_sec_;
391 bool can_dump_kernel_symbols_;
392 bool dump_symbols_;
393 std::string clockid_;
394 std::vector<int> cpus_;
395 EventSelectionSet event_selection_set_;
396
397 std::pair<size_t, size_t> mmap_page_range_;
398 size_t aux_buffer_size_ = kDefaultAuxBufferSize;
399
400 ThreadTree thread_tree_;
401 std::string record_filename_;
402 android::base::unique_fd out_fd_;
403 std::unique_ptr<RecordFileWriter> record_file_writer_;
404 android::base::unique_fd stop_signal_fd_;
405
406 uint64_t sample_record_count_;
407 uint64_t lost_record_count_;
408 android::base::unique_fd start_profiling_fd_;
409 bool stdio_controls_profiling_ = false;
410
411 std::string app_package_name_;
412 bool in_app_context_;
413 bool trace_offcpu_;
414 bool exclude_kernel_callchain_;
415 uint64_t size_limit_in_bytes_ = 0;
416 uint64_t max_sample_freq_ = DEFAULT_SAMPLE_FREQ_FOR_NONTRACEPOINT_EVENT;
417 size_t cpu_time_max_percent_ = 25;
418
419 // For CallChainJoiner
420 bool allow_callchain_joiner_;
421 size_t callchain_joiner_min_matching_nodes_;
422 std::unique_ptr<CallChainJoiner> callchain_joiner_;
423 bool allow_cutting_samples_ = true;
424
425 std::unique_ptr<JITDebugReader> jit_debug_reader_;
426 uint64_t last_record_timestamp_; // used to insert Mmap2Records for JIT debug info
427 TimeStat time_stat_;
428 EventAttrWithId dumping_attr_id_;
429 // In system wide recording, record if we have dumped map info for a process.
430 std::unordered_set<pid_t> dumped_processes_;
431 bool exclude_perf_ = false;
432 RecordFilter record_filter_;
433
434 std::optional<MapRecordReader> map_record_reader_;
435 std::optional<MapRecordThread> map_record_thread_;
436
437 std::unordered_map<std::string, std::string> extra_meta_info_;
438 };
439
Run(const std::vector<std::string> & args)440 bool RecordCommand::Run(const std::vector<std::string>& args) {
441 time_stat_.prepare_recording_time = GetSystemClock();
442 ScopedCurrentArch scoped_arch(GetMachineArch());
443
444 if (!CheckPerfEventLimit()) {
445 return false;
446 }
447 AllowMoreOpenedFiles();
448
449 std::vector<std::string> workload_args;
450 ProbeEvents probe_events;
451 auto clear_probe_events_guard = android::base::make_scope_guard([this, &probe_events] {
452 if (!probe_events.IsEmpty()) {
453 // probe events can be deleted only when no perf event file is using them.
454 event_selection_set_.CloseEventFiles();
455 probe_events.Clear();
456 }
457 });
458 if (!ParseOptions(args, &workload_args, &probe_events)) {
459 return false;
460 }
461 if (!AdjustPerfEventLimit()) {
462 return false;
463 }
464 std::unique_ptr<ScopedTempFiles> scoped_temp_files =
465 ScopedTempFiles::Create(android::base::Dirname(record_filename_));
466 if (!scoped_temp_files) {
467 PLOG(ERROR) << "Can't create output file in directory "
468 << android::base::Dirname(record_filename_);
469 return false;
470 }
471 if (!app_package_name_.empty() && !in_app_context_) {
472 // Some users want to profile non debuggable apps on rooted devices. If we use run-as,
473 // it will be impossible when using --app. So don't switch to app's context when we are
474 // root.
475 if (!IsRoot()) {
476 return RunInAppContext(app_package_name_, "record", args, workload_args.size(),
477 record_filename_, true);
478 }
479 }
480 std::unique_ptr<Workload> workload;
481 if (!workload_args.empty()) {
482 workload = Workload::CreateWorkload(workload_args);
483 if (workload == nullptr) {
484 return false;
485 }
486 }
487 if (!PrepareRecording(workload.get())) {
488 return false;
489 }
490 time_stat_.start_recording_time = GetSystemClock();
491 if (!DoRecording(workload.get())) {
492 return false;
493 }
494 return PostProcessRecording(args);
495 }
496
// Performs all setup needed before recording starts: event selection, target
// attachment, perf event files, mapped buffers, the output file, and event-loop
// callbacks. |workload| is the child command to profile, or nullptr when
// profiling existing processes/system-wide. Returns false on any failure.
bool RecordCommand::PrepareRecording(Workload* workload) {
  // 1. Prepare in other modules.
  PrepareVdsoFile();

  // 2. Add default event type.
  if (event_selection_set_.empty()) {
    size_t group_id;
    if (!event_selection_set_.AddEventType(default_measured_event_type, &group_id)) {
      return false;
    }
    if (sample_speed_) {
      event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
    }
  }

  // 3. Process options before opening perf event files.
  exclude_kernel_callchain_ = event_selection_set_.ExcludeKernel();
  if (trace_offcpu_ && !TraceOffCpu()) {
    return false;
  }
  if (!SetEventSelectionFlags()) {
    return false;
  }
  if (unwind_dwarf_callchain_) {
    // Collect unwinder statistics only when the user asked to keep failed
    // unwinding results.
    bool collect_stat = keep_failed_unwinding_result_;
    offline_unwinder_ = OfflineUnwinder::Create(collect_stat);
  }
  if (unwind_dwarf_callchain_ && allow_callchain_joiner_) {
    callchain_joiner_.reset(new CallChainJoiner(DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE,
                                                callchain_joiner_min_matching_nodes_, false));
  }

  // 4. Add monitored targets.
  bool need_to_check_targets = false;
  if (system_wide_collection_) {
    // tid -1 with a cpu list means monitoring everything on those cpus.
    event_selection_set_.AddMonitoredThreads({-1});
  } else if (!event_selection_set_.HasMonitoredTarget()) {
    if (workload != nullptr) {
      event_selection_set_.AddMonitoredProcesses({workload->GetPid()});
      event_selection_set_.SetEnableOnExec(true);
    } else if (!app_package_name_.empty()) {
      // If app process is not created, wait for it. This allows simpleperf starts before
      // app process. In this way, we can have a better support of app start-up time profiling.
      std::set<pid_t> pids = WaitForAppProcesses(app_package_name_);
      event_selection_set_.AddMonitoredProcesses(pids);
      need_to_check_targets = true;
    } else {
      LOG(ERROR) << "No threads to monitor. Try `simpleperf help record` for help";
      return false;
    }
  } else {
    need_to_check_targets = true;
  }
  // Profiling JITed/interpreted Java code is supported starting from Android P.
  // Also support profiling art interpreter on host.
  if (GetAndroidVersion() >= kAndroidVersionP || GetAndroidVersion() == 0) {
    // JIT symfiles are stored in temporary files, and are deleted after recording. But if
    // `-g --no-unwind` option is used, we want to keep symfiles to support unwinding in
    // the debug-unwind cmd.
    auto symfile_option = (dwarf_callchain_sampling_ && !unwind_dwarf_callchain_)
                              ? JITDebugReader::SymFileOption::kKeepSymFiles
                              : JITDebugReader::SymFileOption::kDropSymFiles;
    auto sync_option = (clockid_ == "monotonic") ? JITDebugReader::SyncOption::kSyncWithRecords
                                                 : JITDebugReader::SyncOption::kNoSync;
    jit_debug_reader_.reset(new JITDebugReader(record_filename_, symfile_option, sync_option));
    // To profile java code, need to dump maps containing vdex files, which are not executable.
    event_selection_set_.SetRecordNotExecutableMaps(true);
  }

  // 5. Open perf event files and create mapped buffers.
  if (!event_selection_set_.OpenEventFiles(cpus_)) {
    return false;
  }
  size_t record_buffer_size =
      system_wide_collection_ ? kSystemWideRecordBufferSize : kRecordBufferSize;
  if (!event_selection_set_.MmapEventFiles(mmap_page_range_.first, mmap_page_range_.second,
                                           aux_buffer_size_, record_buffer_size,
                                           allow_cutting_samples_, exclude_perf_)) {
    return false;
  }
  auto callback = std::bind(&RecordCommand::ProcessRecord, this, std::placeholders::_1);
  if (!event_selection_set_.PrepareToReadMmapEventData(callback)) {
    return false;
  }

  // 6. Create perf.data.
  if (!CreateAndInitRecordFile()) {
    return false;
  }

  // 7. Add read/signal/periodic Events.
  if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) {
    return false;
  }
  IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
  auto exit_loop_callback = [loop]() { return loop->ExitLoop(); };
  if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM}, exit_loop_callback)) {
    return false;
  }

  // Only add an event for SIGHUP if we didn't inherit SIG_IGN (e.g. from nohup).
  if (!SignalIsIgnored(SIGHUP)) {
    if (!loop->AddSignalEvent(SIGHUP, exit_loop_callback)) {
      return false;
    }
  }
  if (stop_signal_fd_ != -1) {
    // Internal --stop-signal-fd option: stop recording when the fd becomes readable.
    if (!loop->AddReadEvent(stop_signal_fd_, exit_loop_callback)) {
      return false;
    }
  }

  if (duration_in_sec_ != 0) {
    if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_),
                                [loop]() { return loop->ExitLoop(); })) {
      return false;
    }
  }
  if (stdio_controls_profiling_) {
    // fd 0 (stdin) carries pause/resume commands, handled by ProcessControlCmd().
    if (!loop->AddReadEvent(0, [this, loop]() { return ProcessControlCmd(loop); })) {
      return false;
    }
  }
  if (jit_debug_reader_) {
    auto callback = [this](const std::vector<JITDebugInfo>& debug_info, bool sync_kernel_records) {
      return ProcessJITDebugInfo(debug_info, sync_kernel_records);
    };
    if (!jit_debug_reader_->RegisterDebugInfoCallback(loop, callback)) {
      return false;
    }
    if (!system_wide_collection_) {
      // Monitor the process of each monitored thread, so JIT debug info is read for all of them.
      std::set<pid_t> pids = event_selection_set_.GetMonitoredProcesses();
      for (pid_t tid : event_selection_set_.GetMonitoredThreads()) {
        pid_t pid;
        if (GetProcessForThread(tid, &pid)) {
          pids.insert(pid);
        }
      }
      for (pid_t pid : pids) {
        if (!jit_debug_reader_->MonitorProcess(pid)) {
          return false;
        }
      }
      if (!jit_debug_reader_->ReadAllProcesses()) {
        return false;
      }
    }
  }
  if (event_selection_set_.HasAuxTrace()) {
    // ETM data is dumped to kernel buffer only when there is no thread traced by ETM. It happens
    // either when all monitored threads are scheduled off cpu, or when all etm perf events are
    // disabled.
    // If ETM data isn't dumped to kernel buffer in time, overflow parts will be dropped. This
    // makes less than expected data, especially in system wide recording. So add a periodic event
    // to flush etm data by temporarily disable all perf events.
    auto etm_flush = [this]() {
      return event_selection_set_.SetEnableEvents(false) &&
             event_selection_set_.SetEnableEvents(true);
    };
    if (!loop->AddPeriodicEvent(SecondToTimeval(kDefaultEtmDataFlushPeriodInSec), etm_flush)) {
      return false;
    }
  }
  return true;
}
662
DoRecording(Workload * workload)663 bool RecordCommand::DoRecording(Workload* workload) {
664 // Write records in mapped buffers of perf_event_files to output file while workload is running.
665 if (workload != nullptr && !workload->IsStarted() && !workload->Start()) {
666 return false;
667 }
668 if (start_profiling_fd_.get() != -1) {
669 if (!android::base::WriteStringToFd("STARTED", start_profiling_fd_)) {
670 PLOG(ERROR) << "failed to write to start_profiling_fd_";
671 }
672 start_profiling_fd_.reset();
673 }
674 if (stdio_controls_profiling_) {
675 printf("started\n");
676 fflush(stdout);
677 }
678 if (!event_selection_set_.GetIOEventLoop()->RunLoop()) {
679 return false;
680 }
681 time_stat_.stop_recording_time = GetSystemClock();
682 if (!event_selection_set_.FinishReadMmapEventData()) {
683 return false;
684 }
685 time_stat_.finish_recording_time = GetSystemClock();
686 return true;
687 }
688
WriteRecordDataToOutFd(const std::string & in_filename,android::base::unique_fd out_fd)689 static bool WriteRecordDataToOutFd(const std::string& in_filename,
690 android::base::unique_fd out_fd) {
691 android::base::unique_fd in_fd(FileHelper::OpenReadOnly(in_filename));
692 if (in_fd == -1) {
693 PLOG(ERROR) << "Failed to open " << in_filename;
694 return false;
695 }
696 char buf[8192];
697 while (true) {
698 ssize_t n = TEMP_FAILURE_RETRY(read(in_fd, buf, sizeof(buf)));
699 if (n < 0) {
700 PLOG(ERROR) << "Failed to read " << in_filename;
701 return false;
702 }
703 if (n == 0) {
704 break;
705 }
706 if (!android::base::WriteFully(out_fd, buf, n)) {
707 PLOG(ERROR) << "Failed to write to out_fd";
708 return false;
709 }
710 }
711 unlink(in_filename.c_str());
712 return true;
713 }
714
// Finalizes the recording: merges map records, optionally post-unwinds and
// joins callchains, writes remaining features, closes the output file, and
// prints a summary of recorded/lost samples and phase timings.
bool RecordCommand::PostProcessRecording(const std::vector<std::string>& args) {
  // 1. Merge map records dumped while recording by map record thread.
  if (map_record_thread_) {
    if (!map_record_thread_->Join() || !MergeMapRecords()) {
      return false;
    }
  }

  // 2. Post unwind dwarf callchain.
  if (unwind_dwarf_callchain_ && post_unwind_) {
    if (!PostUnwindRecords()) {
      return false;
    }
  }

  // 3. Optionally join Callchains. Best-effort: a joiner failure doesn't fail
  // the recording.
  if (callchain_joiner_) {
    JoinCallChains();
  }

  // 4. Dump additional features, and close record file.
  if (!DumpAdditionalFeatures(args)) {
    return false;
  }
  if (!record_file_writer_->Close()) {
    return false;
  }
  if (out_fd_ != -1 && !WriteRecordDataToOutFd(record_filename_, std::move(out_fd_))) {
    return false;
  }
  time_stat_.post_process_time = GetSystemClock();

  // 5. Show brief record result.
  auto record_stat = event_selection_set_.GetRecordStat();
  if (event_selection_set_.HasAuxTrace()) {
    LOG(INFO) << "Aux data traced: " << record_stat.aux_data_size;
    if (record_stat.lost_aux_data_size != 0) {
      LOG(INFO) << "Aux data lost in user space: " << record_stat.lost_aux_data_size;
    }
  } else {
    std::string cut_samples;
    if (record_stat.cut_stack_samples > 0) {
      cut_samples = android::base::StringPrintf(" (cut %zu)", record_stat.cut_stack_samples);
    }
    lost_record_count_ += record_stat.lost_samples + record_stat.lost_non_samples;
    LOG(INFO) << "Samples recorded: " << sample_record_count_ << cut_samples
              << ". Samples lost: " << lost_record_count_ << ".";
    LOG(DEBUG) << "In user space, dropped " << record_stat.lost_samples << " samples, "
               << record_stat.lost_non_samples << " non samples, cut stack of "
               << record_stat.cut_stack_samples << " samples.";
    // Warn when a significant fraction of samples was lost.
    if (sample_record_count_ + lost_record_count_ != 0) {
      double lost_percent =
          static_cast<double>(lost_record_count_) / (lost_record_count_ + sample_record_count_);
      constexpr double LOST_PERCENT_WARNING_BAR = 0.1;
      if (lost_percent >= LOST_PERCENT_WARNING_BAR) {
        LOG(WARNING) << "Lost " << (lost_percent * 100) << "% of samples, "
                     << "consider increasing mmap_pages(-m), "
                     << "or decreasing sample frequency(-f), "
                     << "or increasing sample period(-c).";
      }
    }
    if (callchain_joiner_) {
      callchain_joiner_->DumpStat();
    }
  }
  // Timestamps are in ns, hence the 1e6 divisor to report milliseconds.
  LOG(DEBUG) << "Prepare recording time "
             << (time_stat_.start_recording_time - time_stat_.prepare_recording_time) / 1e6
             << " ms, recording time "
             << (time_stat_.stop_recording_time - time_stat_.start_recording_time) / 1e6
             << " ms, stop recording time "
             << (time_stat_.finish_recording_time - time_stat_.stop_recording_time) / 1e6
             << " ms, post process time "
             << (time_stat_.post_process_time - time_stat_.finish_recording_time) / 1e6 << " ms.";
  return true;
}
790
ParseOptions(const std::vector<std::string> & args,std::vector<std::string> * non_option_args,ProbeEvents * probe_events)791 bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
792 std::vector<std::string>* non_option_args,
793 ProbeEvents* probe_events) {
794 OptionValueMap options;
795 std::vector<std::pair<OptionName, OptionValue>> ordered_options;
796
797 if (!PreprocessOptions(args, GetRecordCmdOptionFormats(), &options, &ordered_options,
798 non_option_args)) {
799 return false;
800 }
801
802 // Process options.
803 system_wide_collection_ = options.PullBoolValue("-a");
804
805 for (const OptionValue& value : options.PullValues("--add-meta-info")) {
806 const std::string& s = *value.str_value;
807 auto split_pos = s.find('=');
808 if (split_pos == std::string::npos || split_pos == 0 || split_pos + 1 == s.size()) {
809 LOG(ERROR) << "invalid meta-info: " << s;
810 return false;
811 }
812 extra_meta_info_[s.substr(0, split_pos)] = s.substr(split_pos + 1);
813 }
814
815 if (auto value = options.PullValue("--addr-filter"); value) {
816 auto filters = ParseAddrFilterOption(*value->str_value);
817 if (filters.empty()) {
818 return false;
819 }
820 event_selection_set_.SetAddrFilters(std::move(filters));
821 }
822
823 if (auto value = options.PullValue("--app"); value) {
824 app_package_name_ = *value->str_value;
825 }
826
827 if (auto value = options.PullValue("--aux-buffer-size"); value) {
828 uint64_t v = value->uint_value;
829 if (v > std::numeric_limits<size_t>::max() || !IsPowerOfTwo(v) || v % sysconf(_SC_PAGE_SIZE)) {
830 LOG(ERROR) << "invalid aux buffer size: " << v;
831 return false;
832 }
833 aux_buffer_size_ = static_cast<size_t>(v);
834 }
835
836 if (options.PullValue("-b")) {
837 branch_sampling_ = branch_sampling_type_map["any"];
838 }
839
840 if (!options.PullUintValue("--callchain-joiner-min-matching-nodes",
841 &callchain_joiner_min_matching_nodes_, 1)) {
842 return false;
843 }
844
845 if (auto value = options.PullValue("--clockid"); value) {
846 clockid_ = *value->str_value;
847 if (clockid_ != "perf") {
848 if (!IsSettingClockIdSupported()) {
849 LOG(ERROR) << "Setting clockid is not supported by the kernel.";
850 return false;
851 }
852 if (clockid_map.find(clockid_) == clockid_map.end()) {
853 LOG(ERROR) << "Invalid clockid: " << clockid_;
854 return false;
855 }
856 }
857 }
858
859 if (auto value = options.PullValue("--cpu"); value) {
860 if (auto cpus = GetCpusFromString(*value->str_value); cpus) {
861 cpus_.assign(cpus->begin(), cpus->end());
862 } else {
863 return false;
864 }
865 }
866
867 if (!options.PullUintValue("--cpu-percent", &cpu_time_max_percent_, 1, 100)) {
868 return false;
869 }
870
871 if (!options.PullDoubleValue("--duration", &duration_in_sec_, 1e-9)) {
872 return false;
873 }
874
875 exclude_perf_ = options.PullBoolValue("--exclude-perf");
876 if (!record_filter_.ParseOptions(options)) {
877 return false;
878 }
879
880 if (options.PullValue("--exit-with-parent")) {
881 prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0);
882 }
883
884 in_app_context_ = options.PullBoolValue("--in-app");
885
886 for (const OptionValue& value : options.PullValues("-j")) {
887 std::vector<std::string> branch_sampling_types = android::base::Split(*value.str_value, ",");
888 for (auto& type : branch_sampling_types) {
889 auto it = branch_sampling_type_map.find(type);
890 if (it == branch_sampling_type_map.end()) {
891 LOG(ERROR) << "unrecognized branch sampling filter: " << type;
892 return false;
893 }
894 branch_sampling_ |= it->second;
895 }
896 }
897 keep_failed_unwinding_result_ = options.PullBoolValue("--keep-failed-unwinding-result");
898 keep_failed_unwinding_debug_info_ = options.PullBoolValue("--keep-failed-unwinding-debug-info");
899 if (keep_failed_unwinding_debug_info_) {
900 keep_failed_unwinding_result_ = true;
901 }
902
903 for (const OptionValue& value : options.PullValues("--kprobe")) {
904 std::vector<std::string> cmds = android::base::Split(*value.str_value, ",");
905 for (const auto& cmd : cmds) {
906 if (!probe_events->AddKprobe(cmd)) {
907 return false;
908 }
909 }
910 }
911
912 if (auto value = options.PullValue("-m"); value) {
913 if (!IsPowerOfTwo(value->uint_value) ||
914 value->uint_value > std::numeric_limits<size_t>::max()) {
915 LOG(ERROR) << "Invalid mmap_pages: '" << value->uint_value << "'";
916 return false;
917 }
918 mmap_page_range_.first = mmap_page_range_.second = value->uint_value;
919 }
920
921 allow_callchain_joiner_ = !options.PullBoolValue("--no-callchain-joiner");
922 allow_cutting_samples_ = !options.PullBoolValue("--no-cut-samples");
923 can_dump_kernel_symbols_ = !options.PullBoolValue("--no-dump-kernel-symbols");
924 dump_symbols_ = !options.PullBoolValue("--no-dump-symbols");
925 child_inherit_ = !options.PullBoolValue("--no-inherit");
926 unwind_dwarf_callchain_ = !options.PullBoolValue("--no-unwind");
927
928 if (auto value = options.PullValue("-o"); value) {
929 record_filename_ = *value->str_value;
930 }
931
932 if (auto value = options.PullValue("--out-fd"); value) {
933 out_fd_.reset(static_cast<int>(value->uint_value));
934 }
935
936 for (const OptionValue& value : options.PullValues("-p")) {
937 if (auto pids = GetTidsFromString(*value.str_value, true); pids) {
938 event_selection_set_.AddMonitoredProcesses(pids.value());
939 } else {
940 return false;
941 }
942 }
943
944 // Use explicit if statements instead of logical operators to avoid short-circuit.
945 if (options.PullValue("--post-unwind")) {
946 post_unwind_ = true;
947 }
948 if (options.PullValue("--post-unwind=yes")) {
949 post_unwind_ = true;
950 }
951 if (options.PullValue("--post-unwind=no")) {
952 post_unwind_ = false;
953 }
954
955 if (!options.PullUintValue("--size-limit", &size_limit_in_bytes_, 1)) {
956 return false;
957 }
958
959 if (auto value = options.PullValue("--start_profiling_fd"); value) {
960 start_profiling_fd_.reset(static_cast<int>(value->uint_value));
961 }
962
963 stdio_controls_profiling_ = options.PullBoolValue("--stdio-controls-profiling");
964
965 if (auto value = options.PullValue("--stop-signal-fd"); value) {
966 stop_signal_fd_.reset(static_cast<int>(value->uint_value));
967 }
968
969 if (auto value = options.PullValue("--symfs"); value) {
970 if (!Dso::SetSymFsDir(*value->str_value)) {
971 return false;
972 }
973 }
974
975 for (const OptionValue& value : options.PullValues("-t")) {
976 if (auto tids = GetTidsFromString(*value.str_value, true); tids) {
977 event_selection_set_.AddMonitoredThreads(tids.value());
978 } else {
979 return false;
980 }
981 }
982
983 trace_offcpu_ = options.PullBoolValue("--trace-offcpu");
984
985 if (auto value = options.PullValue("--tracepoint-events"); value) {
986 if (!EventTypeManager::Instance().ReadTracepointsFromFile(*value->str_value)) {
987 return false;
988 }
989 }
990
991 CHECK(options.values.empty());
992
993 // Process ordered options.
994 std::vector<size_t> wait_setting_speed_event_groups;
995
996 for (const auto& pair : ordered_options) {
997 const OptionName& name = pair.first;
998 const OptionValue& value = pair.second;
999
1000 if (name == "-c" || name == "-f") {
1001 if (value.uint_value < 1) {
1002 LOG(ERROR) << "invalid " << name << ": " << value.uint_value;
1003 return false;
1004 }
1005 if (name == "-c") {
1006 sample_speed_.reset(new SampleSpeed(0, value.uint_value));
1007 } else {
1008 if (value.uint_value >= INT_MAX) {
1009 LOG(ERROR) << "sample freq can't be bigger than INT_MAX: " << value.uint_value;
1010 return false;
1011 }
1012 sample_speed_.reset(new SampleSpeed(value.uint_value, 0));
1013 }
1014
1015 for (auto groud_id : wait_setting_speed_event_groups) {
1016 event_selection_set_.SetSampleSpeed(groud_id, *sample_speed_);
1017 }
1018 wait_setting_speed_event_groups.clear();
1019
1020 } else if (name == "--call-graph") {
1021 std::vector<std::string> strs = android::base::Split(*value.str_value, ",");
1022 if (strs[0] == "fp") {
1023 fp_callchain_sampling_ = true;
1024 dwarf_callchain_sampling_ = false;
1025 } else if (strs[0] == "dwarf") {
1026 fp_callchain_sampling_ = false;
1027 dwarf_callchain_sampling_ = true;
1028 if (strs.size() > 1) {
1029 uint64_t size;
1030 if (!ParseUint(strs[1], &size)) {
1031 LOG(ERROR) << "invalid dump stack size in --call-graph option: " << strs[1];
1032 return false;
1033 }
1034 if ((size & 7) != 0) {
1035 LOG(ERROR) << "dump stack size " << size << " is not 8-byte aligned.";
1036 return false;
1037 }
1038 if (size >= MAX_DUMP_STACK_SIZE) {
1039 LOG(ERROR) << "dump stack size " << size << " is bigger than max allowed size "
1040 << MAX_DUMP_STACK_SIZE << ".";
1041 return false;
1042 }
1043 dump_stack_size_in_dwarf_sampling_ = static_cast<uint32_t>(size);
1044 }
1045 }
1046
1047 } else if (name == "-e") {
1048 std::vector<std::string> event_types = android::base::Split(*value.str_value, ",");
1049 for (auto& event_type : event_types) {
1050 if (probe_events->IsProbeEvent(event_type)) {
1051 if (!probe_events->CreateProbeEventIfNotExist(event_type)) {
1052 return false;
1053 }
1054 }
1055 size_t group_id;
1056 if (!event_selection_set_.AddEventType(event_type, &group_id)) {
1057 return false;
1058 }
1059 if (sample_speed_) {
1060 event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
1061 } else {
1062 wait_setting_speed_event_groups.push_back(group_id);
1063 }
1064 }
1065
1066 } else if (name == "-g") {
1067 fp_callchain_sampling_ = false;
1068 dwarf_callchain_sampling_ = true;
1069 } else if (name == "--group") {
1070 std::vector<std::string> event_types = android::base::Split(*value.str_value, ",");
1071 for (const auto& event_type : event_types) {
1072 if (probe_events->IsProbeEvent(event_type)) {
1073 if (!probe_events->CreateProbeEventIfNotExist(event_type)) {
1074 return false;
1075 }
1076 }
1077 }
1078 size_t group_id;
1079 if (!event_selection_set_.AddEventGroup(event_types, &group_id)) {
1080 return false;
1081 }
1082 if (sample_speed_) {
1083 event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
1084 } else {
1085 wait_setting_speed_event_groups.push_back(group_id);
1086 }
1087
1088 } else if (name == "--tp-filter") {
1089 if (!event_selection_set_.SetTracepointFilter(*value.str_value)) {
1090 return false;
1091 }
1092 } else {
1093 CHECK(false) << "unprocessed option: " << name;
1094 }
1095 }
1096
1097 if (!dwarf_callchain_sampling_) {
1098 if (!unwind_dwarf_callchain_) {
1099 LOG(ERROR) << "--no-unwind is only used with `--call-graph dwarf` option.";
1100 return false;
1101 }
1102 unwind_dwarf_callchain_ = false;
1103 }
1104 if (post_unwind_) {
1105 if (!dwarf_callchain_sampling_ || !unwind_dwarf_callchain_) {
1106 post_unwind_ = false;
1107 }
1108 }
1109
1110 if (fp_callchain_sampling_) {
1111 if (GetBuildArch() == ARCH_ARM) {
1112 LOG(WARNING) << "`--callgraph fp` option doesn't work well on arm architecture, "
1113 << "consider using `-g` option or profiling on aarch64 architecture.";
1114 }
1115 }
1116
1117 if (system_wide_collection_ && event_selection_set_.HasMonitoredTarget()) {
1118 LOG(ERROR) << "Record system wide and existing processes/threads can't be "
1119 "used at the same time.";
1120 return false;
1121 }
1122
1123 if (system_wide_collection_ && !IsRoot()) {
1124 LOG(ERROR) << "System wide profiling needs root privilege.";
1125 return false;
1126 }
1127
1128 if (dump_symbols_ && can_dump_kernel_symbols_) {
1129 // No need to dump kernel symbols as we will dump all required symbols.
1130 can_dump_kernel_symbols_ = false;
1131 }
1132 if (clockid_.empty()) {
1133 clockid_ = IsSettingClockIdSupported() ? "monotonic" : "perf";
1134 }
1135
1136 return true;
1137 }
1138
// Relaxes kernel perf-event limits when they are too strict for the requested
// recording. Each Get/Set pair tries to raise a limit directly; if a direct
// write fails (e.g. insufficient permission), set_prop is flagged and on
// Android >= Q (outside app context) we fall back to SetPerfEventLimits(),
// which presumably applies the limits through system properties — confirm in
// environment.h. Returns false only if that fallback fails.
bool RecordCommand::AdjustPerfEventLimit() {
  bool set_prop = false;
  // 1. Adjust max_sample_rate.
  uint64_t cur_max_freq;
  if (GetMaxSampleFrequency(&cur_max_freq) && cur_max_freq < max_sample_freq_ &&
      !SetMaxSampleFrequency(max_sample_freq_)) {
    set_prop = true;
  }
  // 2. Adjust perf_cpu_time_max_percent.
  size_t cur_percent;
  if (GetCpuTimeMaxPercent(&cur_percent) && cur_percent != cpu_time_max_percent_ &&
      !SetCpuTimeMaxPercent(cpu_time_max_percent_)) {
    set_prop = true;
  }
  // 3. Adjust perf_event_mlock_kb.
  long cpus = sysconf(_SC_NPROCESSORS_CONF);
  // (mmap pages + 1 control page) per cpu, 4 KB per page.
  uint64_t mlock_kb = cpus * (mmap_page_range_.second + 1) * 4;
  if (event_selection_set_.HasAuxTrace()) {
    mlock_kb += cpus * aux_buffer_size_ / 1024;
  }
  uint64_t cur_mlock_kb;
  if (GetPerfEventMlockKb(&cur_mlock_kb) && cur_mlock_kb < mlock_kb &&
      !SetPerfEventMlockKb(mlock_kb)) {
    set_prop = true;
  }

  if (GetAndroidVersion() >= kAndroidVersionQ && set_prop && !in_app_context_) {
    // Never lower an existing limit: take the max of current and wanted values.
    return SetPerfEventLimits(std::max(max_sample_freq_, cur_max_freq), cpu_time_max_percent_,
                              std::max(mlock_kb, cur_mlock_kb));
  }
  return true;
}
1171
TraceOffCpu()1172 bool RecordCommand::TraceOffCpu() {
1173 if (FindEventTypeByName("sched:sched_switch") == nullptr) {
1174 LOG(ERROR) << "Can't trace off cpu because sched:sched_switch event is not available";
1175 return false;
1176 }
1177 for (auto& event_type : event_selection_set_.GetTracepointEvents()) {
1178 if (event_type->name == "sched:sched_switch") {
1179 LOG(ERROR) << "Trace offcpu can't be used together with sched:sched_switch event";
1180 return false;
1181 }
1182 }
1183 if (!IsDumpingRegsForTracepointEventsSupported()) {
1184 LOG(ERROR) << "Dumping regs for tracepoint events is not supported by the kernel";
1185 return false;
1186 }
1187 // --trace-offcpu option only works with one of the selected event types.
1188 std::set<std::string> accepted_events = {"cpu-cycles", "cpu-clock", "task-clock"};
1189 std::vector<const EventType*> events = event_selection_set_.GetEvents();
1190 if (events.size() != 1 || accepted_events.find(events[0]->name) == accepted_events.end()) {
1191 LOG(ERROR) << "--trace-offcpu option only works with one of events "
1192 << android::base::Join(accepted_events, ' ');
1193 return false;
1194 }
1195 return event_selection_set_.AddEventType("sched:sched_switch");
1196 }
1197
SetEventSelectionFlags()1198 bool RecordCommand::SetEventSelectionFlags() {
1199 event_selection_set_.SampleIdAll();
1200 if (!event_selection_set_.SetBranchSampling(branch_sampling_)) {
1201 return false;
1202 }
1203 if (fp_callchain_sampling_) {
1204 event_selection_set_.EnableFpCallChainSampling();
1205 } else if (dwarf_callchain_sampling_) {
1206 if (!event_selection_set_.EnableDwarfCallChainSampling(dump_stack_size_in_dwarf_sampling_)) {
1207 return false;
1208 }
1209 }
1210 event_selection_set_.SetInherit(child_inherit_);
1211 if (clockid_ != "perf") {
1212 event_selection_set_.SetClockId(clockid_map[clockid_]);
1213 }
1214 return true;
1215 }
1216
CreateAndInitRecordFile()1217 bool RecordCommand::CreateAndInitRecordFile() {
1218 record_file_writer_ = CreateRecordFile(record_filename_);
1219 if (record_file_writer_ == nullptr) {
1220 return false;
1221 }
1222 // Use first perf_event_attr and first event id to dump mmap and comm records.
1223 EventAttrWithId dumping_attr_id = event_selection_set_.GetEventAttrWithId()[0];
1224 map_record_reader_.emplace(*dumping_attr_id.attr, dumping_attr_id.ids[0],
1225 event_selection_set_.RecordNotExecutableMaps());
1226 map_record_reader_->SetCallback([this](Record* r) { return ProcessRecord(r); });
1227
1228 return DumpKernelSymbol() && DumpTracingData() && DumpMaps() && DumpAuxTraceInfo();
1229 }
1230
CreateRecordFile(const std::string & filename)1231 std::unique_ptr<RecordFileWriter> RecordCommand::CreateRecordFile(const std::string& filename) {
1232 std::unique_ptr<RecordFileWriter> writer = RecordFileWriter::CreateInstance(filename);
1233 if (writer == nullptr) {
1234 return nullptr;
1235 }
1236
1237 if (!writer->WriteAttrSection(event_selection_set_.GetEventAttrWithId())) {
1238 return nullptr;
1239 }
1240 return writer;
1241 }
1242
DumpKernelSymbol()1243 bool RecordCommand::DumpKernelSymbol() {
1244 if (can_dump_kernel_symbols_) {
1245 if (event_selection_set_.NeedKernelSymbol()) {
1246 std::string kallsyms;
1247 if (!LoadKernelSymbols(&kallsyms)) {
1248 // Symbol loading may have failed due to the lack of permissions. This
1249 // is not fatal, the symbols will appear as "unknown".
1250 return true;
1251 }
1252 KernelSymbolRecord r(kallsyms);
1253 if (!ProcessRecord(&r)) {
1254 return false;
1255 }
1256 }
1257 }
1258 return true;
1259 }
1260
DumpTracingData()1261 bool RecordCommand::DumpTracingData() {
1262 std::vector<const EventType*> tracepoint_event_types = event_selection_set_.GetTracepointEvents();
1263 if (tracepoint_event_types.empty() || !CanRecordRawData() || in_app_context_) {
1264 return true; // No need to dump tracing data, or can't do it.
1265 }
1266 std::vector<char> tracing_data;
1267 if (!GetTracingData(tracepoint_event_types, &tracing_data)) {
1268 return false;
1269 }
1270 TracingDataRecord record(tracing_data);
1271 if (!ProcessRecord(&record)) {
1272 return false;
1273 }
1274 return true;
1275 }
1276
// Dumps memory map records needed to symbolize samples. The strategy depends
// on the recording mode: system-wide vs per-process/thread, aux tracing or not.
bool RecordCommand::DumpMaps() {
  if (system_wide_collection_) {
    // For system wide recording:
    //   If not aux tracing, only dump kernel maps. Maps of a process is dumped when needed (the
    //   first time a sample hits that process).
    //   If aux tracing, we don't know which maps will be needed, so dump all process maps. To
    //   reduce pre recording time, we dump process maps in map record thread while recording.
    if (event_selection_set_.HasAuxTrace()) {
      map_record_thread_.emplace(*map_record_reader_);
      return true;
    }
    if (!event_selection_set_.ExcludeKernel()) {
      return map_record_reader_->ReadKernelMaps();
    }
    return true;
  }
  // Non system-wide: dump kernel maps (unless kernel samples are excluded) ...
  if (!event_selection_set_.ExcludeKernel() && !map_record_reader_->ReadKernelMaps()) {
    return false;
  }
  // ... then collect the monitored processes/threads.
  // Map from process id to a set of thread ids in that process.
  std::unordered_map<pid_t, std::unordered_set<pid_t>> process_map;
  for (pid_t pid : event_selection_set_.GetMonitoredProcesses()) {
    std::vector<pid_t> tids = GetThreadsInProcess(pid);
    process_map[pid].insert(tids.begin(), tids.end());
  }
  for (pid_t tid : event_selection_set_.GetMonitoredThreads()) {
    pid_t pid;
    if (GetProcessForThread(tid, &pid)) {
      process_map[pid].insert(tid);
    }
  }

  // Dump each process.
  for (const auto& [pid, tids] : process_map) {
    if (!map_record_reader_->ReadProcessMaps(pid, tids, 0)) {
      return false;
    }
  }
  return true;
}
1317
// Central sink for every record produced during recording. Rewrites embedded
// paths, applies omission/size-limit/filter checks, does per-record
// bookkeeping, then routes the record to one of the Save* paths. The order of
// the steps below is significant (see inline comments).
bool RecordCommand::ProcessRecord(Record* record) {
  UpdateRecord(record);
  if (ShouldOmitRecord(record)) {
    return true;
  }
  if (size_limit_in_bytes_ > 0u) {
    // Stop the event loop (ending the recording) once the data section
    // exceeds --size-limit.
    if (size_limit_in_bytes_ < record_file_writer_->GetDataSectionSize()) {
      return event_selection_set_.GetIOEventLoop()->ExitLoop();
    }
  }
  if (jit_debug_reader_ && !jit_debug_reader_->UpdateRecord(record)) {
    return false;
  }
  last_record_timestamp_ = std::max(last_record_timestamp_, record->Timestamp());
  // In system wide recording, maps are dumped when they are needed by records.
  if (system_wide_collection_ && !DumpMapsForRecord(record)) {
    return false;
  }
  // Record filter check should go after DumpMapsForRecord(). Otherwise, process/thread name
  // filters don't work in system wide collection.
  if (record->type() == PERF_RECORD_SAMPLE) {
    if (!record_filter_.Check(static_cast<SampleRecord*>(record))) {
      // Filtered-out samples are silently dropped but recording continues.
      return true;
    }
  }
  if (unwind_dwarf_callchain_) {
    if (post_unwind_) {
      return SaveRecordForPostUnwinding(record);
    }
    return SaveRecordAfterUnwinding(record);
  }
  return SaveRecordWithoutUnwinding(record);
}
1351
DumpAuxTraceInfo()1352 bool RecordCommand::DumpAuxTraceInfo() {
1353 if (event_selection_set_.HasAuxTrace()) {
1354 AuxTraceInfoRecord auxtrace_info = ETMRecorder::GetInstance().CreateAuxTraceInfoRecord();
1355 return ProcessRecord(&auxtrace_info);
1356 }
1357 return true;
1358 }
1359
// Returns true for a userspace mmap record whose mapped file exists only in
// memory (per MappedFileOnlyExistInMemory); kernel maps never qualify.
template <typename MmapRecordType>
bool MapOnlyExistInMemory(MmapRecordType* record) {
  if (record->InKernel()) {
    return false;
  }
  return MappedFileOnlyExistInMemory(record->filename);
}
1364
ShouldOmitRecord(Record * record)1365 bool RecordCommand::ShouldOmitRecord(Record* record) {
1366 if (jit_debug_reader_) {
1367 // To profile jitted Java code, we need PROT_JIT_SYMFILE_MAP maps not overlapped by maps for
1368 // [anon:dalvik-jit-code-cache]. To profile interpreted Java code, we record maps that
1369 // are not executable. Some non-exec maps (like those for stack, heap) provide misleading map
1370 // entries for unwinding, as in http://b/77236599. So it is better to remove
1371 // dalvik-jit-code-cache and other maps that only exist in memory.
1372 switch (record->type()) {
1373 case PERF_RECORD_MMAP:
1374 return MapOnlyExistInMemory(static_cast<MmapRecord*>(record));
1375 case PERF_RECORD_MMAP2:
1376 return MapOnlyExistInMemory(static_cast<Mmap2Record*>(record));
1377 }
1378 }
1379 return false;
1380 }
1381
DumpMapsForRecord(Record * record)1382 bool RecordCommand::DumpMapsForRecord(Record* record) {
1383 if (record->type() == PERF_RECORD_SAMPLE) {
1384 pid_t pid = static_cast<SampleRecord*>(record)->tid_data.pid;
1385 if (dumped_processes_.find(pid) == dumped_processes_.end()) {
1386 // Dump map info and all thread names for that process.
1387 if (!map_record_reader_->ReadProcessMaps(pid, last_record_timestamp_)) {
1388 return false;
1389 }
1390 dumped_processes_.insert(pid);
1391 }
1392 }
1393 return true;
1394 }
1395
SaveRecordForPostUnwinding(Record * record)1396 bool RecordCommand::SaveRecordForPostUnwinding(Record* record) {
1397 if (!record_file_writer_->WriteRecord(*record)) {
1398 LOG(ERROR) << "If there isn't enough space for storing profiling data, consider using "
1399 << "--no-post-unwind option.";
1400 return false;
1401 }
1402 return true;
1403 }
1404
// Unwinds sample records immediately and writes the (smaller) unwound record.
// Non-sample records update the thread tree; lost records update counters.
// The order of the steps on sample records is significant (see comments).
bool RecordCommand::SaveRecordAfterUnwinding(Record* record) {
  if (record->type() == PERF_RECORD_SAMPLE) {
    auto& r = *static_cast<SampleRecord*>(record);
    // AdjustCallChainGeneratedByKernel() should go before UnwindRecord(). Because we don't want
    // to adjust callchains generated by dwarf unwinder.
    r.AdjustCallChainGeneratedByKernel();
    if (!UnwindRecord(r)) {
      return false;
    }
    // ExcludeKernelCallChain() should go after UnwindRecord() to notice the generated user call
    // chain.
    if (r.InKernel() && exclude_kernel_callchain_ && !r.ExcludeKernelCallChain()) {
      // If current record contains no user callchain, skip it.
      return true;
    }
    sample_record_count_++;
  } else if (record->type() == PERF_RECORD_LOST) {
    lost_record_count_ += static_cast<LostRecord*>(record)->lost;
  } else {
    // Keep the thread tree in sync (comm/mmap/fork/... records).
    thread_tree_.Update(*record);
  }
  return record_file_writer_->WriteRecord(*record);
}
1428
// Writes records as-is (no dwarf unwinding), only adjusting kernel-generated
// callchains and optionally stripping kernel frames from samples.
bool RecordCommand::SaveRecordWithoutUnwinding(Record* record) {
  if (record->type() == PERF_RECORD_SAMPLE) {
    auto& r = *static_cast<SampleRecord*>(record);
    if (fp_callchain_sampling_ || dwarf_callchain_sampling_) {
      r.AdjustCallChainGeneratedByKernel();
    }
    if (r.InKernel() && exclude_kernel_callchain_ && !r.ExcludeKernelCallChain()) {
      // If current record contains no user callchain, skip it.
      return true;
    }
    sample_record_count_++;
  } else if (record->type() == PERF_RECORD_LOST) {
    lost_record_count_ += static_cast<LostRecord*>(record)->lost;
  }
  return record_file_writer_->WriteRecord(*record);
}
1445
// Converts JIT debug info (JITed code regions and extracted dex files) into
// synthetic Mmap2 records so samples landing in them can be symbolized.
// Timestamps come from the debug info itself when the reader can sync with
// records, otherwise the last seen record timestamp is reused.
bool RecordCommand::ProcessJITDebugInfo(const std::vector<JITDebugInfo>& debug_info,
                                        bool sync_kernel_records) {
  // Reuse the first attr/event id for all synthesized records.
  EventAttrWithId attr_id = event_selection_set_.GetEventAttrWithId()[0];
  for (auto& info : debug_info) {
    if (info.type == JITDebugInfo::JIT_DEBUG_JIT_CODE) {
      uint64_t timestamp =
          jit_debug_reader_->SyncWithRecords() ? info.timestamp : last_record_timestamp_;
      Mmap2Record record(*attr_id.attr, false, info.pid, info.pid, info.jit_code_addr,
                         info.jit_code_len, info.file_offset, map_flags::PROT_JIT_SYMFILE_MAP,
                         info.file_path, attr_id.ids[0], timestamp);
      if (!ProcessRecord(&record)) {
        return false;
      }
    } else {
      // Dex file info: synthesize a map for the extracted dex file (if any),
      // and register the dex file offset for symbolization.
      if (info.extracted_dex_file_map) {
        ThreadMmap& map = *info.extracted_dex_file_map;
        uint64_t timestamp =
            jit_debug_reader_->SyncWithRecords() ? info.timestamp : last_record_timestamp_;
        Mmap2Record record(*attr_id.attr, false, info.pid, info.pid, map.start_addr, map.len,
                           map.pgoff, map.prot, map.name, attr_id.ids[0], timestamp);
        if (!ProcessRecord(&record)) {
          return false;
        }
      }
      thread_tree_.AddDexFileOffset(info.file_path, info.dex_file_offset);
    }
  }
  // We want to let samples see the most recent JIT maps generated before them, but no JIT maps
  // generated after them. So process existing samples each time generating new JIT maps. We prefer
  // to process samples after processing JIT maps. Because some of the samples may hit the new JIT
  // maps, and we want to report them properly.
  if (sync_kernel_records && !event_selection_set_.SyncKernelBuffer()) {
    return false;
  }
  return true;
}
1482
ProcessControlCmd(IOEventLoop * loop)1483 bool RecordCommand::ProcessControlCmd(IOEventLoop* loop) {
1484 char* line = nullptr;
1485 size_t line_length = 0;
1486 if (getline(&line, &line_length, stdin) == -1) {
1487 free(line);
1488 // When the simpleperf Java API destroys the simpleperf process, it also closes the stdin pipe.
1489 // So we may see EOF of stdin.
1490 return loop->ExitLoop();
1491 }
1492 std::string cmd = android::base::Trim(line);
1493 free(line);
1494 LOG(DEBUG) << "process control cmd: " << cmd;
1495 bool result = false;
1496 if (cmd == "pause") {
1497 result = event_selection_set_.SetEnableEvents(false);
1498 } else if (cmd == "resume") {
1499 result = event_selection_set_.SetEnableEvents(true);
1500 } else {
1501 LOG(ERROR) << "unknown control cmd: " << cmd;
1502 }
1503 printf("%s\n", result ? "ok" : "error");
1504 fflush(stdout);
1505 return result;
1506 }
1507
// Rewrites a userspace mmap record's filename when the mapped file is really
// an entry embedded in another file (an ELF inside an APK, or an in-memory
// extracted zip entry), so reports name the right binary. Works on both
// MmapRecord and Mmap2Record; |has_prot|/|prot| are only meaningful for
// Mmap2Record.
template <class RecordType>
void UpdateMmapRecordForEmbeddedPath(RecordType& r, bool has_prot, uint32_t prot) {
  if (r.InKernel()) {
    return;
  }
  std::string filename = r.filename;
  bool name_changed = false;
  // Some vdex files in map files are marked with deleted flag, but they exist in the file system.
  // It may be because a new file is used to replace the old one, but still worth to try.
  if (android::base::EndsWith(filename, " (deleted)")) {
    filename.resize(filename.size() - 10);  // strlen(" (deleted)") == 10
    name_changed = true;
  }
  // Only executable maps (or maps without prot info) with a nonzero page
  // offset can be ELF files embedded in an APK.
  if (r.data->pgoff != 0 && (!has_prot || (prot & PROT_EXEC))) {
    // For the case of a shared library "foobar.so" embedded
    // inside an APK, we rewrite the original MMAP from
    // ["path.apk" offset=X] to ["path.apk!/foobar.so" offset=W]
    // so as to make the library name explicit. This update is
    // done here (as part of the record operation) as opposed to
    // on the host during the report, since we want to report
    // the correct library name even if the the APK in question
    // is not present on the host. The new offset W is
    // calculated to be with respect to the start of foobar.so,
    // not to the start of path.apk.
    EmbeddedElf* ee = ApkInspector::FindElfInApkByOffset(filename, r.data->pgoff);
    if (ee != nullptr) {
      // Compute new offset relative to start of elf in APK.
      auto data = *r.data;
      data.pgoff -= ee->entry_offset();
      r.SetDataAndFilename(data, GetUrlInApk(filename, ee->entry_name()));
      return;
    }
  }
  // Handle zip entries extracted into memory (e.g. dex files), whose map name
  // encodes the zip path and entry name.
  std::string zip_path;
  std::string entry_name;
  if (ParseExtractedInMemoryPath(filename, &zip_path, &entry_name)) {
    filename = GetUrlInApk(zip_path, entry_name);
    name_changed = true;
  }
  if (name_changed) {
    auto data = *r.data;
    r.SetDataAndFilename(data, filename);
  }
}
1552
UpdateRecord(Record * record)1553 void RecordCommand::UpdateRecord(Record* record) {
1554 if (record->type() == PERF_RECORD_MMAP) {
1555 UpdateMmapRecordForEmbeddedPath(*static_cast<MmapRecord*>(record), false, 0);
1556 } else if (record->type() == PERF_RECORD_MMAP2) {
1557 auto r = static_cast<Mmap2Record*>(record);
1558 UpdateMmapRecordForEmbeddedPath(*r, true, r->data->prot);
1559 } else if (record->type() == PERF_RECORD_COMM) {
1560 auto r = static_cast<CommRecord*>(record);
1561 if (r->data->pid == r->data->tid) {
1562 std::string s = GetCompleteProcessName(r->data->pid);
1563 if (!s.empty()) {
1564 r->SetCommandName(s);
1565 }
1566 }
1567 }
1568 }
1569
// Unwinds the user-space callchain of a sample in place, replacing its regs
// and stack data with the unwound ip list. Samples lacking callchain, user
// regs or stack data are passed through unchanged. Returns false on
// unwinder/joiner/writer failures.
bool RecordCommand::UnwindRecord(SampleRecord& r) {
  if ((r.sample_type & PERF_SAMPLE_CALLCHAIN) && (r.sample_type & PERF_SAMPLE_REGS_USER) &&
      (r.regs_user_data.reg_mask != 0) && (r.sample_type & PERF_SAMPLE_STACK_USER) &&
      (r.GetValidStackSize() > 0)) {
    ThreadEntry* thread = thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
    RegSet regs(r.regs_user_data.abi, r.regs_user_data.reg_mask, r.regs_user_data.regs);
    std::vector<uint64_t> ips;
    std::vector<uint64_t> sps;
    if (!offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data,
                                            r.GetValidStackSize(), &ips, &sps)) {
      return false;
    }
    // The unwinding may fail if JIT debug info isn't the latest. In this case, read JIT debug info
    // from the process and retry unwinding.
    if (jit_debug_reader_ && !post_unwind_ &&
        offline_unwinder_->IsCallChainBrokenForIncompleteJITDebugInfo()) {
      jit_debug_reader_->ReadProcess(r.tid_data.pid);
      jit_debug_reader_->FlushDebugInfo(r.Timestamp());
      if (!offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data,
                                              r.GetValidStackSize(), &ips, &sps)) {
        return false;
      }
    }
    // Optionally persist failed unwinding details for later debugging.
    if (keep_failed_unwinding_result_ && !KeepFailedUnwindingResult(r, ips, sps)) {
      return false;
    }
    r.ReplaceRegAndStackWithCallChain(ips);
    // Feed the original offline callchain to the joiner so broken chains can
    // be repaired after recording.
    if (callchain_joiner_ &&
        !callchain_joiner_->AddCallChain(r.tid_data.pid, r.tid_data.tid,
                                         CallChainJoiner::ORIGINAL_OFFLINE, ips, sps)) {
      return false;
    }
  }
  return true;
}
1605
KeepFailedUnwindingResult(const SampleRecord & r,const std::vector<uint64_t> & ips,const std::vector<uint64_t> & sps)1606 bool RecordCommand::KeepFailedUnwindingResult(const SampleRecord& r,
1607 const std::vector<uint64_t>& ips,
1608 const std::vector<uint64_t>& sps) {
1609 auto& result = offline_unwinder_->GetUnwindingResult();
1610 if (result.error_code != unwindstack::ERROR_NONE) {
1611 if (keep_failed_unwinding_debug_info_) {
1612 return record_file_writer_->WriteRecord(UnwindingResultRecord(
1613 r.time_data.time, result, r.regs_user_data, r.stack_user_data, ips, sps));
1614 }
1615 return record_file_writer_->WriteRecord(
1616 UnwindingResultRecord(r.time_data.time, result, {}, {}, {}, {}));
1617 }
1618 return true;
1619 }
1620
MoveRecordFile(const std::string & old_filename)1621 std::unique_ptr<RecordFileReader> RecordCommand::MoveRecordFile(const std::string& old_filename) {
1622 if (!record_file_writer_->Close()) {
1623 return nullptr;
1624 }
1625 record_file_writer_.reset();
1626 {
1627 std::error_code ec;
1628 std::filesystem::rename(record_filename_, old_filename, ec);
1629 if (ec) {
1630 LOG(ERROR) << "Failed to rename: " << ec.message();
1631 return nullptr;
1632 }
1633 }
1634 record_file_writer_ = CreateRecordFile(record_filename_);
1635 if (!record_file_writer_) {
1636 return nullptr;
1637 }
1638 return RecordFileReader::CreateInstance(old_filename);
1639 }
1640
MergeMapRecords()1641 bool RecordCommand::MergeMapRecords() {
1642 // 1. Move records from record_filename_ to a temporary file.
1643 auto tmp_file = ScopedTempFiles::CreateTempFile();
1644 auto reader = MoveRecordFile(tmp_file->path);
1645 if (!reader) {
1646 return false;
1647 }
1648
1649 // 2. Copy map records from map record thread.
1650 auto callback = [this](Record* r) {
1651 UpdateRecord(r);
1652 if (ShouldOmitRecord(r)) {
1653 return true;
1654 }
1655 return record_file_writer_->WriteRecord(*r);
1656 };
1657 if (!map_record_thread_->ReadMapRecords(callback)) {
1658 return false;
1659 }
1660
1661 // 3. Copy data section from the old recording file.
1662 std::vector<char> buf(64 * 1024);
1663 uint64_t offset = reader->FileHeader().data.offset;
1664 uint64_t left_size = reader->FileHeader().data.size;
1665 while (left_size > 0) {
1666 size_t nread = std::min<size_t>(left_size, buf.size());
1667 if (!reader->ReadAtOffset(offset, buf.data(), nread) ||
1668 !record_file_writer_->WriteData(buf.data(), nread)) {
1669 return false;
1670 }
1671 offset += nread;
1672 left_size -= nread;
1673 }
1674 return true;
1675 }
1676
PostUnwindRecords()1677 bool RecordCommand::PostUnwindRecords() {
1678 auto tmp_file = ScopedTempFiles::CreateTempFile();
1679 auto reader = MoveRecordFile(tmp_file->path);
1680 if (!reader) {
1681 return false;
1682 }
1683 sample_record_count_ = 0;
1684 lost_record_count_ = 0;
1685 auto callback = [this](std::unique_ptr<Record> record) {
1686 return SaveRecordAfterUnwinding(record.get());
1687 };
1688 return reader->ReadDataSection(callback);
1689 }
1690
JoinCallChains()1691 bool RecordCommand::JoinCallChains() {
1692 // 1. Prepare joined callchains.
1693 if (!callchain_joiner_->JoinCallChains()) {
1694 return false;
1695 }
1696 // 2. Move records from record_filename_ to a temporary file.
1697 auto tmp_file = ScopedTempFiles::CreateTempFile();
1698 auto reader = MoveRecordFile(tmp_file->path);
1699 if (!reader) {
1700 return false;
1701 }
1702
1703 // 3. Read records from the temporary file, and write record with joined call chains back
1704 // to record_filename_.
1705 auto record_callback = [&](std::unique_ptr<Record> r) {
1706 if (r->type() != PERF_RECORD_SAMPLE) {
1707 return record_file_writer_->WriteRecord(*r);
1708 }
1709 SampleRecord& sr = *static_cast<SampleRecord*>(r.get());
1710 if (!sr.HasUserCallChain()) {
1711 return record_file_writer_->WriteRecord(sr);
1712 }
1713 pid_t pid;
1714 pid_t tid;
1715 CallChainJoiner::ChainType type;
1716 std::vector<uint64_t> ips;
1717 std::vector<uint64_t> sps;
1718 if (!callchain_joiner_->GetNextCallChain(pid, tid, type, ips, sps)) {
1719 return false;
1720 }
1721 CHECK_EQ(type, CallChainJoiner::JOINED_OFFLINE);
1722 CHECK_EQ(pid, static_cast<pid_t>(sr.tid_data.pid));
1723 CHECK_EQ(tid, static_cast<pid_t>(sr.tid_data.tid));
1724 sr.UpdateUserCallChain(ips);
1725 return record_file_writer_->WriteRecord(sr);
1726 };
1727 return reader->ReadDataSection(record_callback);
1728 }
1729
LoadSymbolMapFile(int pid,const std::string & package,ThreadTree * thread_tree)1730 static void LoadSymbolMapFile(int pid, const std::string& package, ThreadTree* thread_tree) {
1731 // On Linux, symbol map files usually go to /tmp/perf-<pid>.map
1732 // On Android, there is no directory where any process can create files.
1733 // For now, use /data/local/tmp/perf-<pid>.map, which works for standalone programs,
1734 // and /data/data/<package>/perf-<pid>.map, which works for apps.
1735 auto path = package.empty()
1736 ? android::base::StringPrintf("/data/local/tmp/perf-%d.map", pid)
1737 : android::base::StringPrintf("/data/data/%s/perf-%d.map", package.c_str(), pid);
1738
1739 auto symbols = ReadSymbolMapFromFile(path);
1740 if (!symbols.empty()) {
1741 thread_tree->AddSymbolsForProcess(pid, &symbols);
1742 }
1743 }
1744
// Walks the recorded data once to discover which binaries/symbols were hit,
// then writes all perf.data feature sections (build ids, file info, os release,
// arch, cmdline, meta info, and optional branch-stack/auxtrace/debug-unwind).
// Called after recording finishes. Returns false on any read/write failure.
// NOTE: feature_count below must match the number of Write*/Dump* feature calls
// that actually run; keep them in sync when adding features.
bool RecordCommand::DumpAdditionalFeatures(const std::vector<std::string>& args) {
  // Read data section of perf.data to collect hit file information.
  thread_tree_.ClearThreadAndMap();
  bool kernel_symbols_available = false;
  std::string kallsyms;
  if (event_selection_set_.NeedKernelSymbol() && LoadKernelSymbols(&kallsyms)) {
    Dso::SetKallsyms(kallsyms);
    kernel_symbols_available = true;
  }
  std::unordered_set<int> loaded_symbol_maps;  // pids whose symbol map file has been loaded
  std::vector<uint64_t> auxtrace_offset;       // file offsets of aux trace data blocks
  std::unordered_set<Dso*> debug_unwinding_files;
  bool failed_unwinding_sample = false;

  auto callback = [&](const Record* r) {
    thread_tree_.Update(*r);
    if (r->type() == PERF_RECORD_SAMPLE) {
      auto sample = reinterpret_cast<const SampleRecord*>(r);
      // Symbol map files are available after recording. Load one for the process.
      if (loaded_symbol_maps.insert(sample->tid_data.pid).second) {
        LoadSymbolMapFile(sample->tid_data.pid, app_package_name_, &thread_tree_);
      }
      if (failed_unwinding_sample) {
        // The preceding record was an unwinding result, i.e. this sample failed
        // to unwind; remember its dsos so debug unwind info can be dumped.
        failed_unwinding_sample = false;
        CollectHitFileInfo(*sample, &debug_unwinding_files);
      } else {
        CollectHitFileInfo(*sample, nullptr);
      }
    } else if (r->type() == PERF_RECORD_AUXTRACE) {
      auto auxtrace = static_cast<const AuxTraceRecord*>(r);
      // Record where the aux data block starts: the block precedes the
      // AUXTRACE record in the file, hence the subtraction.
      auxtrace_offset.emplace_back(auxtrace->location.file_offset - auxtrace->size());
    } else if (r->type() == SIMPLE_PERF_RECORD_UNWINDING_RESULT) {
      failed_unwinding_sample = true;
    }
  };

  if (!record_file_writer_->ReadDataSection(callback)) {
    return false;
  }

  // Base features always written: build id, file, os release, arch, cmdline, meta info.
  size_t feature_count = 6;
  if (branch_sampling_) {
    feature_count++;
  }
  if (!auxtrace_offset.empty()) {
    feature_count++;
  }
  if (keep_failed_unwinding_debug_info_) {
    // DumpDebugUnwindFeature writes two feature kinds: the file contents
    // (FEAT_DEBUG_UNWIND_FILE) and the index (see DumpDebugUnwindFeature).
    feature_count += 2;
  }
  if (!record_file_writer_->BeginWriteFeatures(feature_count)) {
    return false;
  }
  if (!DumpBuildIdFeature()) {
    return false;
  }
  if (!DumpFileFeature()) {
    return false;
  }
  utsname uname_buf;
  if (TEMP_FAILURE_RETRY(uname(&uname_buf)) != 0) {
    PLOG(ERROR) << "uname() failed";
    return false;
  }
  if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_OSRELEASE, uname_buf.release)) {
    return false;
  }
  if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_ARCH, uname_buf.machine)) {
    return false;
  }

  // Reconstruct the command line as "<exec_path> record <args...>".
  std::string exec_path = android::base::GetExecutablePath();
  if (exec_path.empty()) exec_path = "simpleperf";
  std::vector<std::string> cmdline;
  cmdline.push_back(exec_path);
  cmdline.push_back("record");
  cmdline.insert(cmdline.end(), args.begin(), args.end());
  if (!record_file_writer_->WriteCmdlineFeature(cmdline)) {
    return false;
  }
  if (branch_sampling_ != 0 && !record_file_writer_->WriteBranchStackFeature()) {
    return false;
  }
  if (!DumpMetaInfoFeature(kernel_symbols_available)) {
    return false;
  }
  if (!auxtrace_offset.empty() && !record_file_writer_->WriteAuxTraceFeature(auxtrace_offset)) {
    return false;
  }
  if (keep_failed_unwinding_debug_info_ && !DumpDebugUnwindFeature(debug_unwinding_files)) {
    return false;
  }

  if (!record_file_writer_->EndWriteFeatures()) {
    return false;
  }
  return true;
}
1843
DumpBuildIdFeature()1844 bool RecordCommand::DumpBuildIdFeature() {
1845 std::vector<BuildIdRecord> build_id_records;
1846 BuildId build_id;
1847 std::vector<Dso*> dso_v = thread_tree_.GetAllDsos();
1848 for (Dso* dso : dso_v) {
1849 // For aux tracing, we don't know which binaries are traced.
1850 // So dump build ids for all binaries.
1851 if (!dso->HasDumpId() && !event_selection_set_.HasAuxTrace()) {
1852 continue;
1853 }
1854 if (dso->type() == DSO_KERNEL) {
1855 if (!GetKernelBuildId(&build_id)) {
1856 continue;
1857 }
1858 build_id_records.push_back(BuildIdRecord(true, UINT_MAX, build_id, dso->Path()));
1859 } else if (dso->type() == DSO_KERNEL_MODULE) {
1860 bool has_build_id = false;
1861 if (android::base::EndsWith(dso->Path(), ".ko")) {
1862 has_build_id = GetBuildIdFromDsoPath(dso->Path(), &build_id);
1863 } else if (const std::string& path = dso->Path();
1864 path.size() > 2 && path[0] == '[' && path.back() == ']') {
1865 // For kernel modules that we can't find the corresponding file, read build id from /sysfs.
1866 has_build_id = GetModuleBuildId(path.substr(1, path.size() - 2), &build_id);
1867 }
1868 if (has_build_id) {
1869 build_id_records.push_back(BuildIdRecord(true, UINT_MAX, build_id, dso->Path()));
1870 } else {
1871 LOG(DEBUG) << "Can't read build_id for module " << dso->Path();
1872 }
1873 } else if (dso->type() == DSO_ELF_FILE) {
1874 if (dso->Path() == DEFAULT_EXECNAME_FOR_THREAD_MMAP || dso->IsForJavaMethod()) {
1875 continue;
1876 }
1877 if (!GetBuildIdFromDsoPath(dso->Path(), &build_id)) {
1878 LOG(DEBUG) << "Can't read build_id from file " << dso->Path();
1879 continue;
1880 }
1881 build_id_records.push_back(BuildIdRecord(false, UINT_MAX, build_id, dso->Path()));
1882 }
1883 }
1884 if (!record_file_writer_->WriteBuildIdFeature(build_id_records)) {
1885 return false;
1886 }
1887 return true;
1888 }
1889
DumpFileFeature()1890 bool RecordCommand::DumpFileFeature() {
1891 std::vector<Dso*> dso_v = thread_tree_.GetAllDsos();
1892 // To parse ETM data for kernel modules, we need to dump memory address for kernel modules.
1893 if (event_selection_set_.HasAuxTrace() && !event_selection_set_.ExcludeKernel()) {
1894 for (Dso* dso : dso_v) {
1895 if (dso->type() == DSO_KERNEL_MODULE) {
1896 dso->CreateDumpId();
1897 }
1898 }
1899 }
1900 return record_file_writer_->WriteFileFeatures(dso_v);
1901 }
1902
DumpMetaInfoFeature(bool kernel_symbols_available)1903 bool RecordCommand::DumpMetaInfoFeature(bool kernel_symbols_available) {
1904 std::unordered_map<std::string, std::string> info_map = extra_meta_info_;
1905 info_map["simpleperf_version"] = GetSimpleperfVersion();
1906 info_map["system_wide_collection"] = system_wide_collection_ ? "true" : "false";
1907 info_map["trace_offcpu"] = trace_offcpu_ ? "true" : "false";
1908 // By storing event types information in perf.data, the readers of perf.data have the same
1909 // understanding of event types, even if they are on another machine.
1910 info_map["event_type_info"] = ScopedEventTypes::BuildString(event_selection_set_.GetEvents());
1911 #if defined(__ANDROID__)
1912 info_map["product_props"] = android::base::StringPrintf(
1913 "%s:%s:%s", android::base::GetProperty("ro.product.manufacturer", "").c_str(),
1914 android::base::GetProperty("ro.product.model", "").c_str(),
1915 android::base::GetProperty("ro.product.name", "").c_str());
1916 info_map["android_version"] = android::base::GetProperty("ro.build.version.release", "");
1917 info_map["android_sdk_version"] = android::base::GetProperty("ro.build.version.sdk", "");
1918 info_map["android_build_type"] = android::base::GetProperty("ro.build.type", "");
1919 if (!app_package_name_.empty()) {
1920 info_map["app_package_name"] = app_package_name_;
1921 if (IsRoot()) {
1922 info_map["app_type"] = GetAppType(app_package_name_);
1923 }
1924 }
1925 if (event_selection_set_.HasAuxTrace()) {
1926 // used by --exclude-perf in cmd_inject.cpp
1927 info_map["recording_process"] = std::to_string(getpid());
1928 }
1929 #endif
1930 info_map["clockid"] = clockid_;
1931 info_map["timestamp"] = std::to_string(time(nullptr));
1932 info_map["kernel_symbols_available"] = kernel_symbols_available ? "true" : "false";
1933 if (dwarf_callchain_sampling_ && !unwind_dwarf_callchain_) {
1934 OfflineUnwinder::CollectMetaInfo(&info_map);
1935 }
1936 return record_file_writer_->WriteMetaInfoFeature(info_map);
1937 }
1938
DumpDebugUnwindFeature(const std::unordered_set<Dso * > & dso_set)1939 bool RecordCommand::DumpDebugUnwindFeature(const std::unordered_set<Dso*>& dso_set) {
1940 DebugUnwindFeature debug_unwind_feature;
1941 debug_unwind_feature.reserve(dso_set.size());
1942 for (const Dso* dso : dso_set) {
1943 if (dso->type() != DSO_ELF_FILE) {
1944 continue;
1945 }
1946 const std::string& filename = dso->GetDebugFilePath();
1947 std::unique_ptr<ElfFile> elf = ElfFile::Open(filename);
1948 if (elf) {
1949 llvm::MemoryBuffer* buffer = elf->GetMemoryBuffer();
1950 debug_unwind_feature.resize(debug_unwind_feature.size() + 1);
1951 auto& debug_unwind_file = debug_unwind_feature.back();
1952 debug_unwind_file.path = filename;
1953 debug_unwind_file.size = buffer->getBufferSize();
1954 if (!record_file_writer_->WriteFeature(PerfFileFormat::FEAT_DEBUG_UNWIND_FILE,
1955 buffer->getBufferStart(), buffer->getBufferSize())) {
1956 return false;
1957 }
1958 } else {
1959 LOG(WARNING) << "failed to keep " << filename << " in debug_unwind_feature section";
1960 }
1961 }
1962 return record_file_writer_->WriteDebugUnwindFeature(debug_unwind_feature);
1963 }
1964
CollectHitFileInfo(const SampleRecord & r,std::unordered_set<Dso * > * dso_set)1965 void RecordCommand::CollectHitFileInfo(const SampleRecord& r, std::unordered_set<Dso*>* dso_set) {
1966 const ThreadEntry* thread = thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
1967 size_t kernel_ip_count;
1968 std::vector<uint64_t> ips = r.GetCallChain(&kernel_ip_count);
1969 for (size_t i = 0; i < ips.size(); i++) {
1970 const MapEntry* map = thread_tree_.FindMap(thread, ips[i], i < kernel_ip_count);
1971 Dso* dso = map->dso;
1972 if (dump_symbols_) {
1973 const Symbol* symbol = thread_tree_.FindSymbol(map, ips[i], nullptr, &dso);
1974 if (!symbol->HasDumpId()) {
1975 dso->CreateSymbolDumpId(symbol);
1976 }
1977 }
1978 if (!dso->HasDumpId() && dso->type() != DSO_UNKNOWN_FILE) {
1979 dso->CreateDumpId();
1980 }
1981 if (dso_set != nullptr) {
1982 dso_set->insert(dso);
1983 }
1984 }
1985 }
1986
1987 } // namespace
1988
// Returns true and advances p past s iff p starts with the literal prefix s.
// On mismatch, p is left unchanged.
static bool ConsumeStr(const char*& p, const char* s) {
  const size_t len = strlen(s);
  if (strncmp(p, s, len) != 0) {
    return false;
  }
  p += len;
  return true;
}
1996
// Parses an unsigned integer at p (any base accepted by strtoull with base 0,
// e.g. "0x" for hex; leading whitespace is skipped by strtoull). On success,
// stores the value in *addr, advances p past the parsed text, and returns true.
static bool ConsumeAddr(const char*& p, uint64_t* addr) {
  char* end = nullptr;
  errno = 0;
  *addr = strtoull(p, &end, 0);
  const bool parsed = (errno == 0) && (end != p);
  if (parsed) {
    p = end;
  }
  return parsed;
}
2007
// To reduce function length, not all format errors are checked.
// Parses one ETM address filter description and appends it to |filters|.
// Accepted forms:
//   start <kernel_addr>                 start <vaddr>@<file_path>
//   stop <kernel_addr>                  stop <vaddr>@<file_path>
//   filter <addr_start>-<addr_end>      filter <vaddr_start>-<vaddr_end>@<file_path>
//   filter <file_path>   (one filter per executable segment of the file)
// For file-based forms, virtual addresses are converted to file offsets and the
// path is canonicalized. Returns false if no form matches.
static bool ParseOneAddrFilter(const std::string& s, std::vector<AddrFilter>* filters) {
  // Every accepted form has exactly one space (between keyword and argument);
  // args is only used for this validation. Note this also rejects file paths
  // containing spaces.
  std::vector<std::string> args = android::base::Split(s, " ");
  if (args.size() != 2) {
    return false;
  }

  uint64_t addr1;
  uint64_t addr2;
  uint64_t off1;
  uint64_t off2;
  std::string path;

  // Each branch below re-scans from the start of s with its own cursor p.
  // ConsumeAddr (strtoull) skips leading whitespace, so the space after the
  // keyword is consumed implicitly.
  if (auto p = s.data(); ConsumeStr(p, "start") && ConsumeAddr(p, &addr1)) {
    if (*p == '\0') {
      // start <kernel_addr>
      filters->emplace_back(AddrFilter::KERNEL_START, addr1, 0, "");
      return true;
    }
    if (ConsumeStr(p, "@") && *p != '\0') {
      // start <vaddr>@<file_path>
      // Translate the virtual address to a file offset before storing.
      if (auto elf = ElfFile::Open(p); elf && elf->VaddrToOff(addr1, &off1) && Realpath(p, &path)) {
        filters->emplace_back(AddrFilter::FILE_START, off1, 0, path);
        return true;
      }
    }
  }
  if (auto p = s.data(); ConsumeStr(p, "stop") && ConsumeAddr(p, &addr1)) {
    if (*p == '\0') {
      // stop <kernel_addr>
      filters->emplace_back(AddrFilter::KERNEL_STOP, addr1, 0, "");
      return true;
    }
    if (ConsumeStr(p, "@") && *p != '\0') {
      // stop <vaddr>@<file_path>
      if (auto elf = ElfFile::Open(p); elf && elf->VaddrToOff(addr1, &off1) && Realpath(p, &path)) {
        filters->emplace_back(AddrFilter::FILE_STOP, off1, 0, path);
        return true;
      }
    }
  }
  if (auto p = s.data(); ConsumeStr(p, "filter") && ConsumeAddr(p, &addr1) && ConsumeStr(p, "-") &&
                         ConsumeAddr(p, &addr2)) {
    if (*p == '\0') {
      // filter <kernel_addr_start>-<kernel_addr_end>
      // Filters store (start, size), hence addr2 - addr1.
      filters->emplace_back(AddrFilter::KERNEL_RANGE, addr1, addr2 - addr1, "");
      return true;
    }
    if (ConsumeStr(p, "@") && *p != '\0') {
      // filter <vaddr_start>-<vaddr_end>@<file_path>
      if (auto elf = ElfFile::Open(p); elf && elf->VaddrToOff(addr1, &off1) &&
                                       elf->VaddrToOff(addr2, &off2) && Realpath(p, &path)) {
        filters->emplace_back(AddrFilter::FILE_RANGE, off1, off2 - off1, path);
        return true;
      }
    }
  }
  // Fallback: reached when the "filter" branch above didn't parse as a range.
  if (auto p = s.data(); ConsumeStr(p, "filter") && *p != '\0') {
    // filter <file_path>
    // Add one range filter per executable segment of the file.
    // NOTE(review): unlike the branches above, the path is only trimmed here,
    // not passed through Realpath — presumably intentional; confirm.
    path = android::base::Trim(p);
    if (auto elf = ElfFile::Open(path); elf) {
      for (const ElfSegment& seg : elf->GetProgramHeader()) {
        if (seg.is_executable) {
          filters->emplace_back(AddrFilter::FILE_RANGE, seg.file_offset, seg.file_size, path);
        }
      }
      return true;
    }
  }
  return false;
}
2079
ParseAddrFilterOption(const std::string & s)2080 std::vector<AddrFilter> ParseAddrFilterOption(const std::string& s) {
2081 std::vector<AddrFilter> filters;
2082 for (const auto& str : android::base::Split(s, ",")) {
2083 if (!ParseOneAddrFilter(str, &filters)) {
2084 LOG(ERROR) << "failed to parse addr filter: " << str;
2085 return {};
2086 }
2087 }
2088 return filters;
2089 }
2090
RegisterRecordCommand()2091 void RegisterRecordCommand() {
2092 RegisterCommand("record", [] { return std::unique_ptr<Command>(new RecordCommand()); });
2093 }
2094
2095 } // namespace simpleperf
2096