/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "command.h"

#include <inttypes.h>

#include <unordered_map>

#include <android-base/logging.h>
#include <android-base/stringprintf.h>
#include <android-base/strings.h>

#include "callchain.h"
#include "event_attr.h"
#include "event_type.h"
#include "record_file.h"
#include "sample_tree.h"
#include "tracing.h"
#include "utils.h"

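// The kmem command records and reports kernel slab allocation information using the
// kmem:* tracepoints (kmalloc, kmem_cache_alloc, kfree, ...). For example, on a rooted
// device (see the usage string in KmemCommand's constructor for the full option list):
//   simpleperf kmem record --slab -g
//   simpleperf kmem report --slab --children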
namespace simpleperf {
namespace {

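// A SlabSample is one row in the slab report. Samples that compare equal under the
// chosen sort keys (see KmemCommand::PrepareToBuildSampleTree) are merged by
// SlabSampleTreeBuilder::MergeSample below.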
struct SlabSample {
  const Symbol* symbol;            // the function making allocation
  uint64_t ptr;                    // the start address of the allocated space
  uint64_t bytes_req;              // requested space size
  uint64_t bytes_alloc;            // allocated space size
  uint64_t sample_count;           // count of allocations
  uint64_t gfp_flags;              // flags used for allocation
  uint64_t cross_cpu_allocations;  // count of allocations freed not on the
                                   // cpu allocating them
  CallChainRoot<SlabSample> callchain;  // a callchain tree representing all
                                        // callchains in this sample

  SlabSample(const Symbol* symbol, uint64_t ptr, uint64_t bytes_req, uint64_t bytes_alloc,
             uint64_t sample_count, uint64_t gfp_flags, uint64_t cross_cpu_allocations)
      : symbol(symbol),
        ptr(ptr),
        bytes_req(bytes_req),
        bytes_alloc(bytes_alloc),
        sample_count(sample_count),
        gfp_flags(gfp_flags),
        cross_cpu_allocations(cross_cpu_allocations) {}

  uint64_t GetPeriod() const { return sample_count; }
};

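// Allocation sizes of a single record, filled in CreateSample() and consumed by
// CreateCallChainSample() when accumulating callchain samples.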
struct SlabAccumulateInfo {
  uint64_t bytes_req;
  uint64_t bytes_alloc;
};

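// Comparison and display callbacks for the report columns, one per SlabSample field.
// The *_REVERSE comparators sort values in descending order.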
BUILD_COMPARE_VALUE_FUNCTION(ComparePtr, ptr);
BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareBytesReq, bytes_req);
BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareBytesAlloc, bytes_alloc);
BUILD_COMPARE_VALUE_FUNCTION(CompareGfpFlags, gfp_flags);
BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareCrossCpuAllocations, cross_cpu_allocations);

BUILD_DISPLAY_HEX64_FUNCTION(DisplayPtr, ptr);
BUILD_DISPLAY_UINT64_FUNCTION(DisplayBytesReq, bytes_req);
BUILD_DISPLAY_UINT64_FUNCTION(DisplayBytesAlloc, bytes_alloc);
BUILD_DISPLAY_HEX64_FUNCTION(DisplayGfpFlags, gfp_flags);
BUILD_DISPLAY_UINT64_FUNCTION(DisplayCrossCpuAllocations, cross_cpu_allocations);

static int CompareFragment(const SlabSample* sample1, const SlabSample* sample2) {
  uint64_t frag1 = sample1->bytes_alloc - sample1->bytes_req;
  uint64_t frag2 = sample2->bytes_alloc - sample2->bytes_req;
  return Compare(frag2, frag1);
}

static std::string DisplayFragment(const SlabSample* sample) {
  return android::base::StringPrintf("%" PRIu64, sample->bytes_alloc - sample->bytes_req);
}

struct SlabSampleTree {
  std::vector<SlabSample*> samples;
  uint64_t total_requested_bytes;
  uint64_t total_allocated_bytes;
  uint64_t nr_allocations;
  uint64_t nr_frees;
  uint64_t nr_cross_cpu_allocations;
};

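// Describes how to decode the raw data of one kmem tracepoint event: whether it is an
// allocation or a free event, and where each field lives inside the raw data.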
struct SlabFormat {
  enum {
    KMEM_ALLOC,
    KMEM_FREE,
  } type;
  TracingFieldPlace call_site;
  TracingFieldPlace ptr;
  TracingFieldPlace bytes_req;
  TracingFieldPlace bytes_alloc;
  TracingFieldPlace gfp_flags;
};

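// Builds the tree of SlabSamples from tracepoint sample records. Allocation events
// create (or merge into) samples; free events only contribute to the free count and
// the cross-cpu (pingpong) statistics.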
class SlabSampleTreeBuilder : public SampleTreeBuilder<SlabSample, SlabAccumulateInfo> {
 public:
  SlabSampleTreeBuilder(const SampleComparator<SlabSample>& sample_comparator,
                        ThreadTree* thread_tree)
      : SampleTreeBuilder(sample_comparator),
        thread_tree_(thread_tree),
        total_requested_bytes_(0),
        total_allocated_bytes_(0),
        nr_allocations_(0),
        nr_frees_(0),
        nr_cross_cpu_allocations_(0) {}

  SlabSampleTree GetSampleTree() const {
    SlabSampleTree sample_tree;
    sample_tree.samples = GetSamples();
    sample_tree.total_requested_bytes = total_requested_bytes_;
    sample_tree.total_allocated_bytes = total_allocated_bytes_;
    sample_tree.nr_allocations = nr_allocations_;
    sample_tree.nr_frees = nr_frees_;
    sample_tree.nr_cross_cpu_allocations = nr_cross_cpu_allocations_;
    return sample_tree;
  }

  void AddSlabFormat(const std::vector<uint64_t>& event_ids, SlabFormat format) {
    std::unique_ptr<SlabFormat> p(new SlabFormat(format));
    for (auto id : event_ids) {
      event_id_to_format_map_[id] = p.get();
    }
    formats_.push_back(std::move(p));
  }

 protected:
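  // Called for every sample record. Allocation events produce a SlabSample for the
  // allocation call site; free events only update nr_frees_ and the pingpong counters
  // and return nullptr, so they never show up as samples themselves.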
  SlabSample* CreateSample(const SampleRecord& r, bool in_kernel,
                           SlabAccumulateInfo* acc_info) override {
    if (!in_kernel) {
      // Normally we don't parse records in user space because tracepoint events all
      // happen in the kernel. But if r.ip_data.ip == 0, it may be a kernel record that
      // failed to dump the ip register, and it is still useful.
      if (r.ip_data.ip == 0) {
        // It seems we are on a kernel that can't dump the regset for tracepoint events
        // because it lacks perf_arch_fetch_caller_regs(). We can't get the callchain,
        // but we can still do a normal report.
        static bool first = true;
        if (first) {
          first = false;
          if (accumulate_callchain_) {
            // The kernel doesn't seem to support dumping registers for tracepoint
            // events because it lacks perf_arch_fetch_caller_regs().
            LOG(WARNING) << "simpleperf may not get callchains for tracepoint"
                         << " events because of lacking kernel support.";
          }
        }
      } else {
        return nullptr;
      }
    }
    uint64_t id = r.id_data.id;
    auto it = event_id_to_format_map_.find(id);
    if (it == event_id_to_format_map_.end()) {
      return nullptr;
    }
    const char* raw_data = r.raw_data.data;
    SlabFormat* format = it->second;
    if (format->type == SlabFormat::KMEM_ALLOC) {
      uint64_t call_site = format->call_site.ReadFromData(raw_data);
      const Symbol* symbol = thread_tree_->FindKernelSymbol(call_site);
      uint64_t ptr = format->ptr.ReadFromData(raw_data);
      uint64_t bytes_req = format->bytes_req.ReadFromData(raw_data);
      uint64_t bytes_alloc = format->bytes_alloc.ReadFromData(raw_data);
      uint64_t gfp_flags = format->gfp_flags.ReadFromData(raw_data);
      SlabSample* sample = InsertSample(std::unique_ptr<SlabSample>(
          new SlabSample(symbol, ptr, bytes_req, bytes_alloc, 1, gfp_flags, 0)));
      alloc_cpu_record_map_.insert(std::make_pair(ptr, std::make_pair(r.cpu_data.cpu, sample)));
      acc_info->bytes_req = bytes_req;
      acc_info->bytes_alloc = bytes_alloc;
      return sample;
    } else if (format->type == SlabFormat::KMEM_FREE) {
      uint64_t ptr = format->ptr.ReadFromData(raw_data);
      auto it = alloc_cpu_record_map_.find(ptr);
      if (it != alloc_cpu_record_map_.end()) {
        SlabSample* sample = it->second.second;
        if (r.cpu_data.cpu != it->second.first) {
          sample->cross_cpu_allocations++;
          nr_cross_cpu_allocations_++;
        }
        alloc_cpu_record_map_.erase(it);
      }
      nr_frees_++;
    }
    return nullptr;
  }

  SlabSample* CreateBranchSample(const SampleRecord&, const BranchStackItemType&) override {
    return nullptr;
  }

  SlabSample* CreateCallChainSample(const ThreadEntry*, const SlabSample* sample, uint64_t ip,
                                    bool in_kernel, const std::vector<SlabSample*>& callchain,
                                    const SlabAccumulateInfo& acc_info) override {
    if (!in_kernel) {
      return nullptr;
    }
    const Symbol* symbol = thread_tree_->FindKernelSymbol(ip);
    return InsertCallChainSample(
        std::unique_ptr<SlabSample>(new SlabSample(symbol, sample->ptr, acc_info.bytes_req,
                                                   acc_info.bytes_alloc, 1, sample->gfp_flags, 0)),
        callchain);
  }

  const ThreadEntry* GetThreadOfSample(SlabSample*) override { return nullptr; }

  uint64_t GetPeriodForCallChain(const SlabAccumulateInfo&) override {
    // Decide the percentage of callchain by the sample_count, so use 1 as the
    // period when calling AddCallChain().
    return 1;
  }

  void UpdateSummary(const SlabSample* sample) override {
    total_requested_bytes_ += sample->bytes_req;
    total_allocated_bytes_ += sample->bytes_alloc;
    nr_allocations_++;
  }

  void MergeSample(SlabSample* sample1, SlabSample* sample2) override {
    sample1->bytes_req += sample2->bytes_req;
    sample1->bytes_alloc += sample2->bytes_alloc;
    sample1->sample_count += sample2->sample_count;
  }

 private:
  ThreadTree* thread_tree_;
  uint64_t total_requested_bytes_;
  uint64_t total_allocated_bytes_;
  uint64_t nr_allocations_;
  uint64_t nr_frees_;
  uint64_t nr_cross_cpu_allocations_;

  std::unordered_map<uint64_t, SlabFormat*> event_id_to_format_map_;
  std::vector<std::unique_ptr<SlabFormat>> formats_;
  std::unordered_map<uint64_t, std::pair<uint32_t, SlabSample*>> alloc_cpu_record_map_;
};

using SlabSampleTreeSorter = SampleTreeSorter<SlabSample>;
using SlabSampleTreeDisplayer = SampleTreeDisplayer<SlabSample, SlabSampleTree>;
using SlabSampleCallgraphDisplayer = CallgraphDisplayer<SlabSample, CallChainNode<SlabSample>>;

struct EventAttrWithName {
  perf_event_attr attr;
  std::string name;
  std::vector<uint64_t> event_ids;
};

class KmemCommand : public Command {
 public:
  KmemCommand()
      : Command("kmem", "collect kernel memory allocation information",
                // clang-format off
"Usage: kmem (record [record options] | report [report options])\n"
"kmem record\n"
"-g                   Enable call graph recording. Same as '--call-graph fp'.\n"
"--slab               Collect slab allocation information. Default option.\n"
"Other record options provided by the simpleperf record command are also available.\n"
"kmem report\n"
"--children           Print the accumulated allocation info appearing in the callchain.\n"
"                     Can be used on perf.data recorded with the `--call-graph fp` option.\n"
"-g [callee|caller]   Print call graph for perf.data recorded with the\n"
"                     `--call-graph fp` option. If callee mode is used, the graph\n"
"                     shows how functions are called from others. Otherwise, the\n"
"                     graph shows how functions call others. Default is callee\n"
"                     mode. The percentage shown in the graph is determined by\n"
"                     the hit count of the callchain.\n"
"-i                   Specify path of record file, default is perf.data.\n"
"-o report_file_name  Set report file name, default is stdout.\n"
"--slab               Report slab allocation information. Default option.\n"
"--slab-sort key1,key2,...\n"
"                     Select the keys used to sort and print slab allocation information.\n"
"                     Should be used with the --slab option. Possible keys include:\n"
"                       hit         -- the allocation count.\n"
"                       caller      -- the function calling allocation.\n"
"                       ptr         -- the address of the allocated space.\n"
"                       bytes_req   -- the total requested space size.\n"
"                       bytes_alloc -- the total allocated space size.\n"
"                       fragment    -- the extra allocated space size\n"
"                                      (bytes_alloc - bytes_req).\n"
"                       gfp_flags   -- the flags used for allocation.\n"
"                       pingpong    -- the count of allocations that are freed not on\n"
"                                      the cpu allocating them.\n"
"                     The default slab sort keys are:\n"
"                       hit,caller,bytes_req,bytes_alloc,fragment,pingpong.\n"
                // clang-format on
                ),
        is_record_(false),
        use_slab_(false),
        accumulate_callchain_(false),
        print_callgraph_(false),
        callgraph_show_callee_(false),
        record_filename_("perf.data"),
        record_file_arch_(GetBuildArch()) {}

  bool Run(const std::vector<std::string>& args);

 private:
  bool ParseOptions(const std::vector<std::string>& args, std::vector<std::string>* left_args);
  bool RecordKmemInfo(const std::vector<std::string>& record_args);
  bool ReportKmemInfo();
  bool PrepareToBuildSampleTree();
  void ReadEventAttrsFromRecordFile();
  bool ReadFeaturesFromRecordFile();
  bool ReadSampleTreeFromRecordFile();
  bool ProcessRecord(std::unique_ptr<Record> record);
  void ProcessTracingData(const std::vector<char>& data);
  bool PrintReport();
  void PrintReportContext(FILE* fp);
  void PrintSlabReportContext(FILE* fp);

  bool is_record_;
  bool use_slab_;
  std::vector<std::string> slab_sort_keys_;
  bool accumulate_callchain_;
  bool print_callgraph_;
  bool callgraph_show_callee_;

  std::string record_filename_;
  std::unique_ptr<RecordFileReader> record_file_reader_;
  std::vector<EventAttrWithName> event_attrs_;
  std::string record_cmdline_;
  ArchType record_file_arch_;

  ThreadTree thread_tree_;
  SlabSampleTree slab_sample_tree_;
  std::unique_ptr<SlabSampleTreeBuilder> slab_sample_tree_builder_;
  std::unique_ptr<SlabSampleTreeSorter> slab_sample_tree_sorter_;
  std::unique_ptr<SlabSampleTreeDisplayer> slab_sample_tree_displayer_;

  std::string report_filename_;
};

bool KmemCommand::Run(const std::vector<std::string>& args) {
  std::vector<std::string> left_args;
  if (!ParseOptions(args, &left_args)) {
    return false;
  }
  if (!use_slab_) {
    use_slab_ = true;
  }
  if (is_record_) {
    return RecordKmemInfo(left_args);
  }
  return ReportKmemInfo();
}

bool KmemCommand::ParseOptions(const std::vector<std::string>& args,
                               std::vector<std::string>* left_args) {
  if (args.empty()) {
    LOG(ERROR) << "No subcommand specified";
    return false;
  }
  if (args[0] == "record") {
    if (!IsRoot()) {
      LOG(ERROR) << "simpleperf kmem record command needs root privilege";
      return false;
    }
    is_record_ = true;
    size_t i;
    for (i = 1; i < args.size() && !args[i].empty() && args[i][0] == '-'; ++i) {
      if (args[i] == "-g") {
        left_args->push_back("--call-graph");
        left_args->push_back("fp");
      } else if (args[i] == "--slab") {
        use_slab_ = true;
      } else {
        left_args->push_back(args[i]);
      }
    }
    left_args->insert(left_args->end(), args.begin() + i, args.end());
  } else if (args[0] == "report") {
    is_record_ = false;
    for (size_t i = 1; i < args.size(); ++i) {
      if (args[i] == "--children") {
        accumulate_callchain_ = true;
      } else if (args[i] == "-g") {
        print_callgraph_ = true;
        accumulate_callchain_ = true;
        callgraph_show_callee_ = true;
        if (i + 1 < args.size() && args[i + 1][0] != '-') {
          ++i;
          if (args[i] == "callee") {
            callgraph_show_callee_ = true;
          } else if (args[i] == "caller") {
            callgraph_show_callee_ = false;
          } else {
            LOG(ERROR) << "Unknown argument with -g option: " << args[i];
            return false;
          }
        }
      } else if (args[i] == "-i") {
        if (!NextArgumentOrError(args, &i)) {
          return false;
        }
        record_filename_ = args[i];
      } else if (args[i] == "-o") {
        if (!NextArgumentOrError(args, &i)) {
          return false;
        }
        report_filename_ = args[i];
      } else if (args[i] == "--slab") {
        use_slab_ = true;
      } else if (args[i] == "--slab-sort") {
        if (!NextArgumentOrError(args, &i)) {
          return false;
        }
        slab_sort_keys_ = android::base::Split(args[i], ",");
      } else {
        ReportUnknownOption(args, i);
        return false;
      }
    }
  } else {
    LOG(ERROR) << "Unknown subcommand for " << Name() << ": " << args[0]
               << ". Try `simpleperf help " << Name() << "`";
    return false;
  }
  return true;
}

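// Run the record command with one -e option for each supported kmem tracepoint and
// with -a (system wide), forwarding any remaining arguments to `simpleperf record`.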
bool KmemCommand::RecordKmemInfo(const std::vector<std::string>& record_args) {
  std::vector<std::string> args;
  if (use_slab_) {
    std::vector<std::string> trace_events = {"kmem:kmalloc",      "kmem:kmem_cache_alloc",
                                             "kmem:kmalloc_node", "kmem:kmem_cache_alloc_node",
                                             "kmem:kfree",        "kmem:kmem_cache_free"};
    for (const auto& name : trace_events) {
      if (ParseEventType(name)) {
        args.insert(args.end(), {"-e", name});
      }
    }
  }
  if (args.empty()) {
    LOG(ERROR) << "Kernel allocation related trace events are not supported.";
    return false;
  }
  args.push_back("-a");
  args.insert(args.end(), record_args.begin(), record_args.end());
  std::unique_ptr<Command> record_cmd = CreateCommandInstance("record");
  if (record_cmd == nullptr) {
    LOG(ERROR) << "record command isn't available";
    return false;
  }
  return record_cmd->Run(args);
}

bool KmemCommand::ReportKmemInfo() {
  if (!PrepareToBuildSampleTree()) {
    return false;
  }
  record_file_reader_ = RecordFileReader::CreateInstance(record_filename_);
  if (record_file_reader_ == nullptr) {
    return false;
  }
  ReadEventAttrsFromRecordFile();
  if (!ReadFeaturesFromRecordFile()) {
    return false;
  }
  if (!ReadSampleTreeFromRecordFile()) {
    return false;
  }
  if (!PrintReport()) {
    return false;
  }
  return true;
}

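// Turn the --slab-sort keys into comparator and displayer callbacks. Keys that identify
// a sample (caller, ptr, gfp_flags) go into the merge comparator; the other keys (hit,
// bytes_req, bytes_alloc, fragment, pingpong) only affect the sort order. Every key adds
// a column to the report.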
bool KmemCommand::PrepareToBuildSampleTree() {
  if (use_slab_) {
    if (slab_sort_keys_.empty()) {
      slab_sort_keys_ = {"hit", "caller", "bytes_req", "bytes_alloc", "fragment", "pingpong"};
    }
    SampleComparator<SlabSample> comparator;
    SampleComparator<SlabSample> sort_comparator;
    SampleDisplayer<SlabSample, SlabSampleTree> displayer;
    std::string accumulated_name = accumulate_callchain_ ? "Accumulated_" : "";

    if (print_callgraph_) {
      displayer.AddExclusiveDisplayFunction(SlabSampleCallgraphDisplayer());
    }

    for (const auto& key : slab_sort_keys_) {
      if (key == "hit") {
        sort_comparator.AddCompareFunction(CompareSampleCount);
        displayer.AddDisplayFunction(accumulated_name + "Hit", DisplaySampleCount);
      } else if (key == "caller") {
        comparator.AddCompareFunction(CompareSymbol);
        displayer.AddDisplayFunction("Caller", DisplaySymbol);
      } else if (key == "ptr") {
        comparator.AddCompareFunction(ComparePtr);
        displayer.AddDisplayFunction("Ptr", DisplayPtr);
      } else if (key == "bytes_req") {
        sort_comparator.AddCompareFunction(CompareBytesReq);
        displayer.AddDisplayFunction(accumulated_name + "BytesReq", DisplayBytesReq);
      } else if (key == "bytes_alloc") {
        sort_comparator.AddCompareFunction(CompareBytesAlloc);
        displayer.AddDisplayFunction(accumulated_name + "BytesAlloc", DisplayBytesAlloc);
      } else if (key == "fragment") {
        sort_comparator.AddCompareFunction(CompareFragment);
        displayer.AddDisplayFunction(accumulated_name + "Fragment", DisplayFragment);
      } else if (key == "gfp_flags") {
        comparator.AddCompareFunction(CompareGfpFlags);
        displayer.AddDisplayFunction("GfpFlags", DisplayGfpFlags);
      } else if (key == "pingpong") {
        sort_comparator.AddCompareFunction(CompareCrossCpuAllocations);
        displayer.AddDisplayFunction("Pingpong", DisplayCrossCpuAllocations);
      } else {
        LOG(ERROR) << "Unknown sort key for slab allocation: " << key;
        return false;
      }
    }
    slab_sample_tree_builder_.reset(new SlabSampleTreeBuilder(comparator, &thread_tree_));
    slab_sample_tree_builder_->SetCallChainSampleOptions(accumulate_callchain_, print_callgraph_,
                                                         !callgraph_show_callee_);
    sort_comparator.AddComparator(comparator);
    slab_sample_tree_sorter_.reset(new SlabSampleTreeSorter(sort_comparator));
    slab_sample_tree_displayer_.reset(new SlabSampleTreeDisplayer(displayer));
  }
  return true;
}

void KmemCommand::ReadEventAttrsFromRecordFile() {
  std::vector<EventAttrWithId> attrs = record_file_reader_->AttrSection();
  for (const auto& attr_with_id : attrs) {
    EventAttrWithName attr;
    attr.attr = *attr_with_id.attr;
    attr.event_ids = attr_with_id.ids;
    attr.name = GetEventNameByAttr(attr.attr);
    event_attrs_.push_back(attr);
  }
}

bool KmemCommand::ReadFeaturesFromRecordFile() {
  record_file_reader_->LoadBuildIdAndFileFeatures(thread_tree_);
  std::string arch = record_file_reader_->ReadFeatureString(PerfFileFormat::FEAT_ARCH);
  if (!arch.empty()) {
    record_file_arch_ = GetArchType(arch);
    if (record_file_arch_ == ARCH_UNSUPPORTED) {
      return false;
    }
  }
  std::vector<std::string> cmdline = record_file_reader_->ReadCmdlineFeature();
  if (!cmdline.empty()) {
    record_cmdline_ = android::base::Join(cmdline, ' ');
  }
  if (record_file_reader_->HasFeature(PerfFileFormat::FEAT_TRACING_DATA)) {
    std::vector<char> tracing_data;
    if (!record_file_reader_->ReadFeatureSection(PerfFileFormat::FEAT_TRACING_DATA,
                                                 &tracing_data)) {
      return false;
    }
    ProcessTracingData(tracing_data);
  }
  return true;
}

bool KmemCommand::ReadSampleTreeFromRecordFile() {
  if (!record_file_reader_->ReadDataSection(
          [this](std::unique_ptr<Record> record) { return ProcessRecord(std::move(record)); })) {
    return false;
  }
  if (use_slab_) {
    slab_sample_tree_ = slab_sample_tree_builder_->GetSampleTree();
    slab_sample_tree_sorter_->Sort(slab_sample_tree_.samples, print_callgraph_);
  }
  return true;
}

bool KmemCommand::ProcessRecord(std::unique_ptr<Record> record) {
  thread_tree_.Update(*record);
  if (record->type() == PERF_RECORD_SAMPLE) {
    if (use_slab_) {
      slab_sample_tree_builder_->ProcessSampleRecord(
          *static_cast<const SampleRecord*>(record.get()));
    }
  } else if (record->type() == PERF_RECORD_TRACING_DATA ||
             record->type() == SIMPLE_PERF_RECORD_TRACING_DATA) {
    const auto& r = *static_cast<TracingDataRecord*>(record.get());
    ProcessTracingData(std::vector<char>(r.data, r.data + r.data_size));
  }
  return true;
}

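// Parse the tracing data to find the raw-data layout of each kmem tracepoint we care
// about, and register the layouts with the slab sample tree builder.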
void KmemCommand::ProcessTracingData(const std::vector<char>& data) {
  Tracing tracing(data);
  for (auto& attr : event_attrs_) {
    if (attr.attr.type == PERF_TYPE_TRACEPOINT) {
      uint64_t trace_event_id = attr.attr.config;
      attr.name = tracing.GetTracingEventNameHavingId(trace_event_id);
      TracingFormat format = tracing.GetTracingFormatHavingId(trace_event_id);
      if (use_slab_) {
        if (format.name == "kmalloc" || format.name == "kmem_cache_alloc" ||
            format.name == "kmalloc_node" || format.name == "kmem_cache_alloc_node") {
          SlabFormat f;
          f.type = SlabFormat::KMEM_ALLOC;
          format.GetField("call_site", f.call_site);
          format.GetField("ptr", f.ptr);
          format.GetField("bytes_req", f.bytes_req);
          format.GetField("bytes_alloc", f.bytes_alloc);
          format.GetField("gfp_flags", f.gfp_flags);
          slab_sample_tree_builder_->AddSlabFormat(attr.event_ids, f);
        } else if (format.name == "kfree" || format.name == "kmem_cache_free") {
          SlabFormat f;
          f.type = SlabFormat::KMEM_FREE;
          format.GetField("call_site", f.call_site);
          format.GetField("ptr", f.ptr);
          slab_sample_tree_builder_->AddSlabFormat(attr.event_ids, f);
        }
      }
    }
  }
}

bool KmemCommand::PrintReport() {
  std::unique_ptr<FILE, decltype(&fclose)> file_handler(nullptr, fclose);
  FILE* report_fp = stdout;
  if (!report_filename_.empty()) {
    file_handler.reset(fopen(report_filename_.c_str(), "w"));
    if (file_handler == nullptr) {
      PLOG(ERROR) << "failed to open " << report_filename_;
      return false;
    }
    report_fp = file_handler.get();
  }
  PrintReportContext(report_fp);
  if (use_slab_) {
    fprintf(report_fp, "\n\n");
    PrintSlabReportContext(report_fp);
    slab_sample_tree_displayer_->DisplaySamples(report_fp, slab_sample_tree_.samples,
                                                &slab_sample_tree_);
  }
  return true;
}

void KmemCommand::PrintReportContext(FILE* fp) {
  if (!record_cmdline_.empty()) {
    fprintf(fp, "Cmdline: %s\n", record_cmdline_.c_str());
  }
  fprintf(fp, "Arch: %s\n", GetArchString(record_file_arch_).c_str());
  for (const auto& attr : event_attrs_) {
    fprintf(fp, "Event: %s (type %u, config %llu)\n", attr.name.c_str(), attr.attr.type,
            attr.attr.config);
  }
}

void KmemCommand::PrintSlabReportContext(FILE* fp) {
  fprintf(fp, "Slab allocation information:\n");
  fprintf(fp, "Total requested bytes: %" PRIu64 "\n", slab_sample_tree_.total_requested_bytes);
  fprintf(fp, "Total allocated bytes: %" PRIu64 "\n", slab_sample_tree_.total_allocated_bytes);
  uint64_t fragment =
      slab_sample_tree_.total_allocated_bytes - slab_sample_tree_.total_requested_bytes;
  double percentage = 0.0;
  if (slab_sample_tree_.total_allocated_bytes != 0) {
    percentage = 100.0 * fragment / slab_sample_tree_.total_allocated_bytes;
  }
  fprintf(fp, "Total fragment: %" PRIu64 ", %f%%\n", fragment, percentage);
  fprintf(fp, "Total allocations: %" PRIu64 "\n", slab_sample_tree_.nr_allocations);
  fprintf(fp, "Total frees: %" PRIu64 "\n", slab_sample_tree_.nr_frees);
  percentage = 0.0;
  if (slab_sample_tree_.nr_allocations != 0) {
    percentage =
        100.0 * slab_sample_tree_.nr_cross_cpu_allocations / slab_sample_tree_.nr_allocations;
  }
  fprintf(fp, "Total cross cpu allocation/free: %" PRIu64 ", %f%%\n",
          slab_sample_tree_.nr_cross_cpu_allocations, percentage);
  fprintf(fp, "\n");
}

}  // namespace

void RegisterKmemCommand() {
  RegisterCommand("kmem", [] { return std::unique_ptr<Command>(new KmemCommand()); });
}

}  // namespace simpleperf