/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "command.h"

#include <inttypes.h>

#include <unordered_map>

#include <android-base/logging.h>
#include <android-base/stringprintf.h>
#include <android-base/strings.h>

#include "callchain.h"
#include "event_attr.h"
#include "event_type.h"
#include "record_file.h"
#include "sample_tree.h"
#include "tracing.h"
#include "utils.h"

namespace simpleperf {
namespace {

struct SlabSample {
  const Symbol* symbol;                 // the function making the allocation
  uint64_t ptr;                         // the start address of the allocated space
  uint64_t bytes_req;                   // requested space size
  uint64_t bytes_alloc;                 // allocated space size
  uint64_t sample_count;                // count of allocations
  uint64_t gfp_flags;                   // flags used for the allocation
  uint64_t cross_cpu_allocations;       // count of allocations freed on a cpu
                                        // other than the one allocating them
  CallChainRoot<SlabSample> callchain;  // a callchain tree representing all
                                        // callchains in this sample
  SlabSample(const Symbol* symbol, uint64_t ptr, uint64_t bytes_req, uint64_t bytes_alloc,
             uint64_t sample_count, uint64_t gfp_flags, uint64_t cross_cpu_allocations)
      : symbol(symbol),
        ptr(ptr),
        bytes_req(bytes_req),
        bytes_alloc(bytes_alloc),
        sample_count(sample_count),
        gfp_flags(gfp_flags),
        cross_cpu_allocations(cross_cpu_allocations) {}

  uint64_t GetPeriod() const { return sample_count; }
};

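// Allocation sizes carried from CreateSample() to the callchain samples built
// for the same record, so accumulated (--children) reports can attribute
// bytes_req/bytes_alloc to every caller in the chain.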
struct SlabAccumulateInfo {
  uint64_t bytes_req;
  uint64_t bytes_alloc;
};

BUILD_COMPARE_VALUE_FUNCTION(ComparePtr, ptr);
BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareBytesReq, bytes_req);
BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareBytesAlloc, bytes_alloc);
BUILD_COMPARE_VALUE_FUNCTION(CompareGfpFlags, gfp_flags);
BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareCrossCpuAllocations, cross_cpu_allocations);

BUILD_DISPLAY_HEX64_FUNCTION(DisplayPtr, ptr);
BUILD_DISPLAY_UINT64_FUNCTION(DisplayBytesReq, bytes_req);
BUILD_DISPLAY_UINT64_FUNCTION(DisplayBytesAlloc, bytes_alloc);
BUILD_DISPLAY_HEX64_FUNCTION(DisplayGfpFlags, gfp_flags);
BUILD_DISPLAY_UINT64_FUNCTION(DisplayCrossCpuAllocations, cross_cpu_allocations);

static int CompareFragment(const SlabSample* sample1, const SlabSample* sample2) {
  uint64_t frag1 = sample1->bytes_alloc - sample1->bytes_req;
  uint64_t frag2 = sample2->bytes_alloc - sample2->bytes_req;
  return Compare(frag2, frag1);
}

static std::string DisplayFragment(const SlabSample* sample) {
  return android::base::StringPrintf("%" PRIu64, sample->bytes_alloc - sample->bytes_req);
}

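// Aggregated result of building slab samples: the merged sample list plus the
// file-wide totals printed in the report summary.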
struct SlabSampleTree {
  std::vector<SlabSample*> samples;
  uint64_t total_requested_bytes;
  uint64_t total_allocated_bytes;
  uint64_t nr_allocations;
  uint64_t nr_frees;
  uint64_t nr_cross_cpu_allocations;
};

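// Describes how to parse the raw data of one kmem tracepoint event: whether it
// is an allocation or a free, and where each field lives in the record.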
struct SlabFormat {
  enum {
    KMEM_ALLOC,
    KMEM_FREE,
  } type;
  TracingFieldPlace call_site;
  TracingFieldPlace ptr;
  TracingFieldPlace bytes_req;
  TracingFieldPlace bytes_alloc;
  TracingFieldPlace gfp_flags;
};

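// Builds SlabSamples from kmem tracepoint sample records: allocation events
// create or merge samples keyed by the comparator, while free events only
// update the free and cross-cpu (pingpong) counters.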
class SlabSampleTreeBuilder : public SampleTreeBuilder<SlabSample, SlabAccumulateInfo> {
 public:
  SlabSampleTreeBuilder(const SampleComparator<SlabSample>& sample_comparator,
                        ThreadTree* thread_tree)
      : SampleTreeBuilder(sample_comparator),
        thread_tree_(thread_tree),
        total_requested_bytes_(0),
        total_allocated_bytes_(0),
        nr_allocations_(0),
        nr_frees_(0),
        nr_cross_cpu_allocations_(0) {}

  SlabSampleTree GetSampleTree() const {
    SlabSampleTree sample_tree;
    sample_tree.samples = GetSamples();
    sample_tree.total_requested_bytes = total_requested_bytes_;
    sample_tree.total_allocated_bytes = total_allocated_bytes_;
    sample_tree.nr_allocations = nr_allocations_;
    sample_tree.nr_frees = nr_frees_;
    sample_tree.nr_cross_cpu_allocations = nr_cross_cpu_allocations_;
    return sample_tree;
  }

  void AddSlabFormat(const std::vector<uint64_t>& event_ids, SlabFormat format) {
    std::unique_ptr<SlabFormat> p(new SlabFormat(format));
    for (auto id : event_ids) {
      event_id_to_format_map_[id] = p.get();
    }
    formats_.push_back(std::move(p));
  }

 protected:
  SlabSample* CreateSample(const SampleRecord& r, bool in_kernel,
                           SlabAccumulateInfo* acc_info) override {
    if (!in_kernel) {
      // Normally we don't parse records with a user-space ip, because
      // tracepoint events all happen in the kernel. But if r.ip_data.ip == 0,
      // it may be a kernel record that failed to dump the ip register and is
      // still useful.
      if (r.ip_data.ip == 0) {
        // It seems we are on a kernel that can't dump the register set for
        // tracepoint events because it lacks perf_arch_fetch_caller_regs().
        // We can't get callchains, but we can still do a normal report.
        static bool first = true;
        if (first) {
          first = false;
          if (accumulate_callchain_) {
            // The kernel doesn't seem to support dumping registers for
            // tracepoint events because it lacks perf_arch_fetch_caller_regs().
            LOG(WARNING) << "simpleperf may not get callchains for tracepoint"
                         << " events due to missing kernel support.";
          }
        }
      } else {
        return nullptr;
      }
    }
    uint64_t id = r.id_data.id;
    auto it = event_id_to_format_map_.find(id);
    if (it == event_id_to_format_map_.end()) {
      return nullptr;
    }
    const char* raw_data = r.raw_data.data;
    SlabFormat* format = it->second;
    if (format->type == SlabFormat::KMEM_ALLOC) {
      uint64_t call_site = format->call_site.ReadFromData(raw_data);
      const Symbol* symbol = thread_tree_->FindKernelSymbol(call_site);
      uint64_t ptr = format->ptr.ReadFromData(raw_data);
      uint64_t bytes_req = format->bytes_req.ReadFromData(raw_data);
      uint64_t bytes_alloc = format->bytes_alloc.ReadFromData(raw_data);
      uint64_t gfp_flags = format->gfp_flags.ReadFromData(raw_data);
      SlabSample* sample = InsertSample(std::unique_ptr<SlabSample>(
          new SlabSample(symbol, ptr, bytes_req, bytes_alloc, 1, gfp_flags, 0)));
      alloc_cpu_record_map_.insert(std::make_pair(ptr, std::make_pair(r.cpu_data.cpu, sample)));
      acc_info->bytes_req = bytes_req;
      acc_info->bytes_alloc = bytes_alloc;
      return sample;
    } else if (format->type == SlabFormat::KMEM_FREE) {
      uint64_t ptr = format->ptr.ReadFromData(raw_data);
      auto it = alloc_cpu_record_map_.find(ptr);
      if (it != alloc_cpu_record_map_.end()) {
        SlabSample* sample = it->second.second;
        if (r.cpu_data.cpu != it->second.first) {
          sample->cross_cpu_allocations++;
          nr_cross_cpu_allocations_++;
        }
        alloc_cpu_record_map_.erase(it);
      }
      nr_frees_++;
    }
    return nullptr;
  }

  SlabSample* CreateBranchSample(const SampleRecord&, const BranchStackItemType&) override {
    return nullptr;
  }

  SlabSample* CreateCallChainSample(const ThreadEntry*, const SlabSample* sample, uint64_t ip,
                                    bool in_kernel, const std::vector<SlabSample*>& callchain,
                                    const SlabAccumulateInfo& acc_info) override {
    if (!in_kernel) {
      return nullptr;
    }
    const Symbol* symbol = thread_tree_->FindKernelSymbol(ip);
    return InsertCallChainSample(
        std::unique_ptr<SlabSample>(new SlabSample(symbol, sample->ptr, acc_info.bytes_req,
                                                   acc_info.bytes_alloc, 1, sample->gfp_flags, 0)),
        callchain);
  }

  const ThreadEntry* GetThreadOfSample(SlabSample*) override { return nullptr; }

  uint64_t GetPeriodForCallChain(const SlabAccumulateInfo&) override {
    // The percentage of a callchain is decided by its sample_count, so use 1
    // as the period when calling AddCallChain().
    return 1;
  }

  void UpdateSummary(const SlabSample* sample) override {
    total_requested_bytes_ += sample->bytes_req;
    total_allocated_bytes_ += sample->bytes_alloc;
    nr_allocations_++;
  }

  void MergeSample(SlabSample* sample1, SlabSample* sample2) override {
    sample1->bytes_req += sample2->bytes_req;
    sample1->bytes_alloc += sample2->bytes_alloc;
    sample1->sample_count += sample2->sample_count;
  }

 private:
  ThreadTree* thread_tree_;
  uint64_t total_requested_bytes_;
  uint64_t total_allocated_bytes_;
  uint64_t nr_allocations_;
  uint64_t nr_frees_;
  uint64_t nr_cross_cpu_allocations_;

  std::unordered_map<uint64_t, SlabFormat*> event_id_to_format_map_;
  std::vector<std::unique_ptr<SlabFormat>> formats_;
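  // Maps the ptr of each live allocation to (allocating cpu, sample), so a
  // later free on a different cpu can be counted as a cross-cpu allocation.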
  std::unordered_map<uint64_t, std::pair<uint32_t, SlabSample*>> alloc_cpu_record_map_;
};

using SlabSampleTreeSorter = SampleTreeSorter<SlabSample>;
using SlabSampleTreeDisplayer = SampleTreeDisplayer<SlabSample, SlabSampleTree>;
using SlabSampleCallgraphDisplayer = CallgraphDisplayer<SlabSample, CallChainNode<SlabSample>>;

struct EventAttrWithName {
  perf_event_attr attr;
  std::string name;
  std::vector<uint64_t> event_ids;
};

class KmemCommand : public Command {
 public:
  KmemCommand()
      : Command("kmem", "collect kernel memory allocation information",
                // clang-format off
264 "Usage: kmem (record [record options] | report [report options])\n"
265 "kmem record\n"
266 "-g        Enable call graph recording. Same as '--call-graph fp'.\n"
267 "--slab    Collect slab allocation information. Default option.\n"
268 "Other record options provided by simpleperf record command are also available.\n"
269 "kmem report\n"
270 "--children  Print the accumulated allocation info appeared in the callchain.\n"
271 "            Can be used on perf.data recorded with `--call-graph fp` option.\n"
272 "-g [callee|caller]  Print call graph for perf.data recorded with\n"
273 "                    `--call-graph fp` option. If callee mode is used, the graph\n"
274 "                     shows how functions are called from others. Otherwise, the\n"
275 "                     graph shows how functions call others. Default is callee\n"
276 "                     mode. The percentage shown in the graph is determined by\n"
277 "                     the hit count of the callchain.\n"
278 "-i          Specify path of record file, default is perf.data\n"
279 "-o report_file_name  Set report file name, default is stdout.\n"
280 "--slab      Report slab allocation information. Default option.\n"
281 "--slab-sort key1,key2,...\n"
282 "            Select the keys to sort and print slab allocation information.\n"
283 "            Should be used with --slab option. Possible keys include:\n"
284 "              hit         -- the allocation count.\n"
285 "              caller      -- the function calling allocation.\n"
286 "              ptr         -- the address of the allocated space.\n"
287 "              bytes_req   -- the total requested space size.\n"
288 "              bytes_alloc -- the total allocated space size.\n"
289 "              fragment    -- the extra allocated space size\n"
290 "                             (bytes_alloc - bytes_req).\n"
291 "              gfp_flags   -- the flags used for allocation.\n"
292 "              pingpong    -- the count of allocations that are freed not on\n"
293 "                             the cpu allocating them.\n"
294 "            The default slab sort keys are:\n"
295 "              hit,caller,bytes_req,bytes_alloc,fragment,pingpong.\n"
296                 // clang-format on
297                 ),
        is_record_(false),
        use_slab_(false),
        accumulate_callchain_(false),
        print_callgraph_(false),
        callgraph_show_callee_(false),
        record_filename_("perf.data"),
        record_file_arch_(GetBuildArch()) {}

  bool Run(const std::vector<std::string>& args);

 private:
  bool ParseOptions(const std::vector<std::string>& args, std::vector<std::string>* left_args);
  bool RecordKmemInfo(const std::vector<std::string>& record_args);
  bool ReportKmemInfo();
  bool PrepareToBuildSampleTree();
  void ReadEventAttrsFromRecordFile();
  bool ReadFeaturesFromRecordFile();
  bool ReadSampleTreeFromRecordFile();
  bool ProcessRecord(std::unique_ptr<Record> record);
  void ProcessTracingData(const std::vector<char>& data);
  bool PrintReport();
  void PrintReportContext(FILE* fp);
  void PrintSlabReportContext(FILE* fp);

  bool is_record_;
  bool use_slab_;
  std::vector<std::string> slab_sort_keys_;
  bool accumulate_callchain_;
  bool print_callgraph_;
  bool callgraph_show_callee_;

  std::string record_filename_;
  std::unique_ptr<RecordFileReader> record_file_reader_;
  std::vector<EventAttrWithName> event_attrs_;
  std::string record_cmdline_;
  ArchType record_file_arch_;

  ThreadTree thread_tree_;
  SlabSampleTree slab_sample_tree_;
  std::unique_ptr<SlabSampleTreeBuilder> slab_sample_tree_builder_;
  std::unique_ptr<SlabSampleTreeSorter> slab_sample_tree_sorter_;
  std::unique_ptr<SlabSampleTreeDisplayer> slab_sample_tree_displayer_;

  std::string report_filename_;
};

bool KmemCommand::Run(const std::vector<std::string>& args) {
  std::vector<std::string> left_args;
  if (!ParseOptions(args, &left_args)) {
    return false;
  }
  if (!use_slab_) {
    use_slab_ = true;
  }
  if (is_record_) {
    return RecordKmemInfo(left_args);
  }
  return ReportKmemInfo();
}

bool KmemCommand::ParseOptions(const std::vector<std::string>& args,
                               std::vector<std::string>* left_args) {
  if (args.empty()) {
    LOG(ERROR) << "No subcommand specified";
    return false;
  }
  if (args[0] == "record") {
    if (!IsRoot()) {
      LOG(ERROR) << "simpleperf kmem record command needs root privilege";
      return false;
    }
    is_record_ = true;
    size_t i;
    for (i = 1; i < args.size() && !args[i].empty() && args[i][0] == '-'; ++i) {
      if (args[i] == "-g") {
        left_args->push_back("--call-graph");
        left_args->push_back("fp");
      } else if (args[i] == "--slab") {
        use_slab_ = true;
      } else {
        left_args->push_back(args[i]);
      }
    }
    left_args->insert(left_args->end(), args.begin() + i, args.end());
  } else if (args[0] == "report") {
    is_record_ = false;
    for (size_t i = 1; i < args.size(); ++i) {
      if (args[i] == "--children") {
        accumulate_callchain_ = true;
      } else if (args[i] == "-g") {
        print_callgraph_ = true;
        accumulate_callchain_ = true;
        callgraph_show_callee_ = true;
        if (i + 1 < args.size() && args[i + 1][0] != '-') {
          ++i;
          if (args[i] == "callee") {
            callgraph_show_callee_ = true;
          } else if (args[i] == "caller") {
            callgraph_show_callee_ = false;
          } else {
            LOG(ERROR) << "Unknown argument with -g option: " << args[i];
            return false;
          }
        }
      } else if (args[i] == "-i") {
        if (!NextArgumentOrError(args, &i)) {
          return false;
        }
        record_filename_ = args[i];
      } else if (args[i] == "-o") {
        if (!NextArgumentOrError(args, &i)) {
          return false;
        }
        report_filename_ = args[i];
      } else if (args[i] == "--slab") {
        use_slab_ = true;
      } else if (args[i] == "--slab-sort") {
        if (!NextArgumentOrError(args, &i)) {
          return false;
        }
        slab_sort_keys_ = android::base::Split(args[i], ",");
      } else {
        ReportUnknownOption(args, i);
        return false;
      }
    }
  } else {
    LOG(ERROR) << "Unknown subcommand for " << Name() << ": " << args[0]
               << ". Try `simpleperf help " << Name() << "`";
    return false;
  }
  return true;
}

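// Delegate to the simpleperf record command, recording the kmem alloc/free
// tracepoint events supported by the running kernel, system-wide (-a).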
bool KmemCommand::RecordKmemInfo(const std::vector<std::string>& record_args) {
  std::vector<std::string> args;
  if (use_slab_) {
    std::vector<std::string> trace_events = {"kmem:kmalloc",      "kmem:kmem_cache_alloc",
                                             "kmem:kmalloc_node", "kmem:kmem_cache_alloc_node",
                                             "kmem:kfree",        "kmem:kmem_cache_free"};
    for (const auto& name : trace_events) {
      if (ParseEventType(name)) {
        args.insert(args.end(), {"-e", name});
      }
    }
  }
  if (args.empty()) {
    LOG(ERROR) << "Kernel allocation-related trace events are not supported.";
    return false;
  }
  args.push_back("-a");
  args.insert(args.end(), record_args.begin(), record_args.end());
  std::unique_ptr<Command> record_cmd = CreateCommandInstance("record");
  if (record_cmd == nullptr) {
    LOG(ERROR) << "record command isn't available";
    return false;
  }
  return record_cmd->Run(args);
}

bool KmemCommand::ReportKmemInfo() {
  if (!PrepareToBuildSampleTree()) {
    return false;
  }
  record_file_reader_ = RecordFileReader::CreateInstance(record_filename_);
  if (record_file_reader_ == nullptr) {
    return false;
  }
  ReadEventAttrsFromRecordFile();
  if (!ReadFeaturesFromRecordFile()) {
    return false;
  }
  if (!ReadSampleTreeFromRecordFile()) {
    return false;
  }
  if (!PrintReport()) {
    return false;
  }
  return true;
}

bool KmemCommand::PrepareToBuildSampleTree() {
  if (use_slab_) {
    if (slab_sort_keys_.empty()) {
      slab_sort_keys_ = {"hit", "caller", "bytes_req", "bytes_alloc", "fragment", "pingpong"};
    }
    SampleComparator<SlabSample> comparator;
    SampleComparator<SlabSample> sort_comparator;
    SampleDisplayer<SlabSample, SlabSampleTree> displayer;
    std::string accumulated_name = accumulate_callchain_ ? "Accumulated_" : "";

    if (print_callgraph_) {
      displayer.AddExclusiveDisplayFunction(SlabSampleCallgraphDisplayer());
    }

    for (const auto& key : slab_sort_keys_) {
      if (key == "hit") {
        sort_comparator.AddCompareFunction(CompareSampleCount);
        displayer.AddDisplayFunction(accumulated_name + "Hit", DisplaySampleCount);
      } else if (key == "caller") {
        comparator.AddCompareFunction(CompareSymbol);
        displayer.AddDisplayFunction("Caller", DisplaySymbol);
      } else if (key == "ptr") {
        comparator.AddCompareFunction(ComparePtr);
        displayer.AddDisplayFunction("Ptr", DisplayPtr);
      } else if (key == "bytes_req") {
        sort_comparator.AddCompareFunction(CompareBytesReq);
        displayer.AddDisplayFunction(accumulated_name + "BytesReq", DisplayBytesReq);
      } else if (key == "bytes_alloc") {
        sort_comparator.AddCompareFunction(CompareBytesAlloc);
        displayer.AddDisplayFunction(accumulated_name + "BytesAlloc", DisplayBytesAlloc);
      } else if (key == "fragment") {
        sort_comparator.AddCompareFunction(CompareFragment);
        displayer.AddDisplayFunction(accumulated_name + "Fragment", DisplayFragment);
      } else if (key == "gfp_flags") {
        comparator.AddCompareFunction(CompareGfpFlags);
        displayer.AddDisplayFunction("GfpFlags", DisplayGfpFlags);
      } else if (key == "pingpong") {
        sort_comparator.AddCompareFunction(CompareCrossCpuAllocations);
        displayer.AddDisplayFunction("Pingpong", DisplayCrossCpuAllocations);
      } else {
        LOG(ERROR) << "Unknown sort key for slab allocation: " << key;
        return false;
      }
      slab_sample_tree_builder_.reset(new SlabSampleTreeBuilder(comparator, &thread_tree_));
      slab_sample_tree_builder_->SetCallChainSampleOptions(accumulate_callchain_, print_callgraph_,
                                                           !callgraph_show_callee_);
      sort_comparator.AddComparator(comparator);
      slab_sample_tree_sorter_.reset(new SlabSampleTreeSorter(sort_comparator));
      slab_sample_tree_displayer_.reset(new SlabSampleTreeDisplayer(displayer));
    }
  }
  return true;
}

void KmemCommand::ReadEventAttrsFromRecordFile() {
  std::vector<EventAttrWithId> attrs = record_file_reader_->AttrSection();
  for (const auto& attr_with_id : attrs) {
    EventAttrWithName attr;
    attr.attr = *attr_with_id.attr;
    attr.event_ids = attr_with_id.ids;
    attr.name = GetEventNameByAttr(attr.attr);
    event_attrs_.push_back(attr);
  }
}

bool KmemCommand::ReadFeaturesFromRecordFile() {
  record_file_reader_->LoadBuildIdAndFileFeatures(thread_tree_);
  std::string arch = record_file_reader_->ReadFeatureString(PerfFileFormat::FEAT_ARCH);
  if (!arch.empty()) {
    record_file_arch_ = GetArchType(arch);
    if (record_file_arch_ == ARCH_UNSUPPORTED) {
      return false;
    }
  }
  std::vector<std::string> cmdline = record_file_reader_->ReadCmdlineFeature();
  if (!cmdline.empty()) {
    record_cmdline_ = android::base::Join(cmdline, ' ');
  }
  if (record_file_reader_->HasFeature(PerfFileFormat::FEAT_TRACING_DATA)) {
    std::vector<char> tracing_data;
    if (!record_file_reader_->ReadFeatureSection(PerfFileFormat::FEAT_TRACING_DATA,
                                                 &tracing_data)) {
      return false;
    }
    ProcessTracingData(tracing_data);
  }
  return true;
}

bool KmemCommand::ReadSampleTreeFromRecordFile() {
  if (!record_file_reader_->ReadDataSection(
          [this](std::unique_ptr<Record> record) { return ProcessRecord(std::move(record)); })) {
    return false;
  }
  if (use_slab_) {
    slab_sample_tree_ = slab_sample_tree_builder_->GetSampleTree();
    slab_sample_tree_sorter_->Sort(slab_sample_tree_.samples, print_callgraph_);
  }
  return true;
}

bool KmemCommand::ProcessRecord(std::unique_ptr<Record> record) {
  thread_tree_.Update(*record);
  if (record->type() == PERF_RECORD_SAMPLE) {
    if (use_slab_) {
      slab_sample_tree_builder_->ProcessSampleRecord(
          *static_cast<const SampleRecord*>(record.get()));
    }
  } else if (record->type() == PERF_RECORD_TRACING_DATA ||
             record->type() == SIMPLE_PERF_RECORD_TRACING_DATA) {
    const auto& r = *static_cast<TracingDataRecord*>(record.get());
    ProcessTracingData(std::vector<char>(r.data, r.data + r.data_size));
  }
  return true;
}

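// Parse the tracepoint format descriptions in the tracing data and register
// the field layouts of the kmem alloc/free events with the sample tree builder.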
void KmemCommand::ProcessTracingData(const std::vector<char>& data) {
  Tracing tracing(data);
  for (auto& attr : event_attrs_) {
    if (attr.attr.type == PERF_TYPE_TRACEPOINT) {
      uint64_t trace_event_id = attr.attr.config;
      attr.name = tracing.GetTracingEventNameHavingId(trace_event_id);
      TracingFormat format = tracing.GetTracingFormatHavingId(trace_event_id);
      if (use_slab_) {
        if (format.name == "kmalloc" || format.name == "kmem_cache_alloc" ||
            format.name == "kmalloc_node" || format.name == "kmem_cache_alloc_node") {
          SlabFormat f;
          f.type = SlabFormat::KMEM_ALLOC;
          format.GetField("call_site", f.call_site);
          format.GetField("ptr", f.ptr);
          format.GetField("bytes_req", f.bytes_req);
          format.GetField("bytes_alloc", f.bytes_alloc);
          format.GetField("gfp_flags", f.gfp_flags);
          slab_sample_tree_builder_->AddSlabFormat(attr.event_ids, f);
        } else if (format.name == "kfree" || format.name == "kmem_cache_free") {
          SlabFormat f;
          f.type = SlabFormat::KMEM_FREE;
          format.GetField("call_site", f.call_site);
          format.GetField("ptr", f.ptr);
          slab_sample_tree_builder_->AddSlabFormat(attr.event_ids, f);
        }
      }
    }
  }
}

bool KmemCommand::PrintReport() {
  std::unique_ptr<FILE, decltype(&fclose)> file_handler(nullptr, fclose);
  FILE* report_fp = stdout;
  if (!report_filename_.empty()) {
    file_handler.reset(fopen(report_filename_.c_str(), "w"));
    if (file_handler == nullptr) {
      PLOG(ERROR) << "failed to open " << report_filename_;
      return false;
    }
    report_fp = file_handler.get();
  }
  PrintReportContext(report_fp);
  if (use_slab_) {
    fprintf(report_fp, "\n\n");
    PrintSlabReportContext(report_fp);
    slab_sample_tree_displayer_->DisplaySamples(report_fp, slab_sample_tree_.samples,
                                                &slab_sample_tree_);
  }
  return true;
}

void KmemCommand::PrintReportContext(FILE* fp) {
  if (!record_cmdline_.empty()) {
    fprintf(fp, "Cmdline: %s\n", record_cmdline_.c_str());
  }
  fprintf(fp, "Arch: %s\n", GetArchString(record_file_arch_).c_str());
  for (const auto& attr : event_attrs_) {
    fprintf(fp, "Event: %s (type %u, config %llu)\n", attr.name.c_str(), attr.attr.type,
            attr.attr.config);
  }
}

void KmemCommand::PrintSlabReportContext(FILE* fp) {
  fprintf(fp, "Slab allocation information:\n");
  fprintf(fp, "Total requested bytes: %" PRIu64 "\n", slab_sample_tree_.total_requested_bytes);
  fprintf(fp, "Total allocated bytes: %" PRIu64 "\n", slab_sample_tree_.total_allocated_bytes);
  uint64_t fragment =
      slab_sample_tree_.total_allocated_bytes - slab_sample_tree_.total_requested_bytes;
  double percentage = 0.0;
  if (slab_sample_tree_.total_allocated_bytes != 0) {
    percentage = 100.0 * fragment / slab_sample_tree_.total_allocated_bytes;
  }
  fprintf(fp, "Total fragment: %" PRIu64 ", %f%%\n", fragment, percentage);
  fprintf(fp, "Total allocations: %" PRIu64 "\n", slab_sample_tree_.nr_allocations);
  fprintf(fp, "Total frees: %" PRIu64 "\n", slab_sample_tree_.nr_frees);
  percentage = 0.0;
  if (slab_sample_tree_.nr_allocations != 0) {
    percentage =
        100.0 * slab_sample_tree_.nr_cross_cpu_allocations / slab_sample_tree_.nr_allocations;
  }
  fprintf(fp, "Total cross cpu allocation/free: %" PRIu64 ", %f%%\n",
          slab_sample_tree_.nr_cross_cpu_allocations, percentage);
  fprintf(fp, "\n");
}

}  // namespace

void RegisterKmemCommand() {
  RegisterCommand("kmem", [] { return std::unique_ptr<Command>(new KmemCommand()); });
}

}  // namespace simpleperf