[opt](cpu-profile) enable cpu profile in BE webui (#40330) (#41044)

bp #40330
This commit is contained in:
Mingyu Chen
2024-09-20 17:18:16 +08:00
committed by GitHub
parent 8e860a26a7
commit 596cfc9b18
12 changed files with 1821 additions and 52 deletions

View File

@ -93,4 +93,5 @@ header:
- "pytest/sys/data"
- "pytest/deploy/*.conf"
- "tools/jeprof"
- "tools/FlameGraph/*"
comment: on-failure

View File

@ -725,4 +725,12 @@ Apache 2.0, Copyright 2023 SAP SE or an SAP affiliate company, Johannes Bechberg
This project is maintained by the SapMachine team at SAP SE
----------------------------------------------------------------------------------
----------------------------------------------------------------------------------
be/tools/FlameGraph/*.pl: COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0
Unless otherwise noted, all files in this distribution are released
under the Common Development and Distribution License (CDDL).
Exceptions are noted within the associated source files.
----------------------------------------------------------------------------------

View File

@ -17,9 +17,15 @@
#include "http/action/pprof_actions.h"
#if !defined(__SANITIZE_ADDRESS__) && !defined(ADDRESS_SANITIZER) && !defined(LEAK_SANITIZER) && \
!defined(THREAD_SANITIZER) && !defined(USE_JEMALLOC)
#include <gperftools/heap-profiler.h> // IWYU pragma: keep
#include <gperftools/malloc_extension.h> // IWYU pragma: keep
#include <gperftools/profiler.h> // IWYU pragma: keep
#endif
#if !defined(__SANITIZE_ADDRESS__) && !defined(ADDRESS_SANITIZER) && !defined(LEAK_SANITIZER) && \
!defined(THREAD_SANITIZER)
#include <gperftools/profiler.h> // IWYU pragma: keep
#endif
#include <stdio.h>
#include <fstream>
@ -133,8 +139,7 @@ public:
};
void ProfileAction::handle(HttpRequest* req) {
#if defined(ADDRESS_SANITIZER) || defined(LEAK_SANITIZER) || defined(THREAD_SANITIZER) || \
defined(USE_JEMALLOC)
#if defined(ADDRESS_SANITIZER) || defined(LEAK_SANITIZER) || defined(THREAD_SANITIZER)
std::string str = "CPU profiling is not available with address sanitizer or jemalloc builds.";
HttpChannel::send_reply(req, str);
#else
@ -170,6 +175,7 @@ void ProfileAction::handle(HttpRequest* req) {
prof_file.close();
std::string str = ss.str();
HttpChannel::send_reply(req, str);
return;
}
// text type. we will return readable content via http response
@ -185,7 +191,7 @@ void ProfileAction::handle(HttpRequest* req) {
std::string svg_file_content;
std::string flamegraph_install_dir =
std::string(std::getenv("DORIS_HOME")) + "/tools/FlameGraph/";
Status st = PprofUtils::generate_flamegraph(30, flamegraph_install_dir, false,
Status st = PprofUtils::generate_flamegraph(seconds, flamegraph_install_dir, false,
&svg_file_content);
if (!st.ok()) {
HttpChannel::send_reply(req, st.to_string());

View File

@ -281,8 +281,7 @@ void heap_handler(const WebPageHandler::ArgumentMap& args, std::stringstream* ou
void cpu_handler(const WebPageHandler::ArgumentMap& args, std::stringstream* output) {
(*output) << "<h2>CPU Profile</h2>" << std::endl;
#if defined(ADDRESS_SANITIZER) || defined(LEAK_SANITIZER) || defined(THREAD_SANITIZER) || \
defined(USE_JEMALLOC)
#if defined(ADDRESS_SANITIZER) || defined(LEAK_SANITIZER) || defined(THREAD_SANITIZER)
(*output) << "<pre>" << std::endl;
(*output) << "CPU profiling is not available with address sanitizer builds." << std::endl;
(*output) << "</pre>" << std::endl;
@ -313,7 +312,8 @@ void cpu_handler(const WebPageHandler::ArgumentMap& args, std::stringstream* out
<< std::endl;
(*output) << "And you need to download the FlameGraph and place it under 'be/tools/FlameGraph'."
<< std::endl;
(*output) << "Finally, check if the following files exist" << std::endl;
(*output) << "Finally, check if the following files exist. And should be executable."
<< std::endl;
(*output) << std::endl;
(*output) << " be/tools/FlameGraph/stackcollapse-perf.pl" << std::endl;
(*output) << " be/tools/FlameGraph/flamegraph.pl" << std::endl;
@ -333,9 +333,6 @@ void cpu_handler(const WebPageHandler::ArgumentMap& args, std::stringstream* out
<< std::endl;
(*output) << " <br/>" << std::endl;
(*output) << " <div id=\"cpuResult\"><pre id=\"cpuContent\"></pre></div>" << std::endl;
(*output) << " <br/>" << std::endl;
(*output) << " <div id=\"cpuResultGraph\"><pre id=\"cpuContentGraph\"></pre></div>"
<< std::endl;
(*output) << "</div>" << std::endl;
// for text profile
@ -348,14 +345,14 @@ void cpu_handler(const WebPageHandler::ArgumentMap& args, std::stringstream* out
(*output) << " type: \"GET\"," << std::endl;
(*output) << " dataType: \"text\"," << std::endl;
(*output) << " url: \"pprof/profile?type=text\"," << std::endl;
(*output) << " timeout: 60000," << std::endl;
(*output) << " timeout: 120000," << std::endl;
(*output) << " success: function (result) {" << std::endl;
(*output) << " document.getElementById(\"cpuContent\").innerText = result;"
<< std::endl;
(*output) << " }" << std::endl;
(*output) << " ," << std::endl;
(*output) << " error: function (result) {" << std::endl;
(*output) << " alert(result);" << std::endl;
(*output) << " alert(JSON.stringify(result));" << std::endl;
(*output) << " }" << std::endl;
(*output) << " ," << std::endl;
(*output) << " });" << std::endl;
@ -363,21 +360,21 @@ void cpu_handler(const WebPageHandler::ArgumentMap& args, std::stringstream* out
// for graph profile
(*output) << "$('#getCpuGraph').click(function () {" << std::endl;
(*output) << " document.getElementById(\"cpuContentGraph\").innerText = \"Sampling... (30 "
(*output) << " document.getElementById(\"cpuContent\").innerText = \"Sampling... (30 "
"seconds)\";"
<< std::endl;
(*output) << " $.ajax({" << std::endl;
(*output) << " type: \"GET\"," << std::endl;
(*output) << " dataType: \"text\"," << std::endl;
(*output) << " url: \"pprof/profile?type=flamegraph\"," << std::endl;
(*output) << " timeout: 60000," << std::endl;
(*output) << " timeout: 120000," << std::endl;
(*output) << " success: function (result) {" << std::endl;
(*output) << " document.getElementById(\"cpuResultGraph\").innerHTML = result;"
(*output) << " document.getElementById(\"cpuContent\").innerHTML = result;"
<< std::endl;
(*output) << " }" << std::endl;
(*output) << " ," << std::endl;
(*output) << " error: function (result) {" << std::endl;
(*output) << " alert(result);" << std::endl;
(*output) << " alert(JSON.stringify(result));" << std::endl;
(*output) << " }" << std::endl;
(*output) << " ," << std::endl;
(*output) << " });" << std::endl;

View File

@ -114,6 +114,7 @@ Status PprofUtils::get_readable_profile(const std::string& file_or_content, bool
std::string final_cmd =
pprof_cmd + strings::Substitute(" --text $0 $1", self_cmdline, final_file);
AgentUtils util;
LOG(INFO) << "begin to run command: " << final_cmd;
bool rc = util.exec_cmd(final_cmd, &cmd_output, false);
// delete raw file
@ -158,6 +159,7 @@ Status PprofUtils::generate_flamegraph(int32_t sample_seconds,
AgentUtils util;
std::string cmd_output;
LOG(INFO) << "begin to run command: " << cmd.str();
bool rc = util.exec_cmd(cmd.str(), &cmd_output);
if (!rc) {
static_cast<void>(io::global_local_filesystem()->delete_file(tmp_file.str()));
@ -174,6 +176,7 @@ Status PprofUtils::generate_flamegraph(int32_t sample_seconds,
std::stringstream gen_cmd;
gen_cmd << perf_cmd << " script -i " << tmp_file.str() << " | " << stackcollapse_perf_pl
<< " | " << flamegraph_pl << " > " << graph_file.str();
LOG(INFO) << "begin to run command: " << gen_cmd.str();
rc = util.exec_cmd(gen_cmd.str(), &res_content);
if (!rc) {
static_cast<void>(io::global_local_filesystem()->delete_file(tmp_file.str()));
@ -185,6 +188,7 @@ Status PprofUtils::generate_flamegraph(int32_t sample_seconds,
std::stringstream gen_cmd;
gen_cmd << perf_cmd << " script -i " << tmp_file.str() << " | " << stackcollapse_perf_pl
<< " | " << flamegraph_pl;
LOG(INFO) << "begin to run command: " << gen_cmd.str();
rc = util.exec_cmd(gen_cmd.str(), &res_content, false);
if (!rc) {
static_cast<void>(io::global_local_filesystem()->delete_file(tmp_file.str()));

View File

@ -826,7 +826,7 @@ Status VFileScanner::_get_next_reader() {
std::unique_ptr<ParquetReader> parquet_reader = ParquetReader::create_unique(
_profile, *_params, range, _state->query_options().batch_size,
const_cast<cctz::time_zone*>(&_state->timezone_obj()), _io_ctx.get(), _state,
_shoudl_enable_file_meta_cache() ? ExecEnv::GetInstance()->file_meta_cache()
_should_enable_file_meta_cache() ? ExecEnv::GetInstance()->file_meta_cache()
: nullptr,
_state->query_options().enable_parquet_lazy_mat);
{

View File

@ -243,7 +243,7 @@ private:
// 1. max_external_file_meta_cache_num is > 0
// 2. the file number is less than 1/3 of the cache's capacity
// Otherwise, the cache miss rate will be high
bool _shoudl_enable_file_meta_cache() {
bool _should_enable_file_meta_cache() {
return config::max_external_file_meta_cache_num > 0 &&
_split_source->num_scan_ranges() < config::max_external_file_meta_cache_num / 3;
}

View File

@ -666,7 +666,8 @@ if [[ "${OUTPUT_BE_BINARY}" -eq 1 ]]; then
install -d "${DORIS_OUTPUT}/be/bin" \
"${DORIS_OUTPUT}/be/conf" \
"${DORIS_OUTPUT}/be/lib" \
"${DORIS_OUTPUT}/be/www"
"${DORIS_OUTPUT}/be/www" \
"${DORIS_OUTPUT}/be/tools/FlameGraph"
cp -r -p "${DORIS_HOME}/be/output/bin"/* "${DORIS_OUTPUT}/be/bin"/
cp -r -p "${DORIS_HOME}/be/output/conf"/* "${DORIS_OUTPUT}/be/conf"/
@ -712,6 +713,7 @@ EOF
fi
cp -r -p "${DORIS_HOME}/webroot/be"/* "${DORIS_OUTPUT}/be/www"/
cp -r -p "${DORIS_HOME}/tools/FlameGraph"/* "${DORIS_OUTPUT}/be/tools/FlameGraph"/
if [[ "${STRIP_DEBUG_INFO}" = "ON" ]]; then
cp -r -p "${DORIS_HOME}/be/output/lib/debug_info" "${DORIS_OUTPUT}/be/lib"/
fi

65
dist/LICENSE-dist.txt vendored
View File

@ -1503,35 +1503,36 @@ LGPL -- licenes/LICENSE-LGPL.txt
* gsasl: 1.10.0/1.8.0
Other dependencies:
* libevent: 2.1.12 -- license/LICENSE-libevent.txt
* openssl: 1.1.1s -- license/LICENSE-openssl.txt
* gflag: 2.2.2 -- license/LICENSE-gflag.txt
* glog: 0.4.0 -- license/LICENSE-glog.txt
* gtest: 1.11.0 -- license/LICENSE-gtest.txt
* snappy: 1.1.8 -- license/LICENSE-snappy.txt
* gperftools: 2.9.1 -- license/LICENSE-gperftools.txt
* zlib: 1.2.11 -- license/LICENSE-zlib.txt
* lz4: 1.9.3 -- license/LICENSE-lz4.txt
* bzip2: 1.0.8 -- license/LICENSE-bzip2.txt
* rapidjson@1a803826 -- license/LICENSE-rapidjson.txt
* curl: 7.79.0 -- license/LICENSE-curl.txt
* re2: 2021-02-02 -- license/LICENSE-re2.txt
* hyperscan: 5.4.0 -- license/LICENSE-hyperscan.txt
* vectorscan: 5.4.7 -- license/LICENSE-vectorscan.txt
* boost: 1.73.0 -- license/LICENSE-boost.txt
* unixodbc: 2.3.7 -- license/LICENSE-unixodbc.txt
* leveldb: 1.23 -- license/LICENSE-leveldb.txt
* cyrus-sasl: 2.1.27 -- license/LICENSE-cyrus-sasl.txt
* librdkafka: 1.8.2 -- license/LICENSE-librdkafka.txt
* zstd: 1.5.2 -- license/LICENSE-zstd.txt
* brotli: 1.0.9 -- license/LICENSE-brotli.txt
* bitshuffle: 0.5.1 -- license/LICENSE-bigshuffle.txt
* fmt: 7.1.3 -- license/LICENSE-fmt.txt
* jemalloc: 5.3.0 -- license/LICENSE-jemolloc.txt
* lzma@master -- license/LICENSE-lzma.txt
* libdivide: 5.0 -- license/LICENSE-libdivide.txt
* pdqsort: 0.0.0+git20180419 -- license/LICENSE-pdqsort.txt
* breakpad@38ee0be -- license/LICENSE-breakpod.txt
* xsimd: xmid@e9234cd6 -- license/LICENSE-xsimd.txt
* xxhash: 0.8.1 -- license/LICENSE-xxhash.txt
* concurrentqueue: 1.0.3 -- license/LICENSE-concurrentqueue.txt
* libevent: 2.1.12 -- licenses/LICENSE-libevent.txt
* openssl: 1.1.1s -- licenses/LICENSE-openssl.txt
* gflag: 2.2.2 -- licenses/LICENSE-gflag.txt
* glog: 0.4.0 -- licenses/LICENSE-glog.txt
* gtest: 1.11.0 -- licenses/LICENSE-gtest.txt
* snappy: 1.1.8 -- licenses/LICENSE-snappy.txt
* gperftools: 2.9.1 -- licenses/LICENSE-gperftools.txt
* zlib: 1.2.11 -- licenses/LICENSE-zlib.txt
* lz4: 1.9.3 -- licenses/LICENSE-lz4.txt
* bzip2: 1.0.8 -- licenses/LICENSE-bzip2.txt
* rapidjson@1a803826 -- licenses/LICENSE-rapidjson.txt
* curl: 7.79.0 -- licenses/LICENSE-curl.txt
* re2: 2021-02-02 -- licenses/LICENSE-re2.txt
* hyperscan: 5.4.0 -- licenses/LICENSE-hyperscan.txt
* vectorscan: 5.4.7 -- licenses/LICENSE-vectorscan.txt
* boost: 1.73.0 -- licenses/LICENSE-boost.txt
* unixodbc: 2.3.7 -- licenses/LICENSE-unixodbc.txt
* leveldb: 1.23 -- licenses/LICENSE-leveldb.txt
* cyrus-sasl: 2.1.27 -- licenses/LICENSE-cyrus-sasl.txt
* librdkafka: 1.8.2 -- licenses/LICENSE-librdkafka.txt
* zstd: 1.5.2 -- licenses/LICENSE-zstd.txt
* brotli: 1.0.9 -- licenses/LICENSE-brotli.txt
* bitshuffle: 0.5.1 -- licenses/LICENSE-bigshuffle.txt
* fmt: 7.1.3 -- licenses/LICENSE-fmt.txt
* jemalloc: 5.3.0 -- licenses/LICENSE-jemolloc.txt
* lzma@master -- licenses/LICENSE-lzma.txt
* libdivide: 5.0 -- licenses/LICENSE-libdivide.txt
* pdqsort: 0.0.0+git20180419 -- licenses/LICENSE-pdqsort.txt
* breakpad@38ee0be -- licenses/LICENSE-breakpod.txt
* xsimd: xmid@e9234cd6 -- licenses/LICENSE-xsimd.txt
* xxhash: 0.8.1 -- licenses/LICENSE-xxhash.txt
* concurrentqueue: 1.0.3 -- licenses/LICENSE-concurrentqueue.txt
* FlameGraph -- licenses/LICENSE-CDDL-1.0.txt

13
tools/FlameGraph/README Normal file
View File

@ -0,0 +1,13 @@
These 2 files:
- flamegraph.pl
- stackcollapse-perf.pl
are copied from:
https://github.com/brendangregg/FlameGraph/blob/master/flamegraph.pl
https://github.com/brendangregg/FlameGraph/blob/master/stackcollapse-perf.pl
which are under license:
https://github.com/brendangregg/FlameGraph/blob/master/docs/cddl1.txt

1302
tools/FlameGraph/flamegraph.pl Executable file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,435 @@
#!/usr/bin/perl -w
#
# stackcollapse-perf.pl collapse perf samples into single lines.
#
# Parses a list of multiline stacks generated by "perf script", and
# outputs a semicolon separated stack followed by a space and a count.
# If memory addresses (+0xd) are present, they are stripped, and resulting
# identical stacks are collapsed with their counts summed.
#
# USAGE: ./stackcollapse-perf.pl [options] infile > outfile
#
# Run "./stackcollapse-perf.pl -h" to list options.
#
# Example input:
#
# swapper 0 [000] 158665.570607: cpu-clock:
# ffffffff8103ce3b native_safe_halt ([kernel.kallsyms])
# ffffffff8101c6a3 default_idle ([kernel.kallsyms])
# ffffffff81013236 cpu_idle ([kernel.kallsyms])
# ffffffff815bf03e rest_init ([kernel.kallsyms])
# ffffffff81aebbfe start_kernel ([kernel.kallsyms].init.text)
# [...]
#
# Example output:
#
# swapper;start_kernel;rest_init;cpu_idle;default_idle;native_safe_halt 1
#
# Input may be created and processed using:
#
# perf record -a -g -F 997 sleep 60
# perf script | ./stackcollapse-perf.pl > out.stacks-folded
#
# The output of "perf script" should include stack traces. If these are missing
# for you, try manually selecting the perf script output; eg:
#
# perf script -f comm,pid,tid,cpu,time,event,ip,sym,dso,trace | ...
#
# This is also required for the --pid or --tid options, so that the output has
# both the PID and TID.
#
# Copyright 2012 Joyent, Inc. All rights reserved.
# Copyright 2012 Brendan Gregg. All rights reserved.
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at docs/cddl1.txt or
# http://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at docs/cddl1.txt.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# 02-Mar-2012 Brendan Gregg Created this.
# 02-Jul-2014 " " Added process name to stacks.
use strict;
use Getopt::Long;
# Running sample totals, keyed by the folded (semicolon-joined) stack string.
my %collapsed;

# Add $count samples to the total recorded for the folded stack $stack.
sub remember_stack {
	my $stack = shift;
	my $count = shift;
	$collapsed{$stack} += $count;
}
# Option flags (several are set from the command line via GetOptions) and
# parsing state that is shared with the main loop.
my $annotate_kernel = 0; # put an annotation on kernel function
my $annotate_jit = 0; # put an annotation on jit symbols
my $annotate_all = 0; # enable all annotations
my $include_pname = 1; # include process names in stacks
my $include_pid = 0; # include process ID with process name
my $include_tid = 0; # include process & thread ID with process name
my $include_addrs = 0; # include raw address where a symbol can't be found
my $tidy_java = 1; # condense Java signatures
my $tidy_generic = 1; # clean up function names a little
my $target_pname; # target process name from perf invocation
my $event_filter = ""; # event type filter, defaults to first encountered event
my $event_defaulted = 0; # whether we defaulted to an event (none provided)
my $event_warning = 0; # if we printed a warning for the event
my $show_inline = 0; # --inline: un-inline frames using addr2line
my $show_context = 0; # --context: add source context to the --inline frames
my $srcline_in_input = 0; # if there are extra lines with source location (perf script -F+srcline)
# Parse the command-line switches into the option variables. If GetOptions
# reports failure, the script dies and prints the usage text from the heredoc.
GetOptions('inline' => \$show_inline,
'context' => \$show_context,
'srcline' => \$srcline_in_input,
'pid' => \$include_pid,
'kernel' => \$annotate_kernel,
'jit' => \$annotate_jit,
'all' => \$annotate_all,
'tid' => \$include_tid,
'addrs' => \$include_addrs,
'event-filter=s' => \$event_filter)
or die <<USAGE_END;
USAGE: $0 [options] infile > outfile\n
--pid # include PID with process names [1]
--tid # include TID and PID with process names [1]
--inline # un-inline using addr2line
--all # all annotations (--kernel --jit)
--kernel # annotate kernel functions with a _[k]
--jit # annotate jit functions with a _[j]
--context # adds source context to --inline
--srcline # parses output of 'perf script -F+srcline' and adds source context
--addrs # include raw addresses where symbols can't be found
--event-filter=EVENT # event name filter\n
[1] perf script must emit both PID and TIDs for these to work; eg, Linux < 4.1:
perf script -f comm,pid,tid,cpu,time,event,ip,sym,dso,trace
for Linux >= 4.1:
perf script -F comm,pid,tid,cpu,time,event,ip,sym,dso,trace
If you save this output add --header on Linux >= 3.14 to include perf info.
USAGE_END
# --all is shorthand for turning on both the kernel and the jit annotations
if ($annotate_all) {
$annotate_kernel = $annotate_jit = 1;
}
# Memoized un-inlining results, stored as $inlineCache{address}{module}.
my %inlineCache;
# Symbol-table listings keyed by module path, filled in by inline().
my %nmCache;

# Record $resolved as the un-inlined frame list for address $addr in $module.
sub inlineCacheAdd {
	my ($addr, $module, $resolved) = @_;

	# Address already has an entry: just add/overwrite this module's slot.
	return $inlineCache{$addr}{$module} = $resolved
		if defined $inlineCache{$addr};

	# First result for this address: create its per-module table.
	return $inlineCache{$addr} = { $module => $resolved };
}
# Run an external program WITHOUT going through a shell and return everything
# it wrote to stdout. The command is passed as a list (program, then its
# arguments), so arguments containing spaces, parentheses or other shell
# metacharacters are handed over verbatim instead of being interpreted.
# Returns "" when the program cannot be started or prints nothing.
sub captureCommandOutput {
	my @cmd = @_;

	open(my $fh, '-|', @cmd) or return "";
	local $/;	# slurp the whole output in one read
	my $output = <$fh>;
	close($fh);

	return defined($output) ? $output : "";
}

# for the --inline option
#
# Resolve the address $pc inside module $mod into a ";"-joined list of
# function names (outermost first) using addr2line, expanding inlined calls.
# $rawfunc is the symbol text from the perf stack line; when addr2line cannot
# resolve $pc and $rawfunc looks like "func+0xOFFSET", the symbol's base
# address is looked up with nm and the lookup is retried at base+offset.
# Results are memoized per ($pc, $mod). Returns "" when nothing could be
# resolved (e.g. addr2line is missing or $mod cannot be read).
sub inline {
	my ($pc, $rawfunc, $mod) = @_;

	return $inlineCache{$pc}{$mod} if defined($inlineCache{$pc}{$mod});

	# capture addr2line output; $mod comes straight from the perf output, so
	# it must never be interpolated into a shell command line
	my $a2l_output = captureCommandOutput(
		'addr2line', '-a', $pc, '-e', $mod, '-i', '-f', '-s', '-C');

	# remove first line
	$a2l_output =~ s/^(.*\n){1}//;

	if ($a2l_output =~ /\?\?\n\?\?:0/) {
		# if addr2line fails and rawfunc is func+offset, then fall back to it
		if ($rawfunc =~ /^(.+)\+0x([0-9a-f]+)$/) {
			my $func = $1;
			my $addr = hex $2;

			$nmCache{$mod} = captureCommandOutput('nm', $mod)
				unless defined $nmCache{$mod};

			if ($nmCache{$mod} =~ /^([0-9a-f]+) . \Q$func\E$/m) {
				my $base = hex $1;
				my $newPc = sprintf "0x%x", $base+$addr;
				# retry with an empty rawfunc so this fallback runs at most once
				my $result = inline($newPc, '', $mod);
				inlineCacheAdd($pc, $mod, $result);
				return $result;
			}
		}
	}

	# addr2line prints two lines per frame: the function name, then file:line
	my @fullfunc;
	my $one_item = "";
	for (split /^/, $a2l_output) {
		chomp $_;

		# remove discriminator info if exists
		$_ =~ s/ \(discriminator \S+\)//;

		if ($one_item eq "") {
			$one_item = $_;
		} else {
			if ($show_context == 1) {
				unshift @fullfunc, $one_item . ":$_";
			} else {
				unshift @fullfunc, $one_item;
			}
			$one_item = "";
		}
	}

	my $result = join ";" , @fullfunc;

	inlineCacheAdd($pc, $mod, $result);

	return $result;
}
# State of the sample currently being parsed.
my @stack;	# frames collected so far, outermost caller first
my $pname;	# label for the current sample; undef between samples and for filtered samples
my $m_pid;
my $m_tid;
my $m_period;

#
# Main loop
#
# Reads "perf script" output line by line. Each sample consists of an event
# header line, a run of stack-frame lines, and a terminating blank line; on
# the blank line the collected frames are folded and handed to
# remember_stack().
#
while (defined($_ = <>)) {

	# find the name of the process launched by perf, by stepping backwards
	# over the args to find the first non-option (no dash):
	if (/^# cmdline/) {
		my @args = split ' ', $_;
		foreach my $arg (reverse @args) {
			if ($arg !~ /^-/) {
				$target_pname = $arg;
				$target_pname =~ s:.*/::;  # strip pathname
				last;
			}
		}
	}

	# skip remaining comments
	next if m/^#/;
	chomp;

	# end of stack. save cached data.
	if (m/^$/) {
		# ignore filtered samples
		next if not $pname;

		# $pname is guaranteed to be set past the check above
		unshift @stack, $pname if $include_pname;
		remember_stack(join(";", @stack), $m_period) if @stack;
		undef @stack;
		undef $pname;
		next;
	}

	#
	# event record start
	#
	if (/^(\S.+?)\s+(\d+)\/*(\d+)*\s+/) {
		# default "perf script" output has TID but not PID
		# eg, "java 25607 4794564.109216: 1 cycles:"
		# eg, "java 12688 [002] 6544038.708352: 235 cpu-clock:"
		# eg, "V8 WorkerThread 25607 4794564.109216: 104345 cycles:"
		# eg, "java 24636/25607 [000] 4794564.109216: 1 cycles:"
		# eg, "java 12688/12764 6544038.708352: 10309278 cpu-clock:"
		# eg, "V8 WorkerThread 24636/25607 [000] 94564.109216: 100 cycles:"
		# other combinations possible
		my ($comm, $pid, $tid, $period) = ($1, $2, $3, "");
		if (not $tid) {
			$tid = $pid;
			$pid = "?";
		}

		if (/:\s*(\d+)*\s+(\S+):\s*$/) {
			$period = $1;
			my $event = $2;

			if ($event_filter eq "") {
				# By default only show events of the first encountered
				# event type. Merging together different types, such as
				# instructions and cycles, produces misleading results.
				$event_filter = $event;
				$event_defaulted = 1;
			} elsif ($event ne $event_filter) {
				if ($event_defaulted and $event_warning == 0) {
					# only print this warning if necessary:
					# when we defaulted and there was
					# multiple event types.
					# Report the event type that is being KEPT
					# ($event_filter), not the one being dropped.
					print STDERR "Filtering for events of type: $event_filter\n";
					$event_warning = 1;
				}
				# $pname stays unset, so this sample's frames are skipped
				next;
			}
		}

		if (not $period) {
			$period = 1
		}
		($m_pid, $m_tid, $m_period) = ($pid, $tid, $period);

		if ($include_tid) {
			$pname = "$comm-$m_pid/$m_tid";
		} elsif ($include_pid) {
			$pname = "$comm-$m_pid";
		} else {
			$pname = "$comm";
		}
		$pname =~ tr/ /_/;

	#
	# stack line
	#
	} elsif (/^\s*(\w+)\s*(.+) \((.*)\)/) {
		# ignore filtered samples
		next if not $pname;

		my ($pc, $rawfunc, $mod) = ($1, $2, $3);

		if ($show_inline == 1 && $mod !~ m/(perf-\d+.map|kernel\.|\[[^\]]+\])/) {
			my $inlineRes = inline($pc, $rawfunc, $mod);
			# - empty result this happens e.g., when $mod does not exist or is a path to a compressed kernel module
			#   if this happens, the user will see error message from addr2line written to stderr
			# - if addr2line results in "??" , then it's much more sane to fall back than produce a '??' in graph
			if($inlineRes ne "" and $inlineRes ne "??" and $inlineRes ne "??:??:0" ) {
				unshift @stack, $inlineRes;
				next;
			}
		}

		# Linux 4.8 included symbol offsets in perf script output by default, eg:
		# 7fffb84c9afc cpu_startup_entry+0x800047c022ec ([kernel.kallsyms])
		# strip these off:
		$rawfunc =~ s/\+0x[\da-f]+$//;

		next if $rawfunc =~ /^\(/;		# skip process names

		my $is_unknown=0;
		my @inline;
		for (split /\->/, $rawfunc) {
			my $func = $_;

			if ($func eq "[unknown]") {
				if ($mod ne "[unknown]") { # use module name instead, if known
					$func = $mod;
					$func =~ s/.*\///;
				} else {
					$func = "unknown";
					$is_unknown=1;
				}

				if ($include_addrs) {
					$func = "\[$func \<$pc\>\]";
				} else {
					$func = "\[$func\]";
				}
			}

			if ($tidy_generic) {
				$func =~ s/;/:/g;
				if ($func !~ m/\.\(.*\)\./) {
					# This doesn't look like a Go method name (such as
					# "net/http.(*Client).Do"), so everything after the first open
					# paren (that is not part of an "(anonymous namespace)") is
					# just noise.
					$func =~ s/\((?!anonymous namespace\)).*//;
				}
				# now tidy this horrible thing:
				# 13a80b608e0a RegExp:[&<>\"\'] (/tmp/perf-7539.map)
				$func =~ tr/"\'//d;
				# fall through to $tidy_java
			}

			if ($tidy_java and $pname =~ m/^java/) {
				# along with $tidy_generic, converts the following:
				#	Lorg/mozilla/javascript/ContextFactory;.call(Lorg/mozilla/javascript/ContextAction;)Ljava/lang/Object;
				#	Lorg/mozilla/javascript/ContextFactory;.call(Lorg/mozilla/javascript/C
				#	Lorg/mozilla/javascript/MemberBox;.<init>(Ljava/lang/reflect/Method;)V
				# into:
				#	org/mozilla/javascript/ContextFactory:.call
				#	org/mozilla/javascript/ContextFactory:.call
				#	org/mozilla/javascript/MemberBox:.init
				$func =~ s/^L// if $func =~ m:/:;
			}

			#
			# Annotations
			#
			# detect inlined from the @inline array
			# detect kernel from the module name; eg, frames to parse include:
			#          ffffffff8103ce3b native_safe_halt ([kernel.kallsyms])
			#          8c3453 tcp_sendmsg (/lib/modules/4.3.0-rc1-virtual/build/vmlinux)
			#          7d8 ipv4_conntrack_local+0x7f8f80b8 ([nf_conntrack_ipv4])
			# detect jit from the module name; eg:
			#          7f722d142778 Ljava/io/PrintStream;::print (/tmp/perf-19982.map)
			if (scalar(@inline) > 0) {
				$func .= "_[i]" unless $func =~ m/\_\[i\]/;	# inlined
			} elsif ($annotate_kernel == 1 && $mod =~ m/(^\[|vmlinux$)/ && $mod !~ /unknown/) {
				$func .= "_[k]";	# kernel
			} elsif ($annotate_jit == 1 && $mod =~ m:/tmp/perf-\d+\.map:) {
				$func .= "_[j]" unless $func =~ m/\_\[j\]/;	# jitted
			}

			#
			# Source lines
			#
			#
			# Sample outputs:
			#   | a.out 35081 252436.005167:     667783 cycles:
			#   |                   408ebb some_method_name+0x8b (/full/path/to/a.out)
			#   |   uniform_int_dist.h:300
			#   |                   4069f5 main+0x935 (/full/path/to/a.out)
			#   |   file.cpp:137
			#   |             7f6d2148eb25 __libc_start_main+0xd5 (/lib64/libc-2.33.so)
			#   |   libc-2.33.so[27b25]
			#
			#   | a.out 35081 252435.738165:     306459 cycles:
			#   |             7f6d213c2750 [unknown] (/usr/lib64/libkmod.so.2.3.6)
			#   |   libkmod.so.2.3.6[6750]
			#
			#   | a.out 35081 252435.738373:     315813 cycles:
			#   |             7f6d215ca51b __strlen_avx2+0x4b (/lib64/libc-2.33.so)
			#   |   libc-2.33.so[16351b]
			#   |             7ffc71ee9580 [unknown] ([unknown])
			#   |
			#
			#   | a.out 35081 252435.718940:     247984 cycles:
			#   |         ffffffff814f9302 up_write+0x32 ([kernel.kallsyms])
			#   |   [kernel.kallsyms][ffffffff814f9302]
			if ($srcline_in_input and not $is_unknown) {
				# the source location sits on the line following the frame
				my $srcline = <>;
				# input may be truncated right after a frame line
				$srcline = "" unless defined $srcline;
				chomp $srcline;
				$srcline =~ s/\[.*?\]//g;
				$srcline =~ s/^\s*//g;
				$srcline =~ s/\s*$//g;
				$func .= ':' . $srcline unless $srcline eq "";
			}

			push @inline, $func;
		}

		unshift @stack, @inline;
	} else {
		warn "Unrecognized line: $_";
	}
}
# Emit one "folded-stack count" line per distinct stack, in string-sorted order.
for my $folded (sort keys %collapsed) {
	print "$folded $collapsed{$folded}\n";
}