/*
 * builtin-annotate.c
 *
 * Builtin annotate command: Analyze the perf.data input file,
 * look up and read DSOs and symbol information and display
 * a histogram of results, along various sorting keys.
 */
#include "builtin.h"

10
#include "util/util.h"
11
#include "util/color.h"
12
#include <linux/list.h>
13
#include "util/cache.h"
14
#include <linux/rbtree.h>
15 16 17
#include "util/symbol.h"

#include "perf.h"
18
#include "util/debug.h"
19

20 21
#include "util/evlist.h"
#include "util/evsel.h"
22
#include "util/annotate.h"
23
#include "util/event.h"
24
#include <subcmd/parse-options.h>
25
#include "util/parse-events.h"
26
#include "util/thread.h"
27
#include "util/sort.h"
28
#include "util/hist.h"
29
#include "util/session.h"
30
#include "util/tool.h"
31
#include "util/data.h"
32
#include "arch/common.h"
33
#include "util/block-range.h"
34

35
#include <dlfcn.h>
36
#include <errno.h>
37 38
#include <linux/bitmap.h>

39
struct perf_annotate {
40
	struct perf_tool tool;
41 42
	struct perf_session *session;
	bool	   use_tui, use_stdio, use_gtk;
43 44
	bool	   full_paths;
	bool	   print_line;
45
	bool	   skip_missing;
46 47 48
	const char *sym_hist_filter;
	const char *cpu_list;
	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
49
};
50

51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147
/*
 * Account one basic block, i.e. the straight-line run of instructions
 * executed between two consecutive branch records:
 *
 *	from	to		branch_i
 *	* ----> *
 *		|
 *		| block
 *		v
 *		* ----> *
 *		from	to	branch_i+1
 *
 * The horizontal arrows are the branches, the vertical one is the executed
 * block of instructions.
 *
 * For each 'instruction' we count how many blocks covered it, and for each
 * branch how often it was taken.
 *
 * This works without knowing the actual instruction stream: only address
 * ranges are tracked.  Ranges are broken down so that none overlap, and we
 * iterate from the start of the block until its end.
 *
 * @acme: once we parse the objdump output _before_ processing the samples,
 * we can easily fold the branch.cycles IPC bits in.
 *
 * @start: where the block begins (the 'to' of the branch that entered it)
 * @end:   where the block ends (the 'from' of the branch that left it)
 * @flags: flags of the leaving branch; only ->predicted is read here
 */
static void process_basic_block(struct addr_map_symbol *start,
				struct addr_map_symbol *end,
				struct branch_flags *flags)
{
	struct symbol *sym = start->sym;
	struct annotation *notes = NULL;
	struct block_range_iter iter;
	struct block_range *entry;

	if (sym)
		notes = symbol__annotation(sym);

	/*
	 * Sanity; NULL isn't executable and the CPU cannot execute backwards
	 */
	if (!start->addr)
		return;
	if (start->addr > end->addr)
		return;

	iter = block_range__create(start->addr, end->addr);
	if (!block_range_iter__valid(&iter))
		return;

	/*
	 * First block in range is a branch target.
	 */
	entry = block_range_iter(&iter);
	assert(entry->is_target);
	entry->entry++;

	/* Bump the coverage of every sub-range, the first one included. */
	for (;;) {
		entry = block_range_iter(&iter);

		entry->coverage++;
		entry->sym = sym;

		if (notes)
			notes->max_coverage = max(notes->max_coverage, entry->coverage);

		if (!block_range_iter__next(&iter))
			break;
	}

	/*
	 * Last block in range is a branch.
	 */
	entry = block_range_iter(&iter);
	assert(entry->is_branch);
	entry->taken++;
	if (flags->predicted)
		entry->pred++;
}

/*
 * Feed the basic blocks described by a sample's branch stack to
 * process_basic_block().
 *
 * The resolved entries are walked from the last index down to index 0.
 * Each block runs from the 'to' address of the previously visited entry to
 * the 'from' address of the current one, so the first visited entry only
 * provides a starting point.
 *
 * Does nothing when there is no branch stack, when it is empty, or when
 * sample__resolve_bstack() returns NULL.
 */
static void process_branch_stack(struct branch_stack *bs, struct addr_location *al,
				 struct perf_sample *sample)
{
	struct addr_map_symbol *block_start = NULL;
	struct branch_info *entries;
	int i;

	if (bs == NULL || bs->nr == 0)
		return;

	entries = sample__resolve_bstack(sample, al);
	if (entries == NULL)
		return;

	i = bs->nr - 1;
	while (i >= 0) {
		/*
		 * XXX filter against symbol
		 */
		if (block_start != NULL)
			process_basic_block(block_start, &entries[i].from,
					    &entries[i].flags);
		block_start = &entries[i].to;
		i--;
	}

	/* The resolved array is released here, once the walk is done. */
	free(entries);
}

148
static int perf_evsel__add_sample(struct perf_evsel *evsel,
149
				  struct perf_sample *sample,
150 151
				  struct addr_location *al,
				  struct perf_annotate *ann)
152
{
153
	struct hists *hists = evsel__hists(evsel);
154
	struct hist_entry *he;
155
	int ret;
156

157 158 159
	if (ann->sym_hist_filter != NULL &&
	    (al->sym == NULL ||
	     strcmp(ann->sym_hist_filter, al->sym->name) != 0)) {
160
		/* We're only interested in a symbol named sym_hist_filter */
161 162 163 164
		/*
		 * FIXME: why isn't this done in the symbol_filter when loading
		 * the DSO?
		 */
165 166 167 168
		if (al->sym != NULL) {
			rb_erase(&al->sym->rb_node,
				 &al->map->dso->symbols[al->map->type]);
			symbol__delete(al->sym);
169
			dso__reset_find_symbol_cache(al->map->dso);
170 171 172 173
		}
		return 0;
	}

174 175 176 177 178 179
	/*
	 * XXX filtered samples can still have branch entires pointing into our
	 * symbol and are missed.
	 */
	process_branch_stack(sample->branch_stack, al, sample);

180
	he = hists__add_entry(hists, al, NULL, NULL, NULL, sample, true);
181
	if (he == NULL)
182
		return -ENOMEM;
183

184
	ret = hist_entry__inc_addr_samples(he, sample, evsel->idx, al->addr);
185
	hists__inc_nr_samples(hists, true);
186
	return ret;
187 188
}

189
static int process_sample_event(struct perf_tool *tool,
190
				union perf_event *event,
191
				struct perf_sample *sample,
192
				struct perf_evsel *evsel,
193
				struct machine *machine)
194
{
195
	struct perf_annotate *ann = container_of(tool, struct perf_annotate, tool);
196
	struct addr_location al;
197
	int ret = 0;
198

199
	if (machine__resolve(machine, &al, sample) < 0) {
200 201
		pr_warning("problem processing %d event, skipping it.\n",
			   event->header.type);
202 203 204
		return -1;
	}

205
	if (ann->cpu_list && !test_bit(sample->cpu, ann->cpu_bitmap))
206
		goto out_put;
207

208
	if (!al.filtered && perf_evsel__add_sample(evsel, sample, &al, ann)) {
209 210
		pr_warning("problem incrementing symbol count, "
			   "skipping event\n");
211
		ret = -1;
212
	}
213 214 215
out_put:
	addr_location__put(&al);
	return ret;
216 217
}

218 219
static int hist_entry__tty_annotate(struct hist_entry *he,
				    struct perf_evsel *evsel,
220
				    struct perf_annotate *ann)
221
{
222
	return symbol__tty_annotate(he->ms.sym, he->ms.map, evsel,
223
				    ann->print_line, ann->full_paths, 0, 0);
224 225
}

226
static void hists__find_annotations(struct hists *hists,
227
				    struct perf_evsel *evsel,
228
				    struct perf_annotate *ann)
229
{
230
	struct rb_node *nd = rb_first(&hists->entries), *next;
231
	int key = K_RIGHT;
232

233
	while (nd) {
234
		struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
235
		struct annotation *notes;
236

237 238
		if (he->ms.sym == NULL || he->ms.map->dso->annotate_warned)
			goto find_next;
239

240
		notes = symbol__annotation(he->ms.sym);
241
		if (notes->src == NULL) {
242
find_next:
243
			if (key == K_LEFT)
244 245 246
				nd = rb_prev(nd);
			else
				nd = rb_next(nd);
247
			continue;
248
		}
249

250
		if (use_browser == 2) {
251
			int ret;
252 253 254 255 256 257 258 259 260 261
			int (*annotate)(struct hist_entry *he,
					struct perf_evsel *evsel,
					struct hist_browser_timer *hbt);

			annotate = dlsym(perf_gtk_handle,
					 "hist_entry__gtk_annotate");
			if (annotate == NULL) {
				ui__error("GTK browser not found!\n");
				return;
			}
262

263
			ret = annotate(he, evsel, NULL);
264 265 266 267 268
			if (!ret || !ann->skip_missing)
				return;

			/* skip missing symbols */
			nd = rb_next(nd);
269
		} else if (use_browser == 1) {
270
			key = hist_entry__tui_annotate(he, evsel, NULL);
271
			switch (key) {
272 273 274 275
			case -1:
				if (!ann->skip_missing)
					return;
				/* fall through */
276
			case K_RIGHT:
277
				next = rb_next(nd);
278
				break;
279
			case K_LEFT:
280
				next = rb_prev(nd);
281
				break;
282 283
			default:
				return;
284
			}
285 286 287

			if (next != NULL)
				nd = next;
288
		} else {
289
			hist_entry__tty_annotate(he, evsel, ann);
290 291 292
			nd = rb_next(nd);
			/*
			 * Since we have a hist_entry per IP for the same
293
			 * symbol, free he->ms.sym->src to signal we already
294 295
			 * processed this symbol.
			 */
296
			zfree(&notes->src->cycles_hist);
297
			zfree(&notes->src);
298
		}
299 300 301
	}
}

302
static int __cmd_annotate(struct perf_annotate *ann)
303
{
304
	int ret;
305
	struct perf_session *session = ann->session;
306 307
	struct perf_evsel *pos;
	u64 total_nr_samples;
308

309 310 311
	if (ann->cpu_list) {
		ret = perf_session__cpu_bitmap(session, ann->cpu_list,
					       ann->cpu_bitmap);
312
		if (ret)
313
			goto out;
314 315
	}

316
	if (!objdump_path) {
317
		ret = perf_env__lookup_objdump(&session->header.env);
318
		if (ret)
319
			goto out;
320 321
	}

322
	ret = perf_session__process_events(session);
323
	if (ret)
324
		goto out;
325

326
	if (dump_trace) {
327
		perf_session__fprintf_nr_events(session, stdout);
328
		perf_evlist__fprintf_nr_events(session->evlist, stdout);
329
		goto out;
330
	}
331

332
	if (verbose > 3)
333
		perf_session__fprintf(session, stdout);
334

335
	if (verbose > 2)
336
		perf_session__fprintf_dsos(session, stdout);
337

338
	total_nr_samples = 0;
339
	evlist__for_each_entry(session->evlist, pos) {
340
		struct hists *hists = evsel__hists(pos);
341 342 343 344
		u32 nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];

		if (nr_samples > 0) {
			total_nr_samples += nr_samples;
345
			hists__collapse_resort(hists, NULL);
346 347
			/* Don't sort callchain */
			perf_evsel__reset_sample_bit(pos, CALLCHAIN);
348
			perf_evsel__output_resort(pos, NULL);
349 350 351 352 353

			if (symbol_conf.event_group &&
			    !perf_evsel__is_group_leader(pos))
				continue;

354
			hists__find_annotations(hists, pos, ann);
355 356
		}
	}
357

358
	if (total_nr_samples == 0) {
359
		ui__error("The %s file has no samples!\n", session->data->file.path);
360
		goto out;
361
	}
362

363 364 365 366 367 368 369
	if (use_browser == 2) {
		void (*show_annotations)(void);

		show_annotations = dlsym(perf_gtk_handle,
					 "perf_gtk__show_annotations");
		if (show_annotations == NULL) {
			ui__error("GTK browser not found!\n");
370
			goto out;
371 372 373
		}
		show_annotations();
	}
374

375
out:
376
	return ret;
377 378 379
}

/*
 * NULL-terminated usage text handed to parse_options() and
 * usage_with_options() by cmd_annotate().
 */
static const char * const annotate_usage[] = {
	"perf annotate [<options>]",
	NULL,
};

384
int cmd_annotate(int argc, const char **argv)
385 386
{
	struct perf_annotate annotate = {
387
		.tool = {
388 389
			.sample	= process_sample_event,
			.mmap	= perf_event__process_mmap,
390
			.mmap2	= perf_event__process_mmap2,
391
			.comm	= perf_event__process_comm,
392
			.exit	= perf_event__process_exit,
393
			.fork	= perf_event__process_fork,
394
			.namespaces = perf_event__process_namespaces,
395 396
			.attr	= perf_event__process_attr,
			.build_id = perf_event__process_build_id,
397
			.tracing_data   = perf_event__process_tracing_data,
398
			.feature	= perf_event__process_feature,
399
			.ordered_events = true,
400 401 402
			.ordering_requires_timestamps = true,
		},
	};
403
	struct perf_data data = {
404 405
		.mode  = PERF_DATA_MODE_READ,
	};
406
	struct option options[] = {
407
	OPT_STRING('i', "input", &input_name, "file",
408
		    "input file name"),
409 410
	OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
		   "only consider symbols in these dsos"),
411
	OPT_STRING('s', "symbol", &annotate.sym_hist_filter, "symbol",
412
		    "symbol to annotate"),
413
	OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
414
	OPT_INCR('v', "verbose", &verbose,
415
		    "be more verbose (show symbol address, etc)"),
416
	OPT_BOOLEAN('q', "quiet", &quiet, "do now show any message"),
417 418
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
419
	OPT_BOOLEAN(0, "gtk", &annotate.use_gtk, "Use the GTK interface"),
420 421
	OPT_BOOLEAN(0, "tui", &annotate.use_tui, "Use the TUI interface"),
	OPT_BOOLEAN(0, "stdio", &annotate.use_stdio, "Use the stdio interface"),
422 423 424
	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
425
		    "load module symbols - WARNING: use only with -k and LIVE kernel"),
426
	OPT_BOOLEAN('l', "print-line", &annotate.print_line,
427
		    "print matching source lines (may be slow)"),
428
	OPT_BOOLEAN('P', "full-paths", &annotate.full_paths,
429
		    "Don't shorten the displayed pathnames"),
430 431
	OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing,
		    "Skip symbols that cannot be annotated"),
432
	OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"),
433 434 435
	OPT_CALLBACK(0, "symfs", NULL, "directory",
		     "Look for files with symbols relative to this directory",
		     symbol__config_symfs),
436
	OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src,
437
		    "Interleave source code with assembly code (default)"),
438
	OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw,
439
		    "Display raw encoding of assembly instructions (default)"),
440 441
	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
442 443
	OPT_STRING(0, "objdump", &objdump_path, "path",
		   "objdump binary to use for disassembly and annotations"),
444 445
	OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
		    "Show event group information together"),
446 447
	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
		    "Show a column with the sum of periods"),
448 449
	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
		    "Show a column with the number of samples"),
450 451 452
	OPT_CALLBACK_DEFAULT(0, "stdio-color", NULL, "mode",
			     "'always' (default), 'never' or 'auto' only applicable to --stdio mode",
			     stdio__config_color, "always"),
453
	OPT_END()
454
	};
455 456 457 458 459
	int ret;

	set_option_flag(options, 0, "show-total-period", PARSE_OPT_EXCLUSIVE);
	set_option_flag(options, 0, "show-nr-samples", PARSE_OPT_EXCLUSIVE);

460

461
	ret = hists__init();
462 463
	if (ret < 0)
		return ret;
464

465
	argc = parse_options(argc, argv, options, annotate_usage, 0);
466 467 468 469 470 471 472 473 474 475
	if (argc) {
		/*
		 * Special case: if there's an argument left then assume that
		 * it's a symbol filter:
		 */
		if (argc > 1)
			usage_with_options(annotate_usage, options);

		annotate.sym_hist_filter = argv[0];
	}
476

477 478
	if (symbol_conf.show_nr_samples && annotate.use_gtk) {
		pr_err("--show-nr-samples is not available in --gtk mode at this time\n");
479 480 481
		return ret;
	}

482 483 484
	if (quiet)
		perf_quiet_option();

485
	data.file.path = input_name;
486

487
	annotate.session = perf_session__new(&data, false, &annotate.tool);
488
	if (annotate.session == NULL)
489
		return -1;
490

491 492 493 494
	ret = symbol__annotation_init();
	if (ret < 0)
		goto out_delete;

495 496
	symbol_conf.try_vmlinux_path = true;

497
	ret = symbol__init(&annotate.session->header.env);
498 499
	if (ret < 0)
		goto out_delete;
500

501
	if (setup_sorting(NULL) < 0)
502
		usage_with_options(annotate_usage, options);
503

504 505 506 507 508 509 510 511 512
	if (annotate.use_stdio)
		use_browser = 0;
	else if (annotate.use_tui)
		use_browser = 1;
	else if (annotate.use_gtk)
		use_browser = 2;

	setup_browser(true);

513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528
	ret = __cmd_annotate(&annotate);

out_delete:
	/*
	 * Speed up the exit process, for large files this can
	 * take quite a while.
	 *
	 * XXX Enable this when using valgrind or if we ever
	 * librarize this command.
	 *
	 * Also experiment with obstacks to see how much speed
	 * up we'll get here.
	 *
	 * perf_session__delete(session);
	 */
	return ret;
529
}