Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'veristat: replay, filtering, sorting'

Andrii Nakryiko says:

====================

This patch set adds a bunch of new features and improvements that were sorely
missing during recent active use of veristat to develop BPF verifier precision
changes. Individual patches provide justification, explanation and often
examples showing how new capabilities can be used.
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>

+731 -164
+731 -164
tools/testing/selftests/bpf/veristat.c
··· 17 17 #include <bpf/libbpf.h> 18 18 #include <libelf.h> 19 19 #include <gelf.h> 20 + #include <float.h> 20 21 21 22 enum stat_id { 22 23 VERDICT, ··· 35 34 NUM_STATS_CNT = FILE_NAME - VERDICT, 36 35 }; 37 36 37 + /* In comparison mode each stat can specify up to four different values: 38 + * - A side value; 39 + * - B side value; 40 + * - absolute diff value; 41 + * - relative (percentage) diff value. 42 + * 43 + * When specifying stat specs in comparison mode, user can use one of the 44 + * following variant suffixes to specify which exact variant should be used for 45 + * ordering or filtering: 46 + * - `_a` for A side value; 47 + * - `_b` for B side value; 48 + * - `_diff` for absolute diff value; 49 + * - `_pct` for relative (percentage) diff value. 50 + * 51 + * If no variant suffix is provided, then `_b` (control data) is assumed. 52 + * 53 + * As an example, let's say instructions stat has the following output: 54 + * 55 + * Insns (A) Insns (B) Insns (DIFF) 56 + * --------- --------- -------------- 57 + * 21547 20920 -627 (-2.91%) 58 + * 59 + * Then: 60 + * - 21547 is A side value (insns_a); 61 + * - 20920 is B side value (insns_b); 62 + * - -627 is absolute diff value (insns_diff); 63 + * - -2.91% is relative diff value (insns_pct). 64 + * 65 + * For verdict there is no verdict_pct variant. 66 + * For file and program name, _a and _b variants are equivalent and there are 67 + * no _diff or _pct variants. 
68 + */ 69 + enum stat_variant { 70 + VARIANT_A, 71 + VARIANT_B, 72 + VARIANT_DIFF, 73 + VARIANT_PCT, 74 + }; 75 + 38 76 struct verif_stats { 39 77 char *file_name; 40 78 char *prog_name; ··· 81 41 long stats[NUM_STATS_CNT]; 82 42 }; 83 43 44 + /* joined comparison mode stats */ 45 + struct verif_stats_join { 46 + char *file_name; 47 + char *prog_name; 48 + 49 + const struct verif_stats *stats_a; 50 + const struct verif_stats *stats_b; 51 + }; 52 + 84 53 struct stat_specs { 85 54 int spec_cnt; 86 55 enum stat_id ids[ALL_STATS_CNT]; 56 + enum stat_variant variants[ALL_STATS_CNT]; 87 57 bool asc[ALL_STATS_CNT]; 88 58 int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */ 89 59 }; ··· 104 54 RESFMT_CSV, 105 55 }; 106 56 57 + enum filter_kind { 58 + FILTER_NAME, 59 + FILTER_STAT, 60 + }; 61 + 62 + enum operator_kind { 63 + OP_EQ, /* == or = */ 64 + OP_NEQ, /* != or <> */ 65 + OP_LT, /* < */ 66 + OP_LE, /* <= */ 67 + OP_GT, /* > */ 68 + OP_GE, /* >= */ 69 + }; 70 + 107 71 struct filter { 72 + enum filter_kind kind; 73 + /* FILTER_NAME */ 74 + char *any_glob; 108 75 char *file_glob; 109 76 char *prog_glob; 77 + /* FILTER_STAT */ 78 + enum operator_kind op; 79 + int stat_id; 80 + enum stat_variant stat_var; 81 + long value; 110 82 }; 111 83 112 84 static struct env { ··· 139 67 int log_level; 140 68 enum resfmt out_fmt; 141 69 bool comparison_mode; 70 + bool replay_mode; 142 71 143 72 struct verif_stats *prog_stats; 144 73 int prog_stat_cnt; ··· 147 74 /* baseline_stats is allocated and used only in comparsion mode */ 148 75 struct verif_stats *baseline_stats; 149 76 int baseline_stat_cnt; 77 + 78 + struct verif_stats_join *join_stats; 79 + int join_stat_cnt; 150 80 151 81 struct stat_specs output_spec; 152 82 struct stat_specs sort_spec; ··· 191 115 { "sort", 's', "SPEC", 0, "Specify sort order" }, 192 116 { "output-format", 'o', "FMT", 0, "Result output format (table, csv), default is table." 
}, 193 117 { "compare", 'C', NULL, 0, "Comparison mode" }, 118 + { "replay", 'R', NULL, 0, "Replay mode" }, 194 119 { "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." }, 195 120 {}, 196 121 }; ··· 245 168 break; 246 169 case 'C': 247 170 env.comparison_mode = true; 171 + break; 172 + case 'R': 173 + env.replay_mode = true; 248 174 break; 249 175 case 'f': 250 176 if (arg[0] == '@') ··· 306 226 return !*str && !*pat; 307 227 } 308 228 309 - static bool should_process_file(const char *filename) 310 - { 311 - int i; 312 - 313 - if (env.deny_filter_cnt > 0) { 314 - for (i = 0; i < env.deny_filter_cnt; i++) { 315 - if (glob_matches(filename, env.deny_filters[i].file_glob)) 316 - return false; 317 - } 318 - } 319 - 320 - if (env.allow_filter_cnt == 0) 321 - return true; 322 - 323 - for (i = 0; i < env.allow_filter_cnt; i++) { 324 - if (glob_matches(filename, env.allow_filters[i].file_glob)) 325 - return true; 326 - } 327 - 328 - return false; 329 - } 330 - 331 229 static bool is_bpf_obj_file(const char *path) { 332 230 Elf64_Ehdr *ehdr; 333 231 int fd, err = -EINVAL; ··· 338 280 return err == 0; 339 281 } 340 282 341 - static bool should_process_prog(const char *path, const char *prog_name) 283 + static bool should_process_file_prog(const char *filename, const char *prog_name) 342 284 { 343 - const char *filename = basename(path); 344 - int i; 285 + struct filter *f; 286 + int i, allow_cnt = 0; 345 287 346 - if (env.deny_filter_cnt > 0) { 347 - for (i = 0; i < env.deny_filter_cnt; i++) { 348 - if (glob_matches(filename, env.deny_filters[i].file_glob)) 349 - return false; 350 - if (!env.deny_filters[i].prog_glob) 288 + for (i = 0; i < env.deny_filter_cnt; i++) { 289 + f = &env.deny_filters[i]; 290 + if (f->kind != FILTER_NAME) 291 + continue; 292 + 293 + if (f->any_glob && glob_matches(filename, f->any_glob)) 294 + return false; 295 + if (f->any_glob && prog_name && glob_matches(prog_name, f->any_glob)) 296 + return false; 297 + 
if (f->file_glob && glob_matches(filename, f->file_glob)) 298 + return false; 299 + if (f->prog_glob && prog_name && glob_matches(prog_name, f->prog_glob)) 300 + return false; 301 + } 302 + 303 + for (i = 0; i < env.allow_filter_cnt; i++) { 304 + f = &env.allow_filters[i]; 305 + if (f->kind != FILTER_NAME) 306 + continue; 307 + 308 + allow_cnt++; 309 + if (f->any_glob) { 310 + if (glob_matches(filename, f->any_glob)) 311 + return true; 312 + if (prog_name && glob_matches(prog_name, f->any_glob)) 313 + return true; 314 + } else { 315 + if (f->file_glob && !glob_matches(filename, f->file_glob)) 351 316 continue; 352 - if (glob_matches(prog_name, env.deny_filters[i].prog_glob)) 353 - return false; 317 + if (f->prog_glob && prog_name && !glob_matches(prog_name, f->prog_glob)) 318 + continue; 319 + return true; 354 320 } 355 321 } 356 322 357 - if (env.allow_filter_cnt == 0) 358 - return true; 359 - 360 - for (i = 0; i < env.allow_filter_cnt; i++) { 361 - if (!glob_matches(filename, env.allow_filters[i].file_glob)) 362 - continue; 363 - /* if filter specifies only filename glob part, it implicitly 364 - * allows all progs within that file 365 - */ 366 - if (!env.allow_filters[i].prog_glob) 367 - return true; 368 - if (glob_matches(prog_name, env.allow_filters[i].prog_glob)) 369 - return true; 370 - } 371 - 372 - return false; 323 + /* if there are no file/prog name allow filters, allow all progs, 324 + * unless they are denied earlier explicitly 325 + */ 326 + return allow_cnt == 0; 373 327 } 328 + 329 + static struct { 330 + enum operator_kind op_kind; 331 + const char *op_str; 332 + } operators[] = { 333 + /* Order of these definitions matter to avoid situations like '<' 334 + * matching part of what is actually a '<>' operator. That is, 335 + * substrings should go last. 
336 + */ 337 + { OP_EQ, "==" }, 338 + { OP_NEQ, "!=" }, 339 + { OP_NEQ, "<>" }, 340 + { OP_LE, "<=" }, 341 + { OP_LT, "<" }, 342 + { OP_GE, ">=" }, 343 + { OP_GT, ">" }, 344 + { OP_EQ, "=" }, 345 + }; 346 + 347 + static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var); 374 348 375 349 static int append_filter(struct filter **filters, int *cnt, const char *str) 376 350 { 377 351 struct filter *f; 378 352 void *tmp; 379 353 const char *p; 354 + int i; 380 355 381 356 tmp = realloc(*filters, (*cnt + 1) * sizeof(**filters)); 382 357 if (!tmp) ··· 417 326 *filters = tmp; 418 327 419 328 f = &(*filters)[*cnt]; 420 - f->file_glob = f->prog_glob = NULL; 329 + memset(f, 0, sizeof(*f)); 421 330 422 - /* filter can be specified either as "<obj-glob>" or "<obj-glob>/<prog-glob>" */ 331 + /* First, let's check if it's a stats filter of the following form: 332 + * <stat><op><value, where: 333 + * - <stat> is one of supported numerical stats (verdict is also 334 + * considered numerical, failure == 0, success == 1); 335 + * - <op> is comparison operator (see `operators` definitions); 336 + * - <value> is an integer (or failure/success, or false/true as 337 + * special aliases for 0 and 1, respectively). 338 + * If the form doesn't match what user provided, we assume file/prog 339 + * glob filter. 
340 + */ 341 + for (i = 0; i < ARRAY_SIZE(operators); i++) { 342 + enum stat_variant var; 343 + int id; 344 + long val; 345 + const char *end = str; 346 + const char *op_str; 347 + 348 + op_str = operators[i].op_str; 349 + p = strstr(str, op_str); 350 + if (!p) 351 + continue; 352 + 353 + if (!parse_stat_id_var(str, p - str, &id, &var)) { 354 + fprintf(stderr, "Unrecognized stat name in '%s'!\n", str); 355 + return -EINVAL; 356 + } 357 + if (id >= FILE_NAME) { 358 + fprintf(stderr, "Non-integer stat is specified in '%s'!\n", str); 359 + return -EINVAL; 360 + } 361 + 362 + p += strlen(op_str); 363 + 364 + if (strcasecmp(p, "true") == 0 || 365 + strcasecmp(p, "t") == 0 || 366 + strcasecmp(p, "success") == 0 || 367 + strcasecmp(p, "succ") == 0 || 368 + strcasecmp(p, "s") == 0 || 369 + strcasecmp(p, "match") == 0 || 370 + strcasecmp(p, "m") == 0) { 371 + val = 1; 372 + } else if (strcasecmp(p, "false") == 0 || 373 + strcasecmp(p, "f") == 0 || 374 + strcasecmp(p, "failure") == 0 || 375 + strcasecmp(p, "fail") == 0 || 376 + strcasecmp(p, "mismatch") == 0 || 377 + strcasecmp(p, "mis") == 0) { 378 + val = 0; 379 + } else { 380 + errno = 0; 381 + val = strtol(p, (char **)&end, 10); 382 + if (errno || end == p || *end != '\0' ) { 383 + fprintf(stderr, "Invalid integer value in '%s'!\n", str); 384 + return -EINVAL; 385 + } 386 + } 387 + 388 + f->kind = FILTER_STAT; 389 + f->stat_id = id; 390 + f->stat_var = var; 391 + f->op = operators[i].op_kind; 392 + f->value = val; 393 + 394 + *cnt += 1; 395 + return 0; 396 + } 397 + 398 + /* File/prog filter can be specified either as '<glob>' or 399 + * '<file-glob>/<prog-glob>'. In the former case <glob> is applied to 400 + * both file and program names. This seems to be way more useful in 401 + * practice. If user needs full control, they can use '/<prog-glob>' 402 + * form to glob just program name, or '<file-glob>/' to glob only file 403 + * name. But usually common <glob> seems to be the most useful and 404 + * ergonomic way. 
405 + */ 406 + f->kind = FILTER_NAME; 423 407 p = strchr(str, '/'); 424 408 if (!p) { 425 - f->file_glob = strdup(str); 426 - if (!f->file_glob) 409 + f->any_glob = strdup(str); 410 + if (!f->any_glob) 427 411 return -ENOMEM; 428 412 } else { 429 - f->file_glob = strndup(str, p - str); 430 - f->prog_glob = strdup(p + 1); 431 - if (!f->file_glob || !f->prog_glob) { 432 - free(f->file_glob); 433 - free(f->prog_glob); 434 - f->file_glob = f->prog_glob = NULL; 435 - return -ENOMEM; 413 + if (str != p) { 414 + /* non-empty file glob */ 415 + f->file_glob = strndup(str, p - str); 416 + if (!f->file_glob) 417 + return -ENOMEM; 418 + } 419 + if (strlen(p + 1) > 0) { 420 + /* non-empty prog glob */ 421 + f->prog_glob = strdup(p + 1); 422 + if (!f->prog_glob) { 423 + free(f->file_glob); 424 + f->file_glob = NULL; 425 + return -ENOMEM; 426 + } 436 427 } 437 428 } 438 429 439 - *cnt = *cnt + 1; 430 + *cnt += 1; 440 431 return 0; 441 432 } 442 433 ··· 561 388 }, 562 389 }; 563 390 391 + static const struct stat_specs default_csv_output_spec = { 392 + .spec_cnt = 9, 393 + .ids = { 394 + FILE_NAME, PROG_NAME, VERDICT, DURATION, 395 + TOTAL_INSNS, TOTAL_STATES, PEAK_STATES, 396 + MAX_STATES_PER_INSN, MARK_READ_MAX_LEN, 397 + }, 398 + }; 399 + 564 400 static const struct stat_specs default_sort_spec = { 401 + .spec_cnt = 2, 402 + .ids = { 403 + FILE_NAME, PROG_NAME, 404 + }, 405 + .asc = { true, true, }, 406 + }; 407 + 408 + /* sorting for comparison mode to join two data sets */ 409 + static const struct stat_specs join_sort_spec = { 565 410 .spec_cnt = 2, 566 411 .ids = { 567 412 FILE_NAME, PROG_NAME, ··· 591 400 const char *header; 592 401 const char *names[4]; 593 402 bool asc_by_default; 403 + bool left_aligned; 594 404 } stat_defs[] = { 595 - [FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */ }, 596 - [PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */ }, 597 - [VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success 
*/ }, 405 + [FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */, true /* left */ }, 406 + [PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */, true /* left */ }, 407 + [VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */, true /* left */ }, 598 408 [DURATION] = { "Duration (us)", {"duration", "dur"}, }, 599 - [TOTAL_INSNS] = { "Total insns", {"total_insns", "insns"}, }, 600 - [TOTAL_STATES] = { "Total states", {"total_states", "states"}, }, 409 + [TOTAL_INSNS] = { "Insns", {"total_insns", "insns"}, }, 410 + [TOTAL_STATES] = { "States", {"total_states", "states"}, }, 601 411 [PEAK_STATES] = { "Peak states", {"peak_states"}, }, 602 412 [MAX_STATES_PER_INSN] = { "Max states per insn", {"max_states_per_insn"}, }, 603 413 [MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, }, 604 414 }; 605 415 416 + static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var) 417 + { 418 + static const char *var_sfxs[] = { 419 + [VARIANT_A] = "_a", 420 + [VARIANT_B] = "_b", 421 + [VARIANT_DIFF] = "_diff", 422 + [VARIANT_PCT] = "_pct", 423 + }; 424 + int i, j, k; 425 + 426 + for (i = 0; i < ARRAY_SIZE(stat_defs); i++) { 427 + struct stat_def *def = &stat_defs[i]; 428 + size_t alias_len, sfx_len; 429 + const char *alias; 430 + 431 + for (j = 0; j < ARRAY_SIZE(stat_defs[i].names); j++) { 432 + alias = def->names[j]; 433 + if (!alias) 434 + continue; 435 + 436 + alias_len = strlen(alias); 437 + if (strncmp(name, alias, alias_len) != 0) 438 + continue; 439 + 440 + if (alias_len == len) { 441 + /* If no variant suffix is specified, we 442 + * assume control group (just in case we are 443 + * in comparison mode. Variant is ignored in 444 + * non-comparison mode. 
445 + */ 446 + *var = VARIANT_B; 447 + *id = i; 448 + return true; 449 + } 450 + 451 + for (k = 0; k < ARRAY_SIZE(var_sfxs); k++) { 452 + sfx_len = strlen(var_sfxs[k]); 453 + if (alias_len + sfx_len != len) 454 + continue; 455 + 456 + if (strncmp(name + alias_len, var_sfxs[k], sfx_len) == 0) { 457 + *var = (enum stat_variant)k; 458 + *id = i; 459 + return true; 460 + } 461 + } 462 + } 463 + } 464 + 465 + return false; 466 + } 467 + 468 + static bool is_asc_sym(char c) 469 + { 470 + return c == '^'; 471 + } 472 + 473 + static bool is_desc_sym(char c) 474 + { 475 + return c == 'v' || c == 'V' || c == '.' || c == '!' || c == '_'; 476 + } 477 + 606 478 static int parse_stat(const char *stat_name, struct stat_specs *specs) 607 479 { 608 - int id, i; 480 + int id; 481 + bool has_order = false, is_asc = false; 482 + size_t len = strlen(stat_name); 483 + enum stat_variant var; 609 484 610 485 if (specs->spec_cnt >= ARRAY_SIZE(specs->ids)) { 611 486 fprintf(stderr, "Can't specify more than %zd stats\n", ARRAY_SIZE(specs->ids)); 612 487 return -E2BIG; 613 488 } 614 489 615 - for (id = 0; id < ARRAY_SIZE(stat_defs); id++) { 616 - struct stat_def *def = &stat_defs[id]; 617 - 618 - for (i = 0; i < ARRAY_SIZE(stat_defs[id].names); i++) { 619 - if (!def->names[i] || strcmp(def->names[i], stat_name) != 0) 620 - continue; 621 - 622 - specs->ids[specs->spec_cnt] = id; 623 - specs->asc[specs->spec_cnt] = def->asc_by_default; 624 - specs->spec_cnt++; 625 - 626 - return 0; 627 - } 490 + if (len > 1 && (is_asc_sym(stat_name[len - 1]) || is_desc_sym(stat_name[len - 1]))) { 491 + has_order = true; 492 + is_asc = is_asc_sym(stat_name[len - 1]); 493 + len -= 1; 628 494 } 629 495 630 - fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name); 631 - return -ESRCH; 496 + if (!parse_stat_id_var(stat_name, len, &id, &var)) { 497 + fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name); 498 + return -ESRCH; 499 + } 500 + 501 + specs->ids[specs->spec_cnt] = id; 502 + 
specs->variants[specs->spec_cnt] = var; 503 + specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default; 504 + specs->spec_cnt++; 505 + 506 + return 0; 632 507 } 633 508 634 509 static int parse_stats(const char *stats_str, struct stat_specs *specs) ··· 797 540 int err = 0; 798 541 void *tmp; 799 542 800 - if (!should_process_prog(filename, bpf_program__name(prog))) { 543 + if (!should_process_file_prog(basename(filename), bpf_program__name(prog))) { 801 544 env.progs_skipped++; 802 545 return 0; 803 546 } ··· 853 596 LIBBPF_OPTS(bpf_object_open_opts, opts); 854 597 int err = 0, prog_cnt = 0; 855 598 856 - if (!should_process_file(basename(filename))) { 599 + if (!should_process_file_prog(basename(filename), NULL)) { 857 600 if (env.verbose) 858 601 printf("Skipping '%s' due to filters...\n", filename); 859 602 env.files_skipped++; ··· 973 716 return cmp; 974 717 } 975 718 976 - return 0; 719 + /* always disambiguate with file+prog, which are unique */ 720 + cmp = strcmp(s1->file_name, s2->file_name); 721 + if (cmp != 0) 722 + return cmp; 723 + return strcmp(s1->prog_name, s2->prog_name); 724 + } 725 + 726 + static void fetch_join_stat_value(const struct verif_stats_join *s, 727 + enum stat_id id, enum stat_variant var, 728 + const char **str_val, 729 + double *num_val) 730 + { 731 + long v1, v2; 732 + 733 + if (id == FILE_NAME) { 734 + *str_val = s->file_name; 735 + return; 736 + } 737 + if (id == PROG_NAME) { 738 + *str_val = s->prog_name; 739 + return; 740 + } 741 + 742 + v1 = s->stats_a ? s->stats_a->stats[id] : 0; 743 + v2 = s->stats_b ? 
s->stats_b->stats[id] : 0; 744 + 745 + switch (var) { 746 + case VARIANT_A: 747 + if (!s->stats_a) 748 + *num_val = -DBL_MAX; 749 + else 750 + *num_val = s->stats_a->stats[id]; 751 + return; 752 + case VARIANT_B: 753 + if (!s->stats_b) 754 + *num_val = -DBL_MAX; 755 + else 756 + *num_val = s->stats_b->stats[id]; 757 + return; 758 + case VARIANT_DIFF: 759 + if (!s->stats_a || !s->stats_b) 760 + *num_val = -DBL_MAX; 761 + else if (id == VERDICT) 762 + *num_val = v1 == v2 ? 1.0 /* MATCH */ : 0.0 /* MISMATCH */; 763 + else 764 + *num_val = (double)(v2 - v1); 765 + return; 766 + case VARIANT_PCT: 767 + if (!s->stats_a || !s->stats_b) { 768 + *num_val = -DBL_MAX; 769 + } else if (v1 == 0) { 770 + if (v1 == v2) 771 + *num_val = 0.0; 772 + else 773 + *num_val = v2 < v1 ? -100.0 : 100.0; 774 + } else { 775 + *num_val = (v2 - v1) * 100.0 / v1; 776 + } 777 + return; 778 + } 779 + } 780 + 781 + static int cmp_join_stat(const struct verif_stats_join *s1, 782 + const struct verif_stats_join *s2, 783 + enum stat_id id, enum stat_variant var, bool asc) 784 + { 785 + const char *str1 = NULL, *str2 = NULL; 786 + double v1, v2; 787 + int cmp = 0; 788 + 789 + fetch_join_stat_value(s1, id, var, &str1, &v1); 790 + fetch_join_stat_value(s2, id, var, &str2, &v2); 791 + 792 + if (str1) 793 + cmp = strcmp(str1, str2); 794 + else if (v1 != v2) 795 + cmp = v1 < v2 ? -1 : 1; 796 + 797 + return asc ? 
cmp : -cmp; 798 + } 799 + 800 + static int cmp_join_stats(const void *v1, const void *v2) 801 + { 802 + const struct verif_stats_join *s1 = v1, *s2 = v2; 803 + int i, cmp; 804 + 805 + for (i = 0; i < env.sort_spec.spec_cnt; i++) { 806 + cmp = cmp_join_stat(s1, s2, 807 + env.sort_spec.ids[i], 808 + env.sort_spec.variants[i], 809 + env.sort_spec.asc[i]); 810 + if (cmp != 0) 811 + return cmp; 812 + } 813 + 814 + /* always disambiguate with file+prog, which are unique */ 815 + cmp = strcmp(s1->file_name, s2->file_name); 816 + if (cmp != 0) 817 + return cmp; 818 + return strcmp(s1->prog_name, s2->prog_name); 977 819 } 978 820 979 821 #define HEADER_CHAR '-' ··· 1094 738 1095 739 static void output_headers(enum resfmt fmt) 1096 740 { 741 + const char *fmt_str; 1097 742 int i, len; 1098 743 1099 744 for (i = 0; i < env.output_spec.spec_cnt; i++) { ··· 1108 751 *max_len = len; 1109 752 break; 1110 753 case RESFMT_TABLE: 1111 - printf("%s%-*s", i == 0 ? "" : COLUMN_SEP, *max_len, stat_defs[id].header); 754 + fmt_str = stat_defs[id].left_aligned ? "%s%-*s" : "%s%*s"; 755 + printf(fmt_str, i == 0 ? "" : COLUMN_SEP, *max_len, stat_defs[id].header); 1112 756 if (i == env.output_spec.spec_cnt - 1) 1113 757 printf("\n"); 1114 758 break; ··· 1130 772 { 1131 773 switch (id) { 1132 774 case FILE_NAME: 1133 - *str = s->file_name; 775 + *str = s ? s->file_name : "N/A"; 1134 776 break; 1135 777 case PROG_NAME: 1136 - *str = s->prog_name; 778 + *str = s ? s->prog_name : "N/A"; 1137 779 break; 1138 780 case VERDICT: 1139 - *str = s->stats[VERDICT] ? "success" : "failure"; 781 + if (!s) 782 + *str = "N/A"; 783 + else 784 + *str = s->stats[VERDICT] ? "success" : "failure"; 1140 785 break; 1141 786 case DURATION: 1142 787 case TOTAL_INSNS: ··· 1147 786 case PEAK_STATES: 1148 787 case MAX_STATES_PER_INSN: 1149 788 case MARK_READ_MAX_LEN: 1150 - *val = s->stats[id]; 789 + *val = s ? 
s->stats[id] : 0; 1151 790 break; 1152 791 default: 1153 792 fprintf(stderr, "Unrecognized stat #%d\n", id); ··· 1200 839 printf("Done. Processed %d files, %d programs. Skipped %d files, %d programs.\n", 1201 840 env.files_processed, env.files_skipped, env.progs_processed, env.progs_skipped); 1202 841 } 1203 - } 1204 - 1205 - static int handle_verif_mode(void) 1206 - { 1207 - int i, err; 1208 - 1209 - if (env.filename_cnt == 0) { 1210 - fprintf(stderr, "Please provide path to BPF object file!\n"); 1211 - argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); 1212 - return -EINVAL; 1213 - } 1214 - 1215 - for (i = 0; i < env.filename_cnt; i++) { 1216 - err = process_obj(env.filenames[i]); 1217 - if (err) { 1218 - fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err); 1219 - return err; 1220 - } 1221 - } 1222 - 1223 - qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats); 1224 - 1225 - if (env.out_fmt == RESFMT_TABLE) { 1226 - /* calculate column widths */ 1227 - output_headers(RESFMT_TABLE_CALCLEN); 1228 - for (i = 0; i < env.prog_stat_cnt; i++) 1229 - output_stats(&env.prog_stats[i], RESFMT_TABLE_CALCLEN, false); 1230 - } 1231 - 1232 - /* actually output the table */ 1233 - output_headers(env.out_fmt); 1234 - for (i = 0; i < env.prog_stat_cnt; i++) { 1235 - output_stats(&env.prog_stats[i], env.out_fmt, i == env.prog_stat_cnt - 1); 1236 - } 1237 - 1238 - return 0; 1239 842 } 1240 843 1241 844 static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats *st) ··· 1333 1008 * parsed entire line; if row should be ignored we pretend we 1334 1009 * never parsed it 1335 1010 */ 1336 - if (!should_process_prog(st->file_name, st->prog_name)) { 1011 + if (!should_process_file_prog(st->file_name, st->prog_name)) { 1337 1012 free(st->file_name); 1338 1013 free(st->prog_name); 1339 1014 *stat_cntp -= 1; ··· 1422 1097 output_comp_header_underlines(); 1423 1098 } 1424 1099 1425 - static void output_comp_stats(const 
struct verif_stats *base, const struct verif_stats *comp, 1100 + static void output_comp_stats(const struct verif_stats_join *join_stats, 1426 1101 enum resfmt fmt, bool last) 1427 1102 { 1103 + const struct verif_stats *base = join_stats->stats_a; 1104 + const struct verif_stats *comp = join_stats->stats_b; 1428 1105 char base_buf[1024] = {}, comp_buf[1024] = {}, diff_buf[1024] = {}; 1429 1106 int i; 1430 1107 ··· 1444 1117 /* normalize all the outputs to be in string buffers for simplicity */ 1445 1118 if (is_key_stat(id)) { 1446 1119 /* key stats (file and program name) are always strings */ 1447 - if (base != &fallback_stats) 1120 + if (base) 1448 1121 snprintf(base_buf, sizeof(base_buf), "%s", base_str); 1449 1122 else 1450 1123 snprintf(base_buf, sizeof(base_buf), "%s", comp_str); 1451 1124 } else if (base_str) { 1452 1125 snprintf(base_buf, sizeof(base_buf), "%s", base_str); 1453 1126 snprintf(comp_buf, sizeof(comp_buf), "%s", comp_str); 1454 - if (strcmp(base_str, comp_str) == 0) 1127 + if (!base || !comp) 1128 + snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A"); 1129 + else if (strcmp(base_str, comp_str) == 0) 1455 1130 snprintf(diff_buf, sizeof(diff_buf), "%s", "MATCH"); 1456 1131 else 1457 1132 snprintf(diff_buf, sizeof(diff_buf), "%s", "MISMATCH"); 1458 1133 } else { 1459 1134 double p = 0.0; 1460 1135 1461 - snprintf(base_buf, sizeof(base_buf), "%ld", base_val); 1462 - snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val); 1136 + if (base) 1137 + snprintf(base_buf, sizeof(base_buf), "%ld", base_val); 1138 + else 1139 + snprintf(base_buf, sizeof(base_buf), "%s", "N/A"); 1140 + if (comp) 1141 + snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val); 1142 + else 1143 + snprintf(comp_buf, sizeof(comp_buf), "%s", "N/A"); 1463 1144 1464 1145 diff_val = comp_val - base_val; 1465 - if (base == &fallback_stats || comp == &fallback_stats || base_val == 0) { 1466 - if (comp_val == base_val) 1467 - p = 0.0; /* avoid +0 (+100%) case */ 1468 - else 1469 - p = 
comp_val < base_val ? -100.0 : 100.0; 1146 + if (!base || !comp) { 1147 + snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A"); 1470 1148 } else { 1471 - p = diff_val * 100.0 / base_val; 1149 + if (base_val == 0) { 1150 + if (comp_val == base_val) 1151 + p = 0.0; /* avoid +0 (+100%) case */ 1152 + else 1153 + p = comp_val < base_val ? -100.0 : 100.0; 1154 + } else { 1155 + p = diff_val * 100.0 / base_val; 1156 + } 1157 + snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", diff_val, p); 1472 1158 } 1473 - snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", diff_val, p); 1474 1159 } 1475 1160 1476 1161 switch (fmt) { ··· 1538 1199 return strcmp(base->prog_name, comp->prog_name); 1539 1200 } 1540 1201 1202 + static bool is_join_stat_filter_matched(struct filter *f, const struct verif_stats_join *stats) 1203 + { 1204 + static const double eps = 1e-9; 1205 + const char *str = NULL; 1206 + double value = 0.0; 1207 + 1208 + fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value); 1209 + 1210 + switch (f->op) { 1211 + case OP_EQ: return value > f->value - eps && value < f->value + eps; 1212 + case OP_NEQ: return value < f->value - eps || value > f->value + eps; 1213 + case OP_LT: return value < f->value - eps; 1214 + case OP_LE: return value <= f->value + eps; 1215 + case OP_GT: return value > f->value + eps; 1216 + case OP_GE: return value >= f->value - eps; 1217 + } 1218 + 1219 + fprintf(stderr, "BUG: unknown filter op %d!\n", f->op); 1220 + return false; 1221 + } 1222 + 1223 + static bool should_output_join_stats(const struct verif_stats_join *stats) 1224 + { 1225 + struct filter *f; 1226 + int i, allow_cnt = 0; 1227 + 1228 + for (i = 0; i < env.deny_filter_cnt; i++) { 1229 + f = &env.deny_filters[i]; 1230 + if (f->kind != FILTER_STAT) 1231 + continue; 1232 + 1233 + if (is_join_stat_filter_matched(f, stats)) 1234 + return false; 1235 + } 1236 + 1237 + for (i = 0; i < env.allow_filter_cnt; i++) { 1238 + f = &env.allow_filters[i]; 1239 + if (f->kind != 
FILTER_STAT) 1240 + continue; 1241 + allow_cnt++; 1242 + 1243 + if (is_join_stat_filter_matched(f, stats)) 1244 + return true; 1245 + } 1246 + 1247 + /* if there are no stat allowed filters, pass everything through */ 1248 + return allow_cnt == 0; 1249 + } 1250 + 1541 1251 static int handle_comparison_mode(void) 1542 1252 { 1543 1253 struct stat_specs base_specs = {}, comp_specs = {}; 1254 + struct stat_specs tmp_sort_spec; 1544 1255 enum resfmt cur_fmt; 1545 - int err, i, j; 1256 + int err, i, j, last_idx; 1546 1257 1547 1258 if (env.filename_cnt != 2) { 1548 - fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n"); 1259 + fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n"); 1549 1260 argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); 1550 1261 return -EINVAL; 1551 1262 } ··· 1633 1244 } 1634 1245 } 1635 1246 1247 + /* Replace user-specified sorting spec with file+prog sorting rule to 1248 + * be able to join two datasets correctly. Once we are done, we will 1249 + * restore the original sort spec. 1250 + */ 1251 + tmp_sort_spec = env.sort_spec; 1252 + env.sort_spec = join_sort_spec; 1636 1253 qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats); 1637 1254 qsort(env.baseline_stats, env.baseline_stat_cnt, sizeof(*env.baseline_stats), cmp_prog_stats); 1255 + env.sort_spec = tmp_sort_spec; 1638 1256 1639 - /* for human-readable table output we need to do extra pass to 1640 - * calculate column widths, so we substitute current output format 1641 - * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE 1642 - * and do everything again. 
1643 - */ 1644 - if (env.out_fmt == RESFMT_TABLE) 1645 - cur_fmt = RESFMT_TABLE_CALCLEN; 1646 - else 1647 - cur_fmt = env.out_fmt; 1648 - 1649 - one_more_time: 1650 - output_comp_headers(cur_fmt); 1651 - 1652 - /* If baseline and comparison datasets have different subset of rows 1653 - * (we match by 'object + prog' as a unique key) then assume 1654 - * empty/missing/zero value for rows that are missing in the opposite 1655 - * data set 1257 + /* Join two datasets together. If baseline and comparison datasets 1258 + * have different subset of rows (we match by 'object + prog' as 1259 + * a unique key) then assume empty/missing/zero value for rows that 1260 + * are missing in the opposite data set. 1656 1261 */ 1657 1262 i = j = 0; 1658 1263 while (i < env.baseline_stat_cnt || j < env.prog_stat_cnt) { 1659 - bool last = (i == env.baseline_stat_cnt - 1) || (j == env.prog_stat_cnt - 1); 1660 1264 const struct verif_stats *base, *comp; 1265 + struct verif_stats_join *join; 1266 + void *tmp; 1661 1267 int r; 1662 1268 1663 1269 base = i < env.baseline_stat_cnt ? 
&env.baseline_stats[i] : &fallback_stats; ··· 1669 1285 return -EINVAL; 1670 1286 } 1671 1287 1288 + tmp = realloc(env.join_stats, (env.join_stat_cnt + 1) * sizeof(*env.join_stats)); 1289 + if (!tmp) 1290 + return -ENOMEM; 1291 + env.join_stats = tmp; 1292 + 1293 + join = &env.join_stats[env.join_stat_cnt]; 1294 + memset(join, 0, sizeof(*join)); 1295 + 1672 1296 r = cmp_stats_key(base, comp); 1673 1297 if (r == 0) { 1674 - output_comp_stats(base, comp, cur_fmt, last); 1298 + join->file_name = base->file_name; 1299 + join->prog_name = base->prog_name; 1300 + join->stats_a = base; 1301 + join->stats_b = comp; 1675 1302 i++; 1676 1303 j++; 1677 1304 } else if (comp == &fallback_stats || r < 0) { 1678 - output_comp_stats(base, &fallback_stats, cur_fmt, last); 1305 + join->file_name = base->file_name; 1306 + join->prog_name = base->prog_name; 1307 + join->stats_a = base; 1308 + join->stats_b = NULL; 1679 1309 i++; 1680 1310 } else { 1681 - output_comp_stats(&fallback_stats, comp, cur_fmt, last); 1311 + join->file_name = comp->file_name; 1312 + join->prog_name = comp->prog_name; 1313 + join->stats_a = NULL; 1314 + join->stats_b = comp; 1682 1315 j++; 1683 1316 } 1317 + env.join_stat_cnt += 1; 1318 + } 1319 + 1320 + /* now sort joined results according to sort spec */ 1321 + qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats); 1322 + 1323 + /* for human-readable table output we need to do extra pass to 1324 + * calculate column widths, so we substitute current output format 1325 + * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE 1326 + * and do everything again. 
1327 + */ 1328 + if (env.out_fmt == RESFMT_TABLE) 1329 + cur_fmt = RESFMT_TABLE_CALCLEN; 1330 + else 1331 + cur_fmt = env.out_fmt; 1332 + 1333 + one_more_time: 1334 + output_comp_headers(cur_fmt); 1335 + 1336 + for (i = 0; i < env.join_stat_cnt; i++) { 1337 + const struct verif_stats_join *join = &env.join_stats[i]; 1338 + 1339 + if (!should_output_join_stats(join)) 1340 + continue; 1341 + 1342 + if (cur_fmt == RESFMT_TABLE_CALCLEN) 1343 + last_idx = i; 1344 + 1345 + output_comp_stats(join, cur_fmt, i == last_idx); 1684 1346 } 1685 1347 1686 1348 if (cur_fmt == RESFMT_TABLE_CALCLEN) { 1687 1349 cur_fmt = RESFMT_TABLE; 1688 1350 goto one_more_time; /* ... this time with feeling */ 1689 1351 } 1352 + 1353 + return 0; 1354 + } 1355 + 1356 + static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *stats) 1357 + { 1358 + long value = stats->stats[f->stat_id]; 1359 + 1360 + switch (f->op) { 1361 + case OP_EQ: return value == f->value; 1362 + case OP_NEQ: return value != f->value; 1363 + case OP_LT: return value < f->value; 1364 + case OP_LE: return value <= f->value; 1365 + case OP_GT: return value > f->value; 1366 + case OP_GE: return value >= f->value; 1367 + } 1368 + 1369 + fprintf(stderr, "BUG: unknown filter op %d!\n", f->op); 1370 + return false; 1371 + } 1372 + 1373 + static bool should_output_stats(const struct verif_stats *stats) 1374 + { 1375 + struct filter *f; 1376 + int i, allow_cnt = 0; 1377 + 1378 + for (i = 0; i < env.deny_filter_cnt; i++) { 1379 + f = &env.deny_filters[i]; 1380 + if (f->kind != FILTER_STAT) 1381 + continue; 1382 + 1383 + if (is_stat_filter_matched(f, stats)) 1384 + return false; 1385 + } 1386 + 1387 + for (i = 0; i < env.allow_filter_cnt; i++) { 1388 + f = &env.allow_filters[i]; 1389 + if (f->kind != FILTER_STAT) 1390 + continue; 1391 + allow_cnt++; 1392 + 1393 + if (is_stat_filter_matched(f, stats)) 1394 + return true; 1395 + } 1396 + 1397 + /* if there are no stat allowed filters, pass everything through */ 1398 + 
return allow_cnt == 0; 1399 + } 1400 + 1401 + static void output_prog_stats(void) 1402 + { 1403 + const struct verif_stats *stats; 1404 + int i, last_stat_idx = 0; 1405 + 1406 + if (env.out_fmt == RESFMT_TABLE) { 1407 + /* calculate column widths */ 1408 + output_headers(RESFMT_TABLE_CALCLEN); 1409 + for (i = 0; i < env.prog_stat_cnt; i++) { 1410 + stats = &env.prog_stats[i]; 1411 + if (!should_output_stats(stats)) 1412 + continue; 1413 + output_stats(stats, RESFMT_TABLE_CALCLEN, false); 1414 + last_stat_idx = i; 1415 + } 1416 + } 1417 + 1418 + /* actually output the table */ 1419 + output_headers(env.out_fmt); 1420 + for (i = 0; i < env.prog_stat_cnt; i++) { 1421 + stats = &env.prog_stats[i]; 1422 + if (!should_output_stats(stats)) 1423 + continue; 1424 + output_stats(stats, env.out_fmt, i == last_stat_idx); 1425 + } 1426 + } 1427 + 1428 + static int handle_verif_mode(void) 1429 + { 1430 + int i, err; 1431 + 1432 + if (env.filename_cnt == 0) { 1433 + fprintf(stderr, "Please provide path to BPF object file!\n\n"); 1434 + argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); 1435 + return -EINVAL; 1436 + } 1437 + 1438 + for (i = 0; i < env.filename_cnt; i++) { 1439 + err = process_obj(env.filenames[i]); 1440 + if (err) { 1441 + fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err); 1442 + return err; 1443 + } 1444 + } 1445 + 1446 + qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats); 1447 + 1448 + output_prog_stats(); 1449 + 1450 + return 0; 1451 + } 1452 + 1453 + static int handle_replay_mode(void) 1454 + { 1455 + struct stat_specs specs = {}; 1456 + int err; 1457 + 1458 + if (env.filename_cnt != 1) { 1459 + fprintf(stderr, "Replay mode expects exactly one input CSV file!\n\n"); 1460 + argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); 1461 + return -EINVAL; 1462 + } 1463 + 1464 + err = parse_stats_csv(env.filenames[0], &specs, 1465 + &env.prog_stats, &env.prog_stat_cnt); 1466 + if (err) { 1467 + fprintf(stderr, 
"Failed to parse stats from '%s': %d\n", env.filenames[0], err); 1468 + return err; 1469 + } 1470 + 1471 + qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats); 1472 + 1473 + output_prog_stats(); 1690 1474 1691 1475 return 0; 1692 1476 } ··· 1867 1315 return 1; 1868 1316 1869 1317 if (env.verbose && env.quiet) { 1870 - fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n"); 1318 + fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n\n"); 1871 1319 argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); 1872 1320 return 1; 1873 1321 } 1874 1322 if (env.verbose && env.log_level == 0) 1875 1323 env.log_level = 1; 1876 1324 1877 - if (env.output_spec.spec_cnt == 0) 1878 - env.output_spec = default_output_spec; 1325 + if (env.output_spec.spec_cnt == 0) { 1326 + if (env.out_fmt == RESFMT_CSV) 1327 + env.output_spec = default_csv_output_spec; 1328 + else 1329 + env.output_spec = default_output_spec; 1330 + } 1879 1331 if (env.sort_spec.spec_cnt == 0) 1880 1332 env.sort_spec = default_sort_spec; 1881 1333 1334 + if (env.comparison_mode && env.replay_mode) { 1335 + fprintf(stderr, "Can't specify replay and comparison mode at the same time!\n\n"); 1336 + argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); 1337 + return 1; 1338 + } 1339 + 1882 1340 if (env.comparison_mode) 1883 1341 err = handle_comparison_mode(); 1342 + else if (env.replay_mode) 1343 + err = handle_replay_mode(); 1884 1344 else 1885 1345 err = handle_verif_mode(); 1886 1346 1887 1347 free_verif_stats(env.prog_stats, env.prog_stat_cnt); 1888 1348 free_verif_stats(env.baseline_stats, env.baseline_stat_cnt); 1349 + free(env.join_stats); 1889 1350 for (i = 0; i < env.filename_cnt; i++) 1890 1351 free(env.filenames[i]); 1891 1352 free(env.filenames); 1892 1353 for (i = 0; i < env.allow_filter_cnt; i++) { 1354 + free(env.allow_filters[i].any_glob); 1893 1355 free(env.allow_filters[i].file_glob); 
1894 1356 free(env.allow_filters[i].prog_glob); 1895 1357 } 1896 1358 free(env.allow_filters); 1897 1359 for (i = 0; i < env.deny_filter_cnt; i++) { 1360 + free(env.deny_filters[i].any_glob); 1898 1361 free(env.deny_filters[i].file_glob); 1899 1362 free(env.deny_filters[i].prog_glob); 1900 1363 }