
Go benchmark results include an Average, represented as "<mean> ± <standard deviation>". This is suboptimal for many reasons:

* Some web server somewhere in our CI pipeline (Cirrus? Google? Gitlab? I have no idea) sends the wrong MIME-type header, rendering the CSV weird-looking in a browser. Not that it's intended for a browser, but we have to debug/verify manually once in a while.
* The spaces and +/- make it less machine-readable.

Solution: split the "Average" field into two: Average, and Standard Deviation. And, as a courtesy to human readers, add a new column with the SD as a percentage.

Signed-off-by: Ed Santiago <santiago@redhat.com>
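For illustration (values taken from the sample log excerpt quoted in the script's comments; CSV quoting elided): a cell that previously read

    0.302s ± 0.018s

under a single "CPU Average Time" column is now spread across three columns:

    CPU Average Time       0.302s
    CPU StdDev             0.018s
    CPU StdDev (Percent)   6.0%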
134 lines
4.0 KiB
Perl
Executable File
#!/usr/bin/perl
#
# parse-localbenchmarks - convert localbenchmarks output to CSV
#
# This is a filter. It transforms data from one format to another. Usage:
#
#    $ make localbenchmarks &> mylogfile
#    $ hack/parse-localbenchmarks <mylogfile > benchmarks.csv
#
# To be more precise, this is a very stupid simpleminded filter. It is
# not a complete solution to the benchmarks problem. In particular,
# other tools are still needed to:
#
#   * Actually _run_ the benchmarks in some standard production environment
#   * Run this script on the results
#   * Save results, with identifying tags (datetime, git hash, PR id, ...)
#   * Compare two or more sets of CSVs
#
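# A sketch of the emitted CSV for the sample log excerpt quoted in the
# Pass 1 comment below (one long line per test; wrapped and abridged
# here for readability):
#
#   "Test Name", "CPU Fastest Time", "CPU Slowest Time", "CPU Average Time", "CPU StdDev", "CPU StdDev (Percent)", "MEM Smallest", ...
#   "podman images", "0.265s", "0.322s", "0.302s", "0.018s", "6.0%", "44076.0KB", ...
#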
(our $ME = $0) =~ s|^.*/||;     # script name

use v5.14;
use utf8;

# FIXME: add --help. Some day. Not urgent.
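# (Perl's -t file test is true when a filehandle is attached to a
# terminal, so if STDIN is a tty, nobody redirected a log file into us.)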
die "$ME: This is a filter, not an interactive tool\n" if -t *STDIN;
|
|
|
|
my $n_samples; # Number of timing runs (FIXME: unused)
|
|
my %results; # Timing results
|
|
my @benchmarks; # Names of benchmarks
|
|
my ($type, $testname); # Current context
|
|
|
|
#
|
|
# Pass 1: read in timings
|
|
#
|
|
while (my $line = <STDIN>) {
    # Log will have lots of ginkgo output. The only thing we care about is
    # the summary at the end, which will look something like:
    #
    #      * [MEASUREMENT]
    #      Podman Benchmark Suite
    #      ....
    #      Ran 3 samples:
    #      [CPU] podman images:
    #        Fastest Time: 0.265s
    #        Slowest Time: 0.322s
    #        Average Time: 0.302s ± 0.018s
    #      [MEM] podman images:
    #        Smallest: 44076.0KB
    #        Largest: 44616.0KB
    #        Average: 44338.7KB ± 171.2KB
    #      [CPU] podman push:
    #      ....repeat [CPU] and [MEM] for each test
    #      --------------------------
    #      SSSSSSSSSSSSSSSSSSSSS (and more ginkgo output we don't care about)
    #
    chomp $line;
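    # The '..' below is Perl's flip-flop operator in scalar context:
    # false until the left-hand regex matches (the '[MEASUREMENT]'
    # header), then true up to and including the line where the
    # right-hand regex matches (a row of 20+ dashes), then false again.
    # Net effect: skip everything outside the measurement summary.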
    next unless $line =~ /^.{1,3}\s+\[MEASUREMENT\]/ .. $line =~ /^-{20,}$/;

    # Trim leading & trailing whitespace
    $line =~ s/(^\s+|\s+$)//g;

    # FIXME: we don't actually emit this. What would be a good way to do so?
    if ($line =~ /^Ran\s+(\d+)\s+samples/) {
        $n_samples = $1;
    }

    # e.g., [CPU] podman foo:
    elsif ($line =~ /^\[([A-Z]+)\]\s+(\S.*\S):$/) {
        ($type, $testname) = ($1, $2);
    }

    # e.g., 'Fastest Time: 0.265s'
    elsif ($line =~ /^(\S.*?\S):\s+(.*)/) {
        log_result($testname, $type, $1, $2);
    }

    else {
        warn "Cannot grok '$line'\n" if $ENV{DEBUG_PARSELOCALBENCHMARKS};
    }
}
#
# Pass 2: write out CSV
#

# Headings...
print "\"Test Name\"";
printf ", \"%s\"", $_ for @benchmarks;
print "\n";

# ...then data
for my $t (sort keys %results) {
    printf "\"%s\"", $t;
    for my $benchmark (@benchmarks) {
        printf ", \"%s\"", $results{$t}{$benchmark} || '';
    }
    print "\n";
}

exit 0;
################
#  log_result  #  Preserve one record
################
sub log_result {
    my $testname = shift;       # in: test name (eg "podman foo")
    my $type     = shift;       # in: CPU or MEM
    my $name     = shift;       # in: benchmark name (eg "Fastest")
    my $result   = shift;       # in: benchmark value

    my $benchmark = "$type $name";
    $results{$testname}{$benchmark} = $result;

    # Keep an ordered list of benchmark names (as in, the order we
    # encounter them)
    push @benchmarks, $benchmark
        unless grep { $_ eq $benchmark } @benchmarks;

    # Special case: "Average X" may be of the form "xxx ± yyy". Move the
    # standard deviation to its own column.
    if ($name =~ /Average/) {
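        # The s/// below rewrites the stored result to keep only its
        # first whitespace-separated token (the mean), while capturing
        # the mean in $1 and the final token (the standard deviation)
        # in $2. E.g., "0.302s ± 0.018s" is stored as "0.302s", with
        # $sd set to "0.018s".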
        if ($results{$testname}{$benchmark} =~ s/^(\S+)\s+.*\s+(\S+)$/$1/) {
            my ($average, $sd) = ($1, $2);
            log_result($testname, $type, 'StdDev', $sd);

            # Strip off units, so we can express the SD as a percentage
            $average =~ s/[a-z]+$//i;
            $sd =~ s/[a-z]+$//i;
            my $pct = sprintf("%.1f%%", $sd * 100.0 / $average);
            log_result($testname, $type, 'StdDev (Percent)', $pct);
        }
    }
}