
Go benchmark results include an Average, represented as "<mean> ± <standard deviation>". This is suboptimal for many reasons:

* Some web server somewhere in our CI pipeline (Cirrus? Google? Gitlab? I have no idea) sends the wrong MIME-type header, rendering the CSV weird-looking in a browser. Not that it's intended for a browser, but we have to debug/verify manually once in a while.
* The spaces and +/- make it less machine-readable.

Solution: split the "Average" field into two: Average, and Standard Deviation. And, as a courtesy to human readers, add a new column with the SD as a percentage.

Signed-off-by: Ed Santiago <santiago@redhat.com>
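For illustration (values taken from the sample log excerpt quoted in the script's comments; CSV quoting elided): a cell that previously read

    0.302s ± 0.018s

under a single "CPU Average Time" column is now spread across three columns:

    CPU Average Time       0.302s
    CPU StdDev             0.018s
    CPU StdDev (Percent)   6.0%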
134 lines
4.0 KiB
Perl
Executable File
#!/usr/bin/perl
#
# parse-localbenchmarks - convert localbenchmarks output to CSV
#
# This is a filter. It transforms data from one format to another. Usage:
#
#    $ make localbenchmarks &> mylogfile
#    $ hack/parse-localbenchmarks <mylogfile > benchmarks.csv
#
# To be more precise, this is a very stupid simpleminded filter. It is
# not a complete solution to the benchmarks problem. In particular,
# other tools are still needed to:
#
#   * Actually _run_ the benchmarks in some standard production environment
#   * Run this script on the results
#   * Save results, with identifying tags (datetime, git hash, PR id, ...)
#   * Compare two or more sets of CSVs
#
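# A sketch of the emitted CSV for the sample log excerpt quoted in the
# Pass 1 comment below (one long line per test; wrapped and abridged
# here for readability):
#
#   "Test Name", "CPU Fastest Time", "CPU Slowest Time", "CPU Average Time", "CPU StdDev", "CPU StdDev (Percent)", "MEM Smallest", ...
#   "podman images", "0.265s", "0.322s", "0.302s", "0.018s", "6.0%", "44076.0KB", ...
#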
(our $ME = $0) =~ s|^.*/||;     # script name

use v5.14;
use utf8;

# FIXME: add --help. Some day. Not urgent.
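# (Perl's -t file test is true when a filehandle is attached to a
# terminal, so if STDIN is a tty, nobody redirected a log file into us.)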
die "$ME: This is a filter, not an interactive tool\n" if -t *STDIN;
|
|
|
|
my $n_samples; # Number of timing runs (FIXME: unused)
|
|
my %results; # Timing results
|
|
my @benchmarks; # Names of benchmarks
|
|
my ($type, $testname); # Current context
|
|
|
|
#
|
|
# Pass 1: read in timings
|
|
#
|
|
while (my $line = <STDIN>) {
    # Log will have lots of ginkgo output. The only thing we care about is
    # the summary at the end, which will look something like:
    #
    #      * [MEASUREMENT]
    #      Podman Benchmark Suite
    #      ....
    #      Ran 3 samples:
    #      [CPU] podman images:
    #        Fastest Time: 0.265s
    #        Slowest Time: 0.322s
    #        Average Time: 0.302s ± 0.018s
    #      [MEM] podman images:
    #        Smallest: 44076.0KB
    #        Largest: 44616.0KB
    #        Average: 44338.7KB ± 171.2KB
    #      [CPU] podman push:
    #      ....repeat [CPU] and [MEM] for each test
    #      --------------------------
    #      SSSSSSSSSSSSSSSSSSSSS (and more ginkgo output we don't care about)
    #
    chomp $line;
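    # The '..' below is Perl's flip-flop operator in scalar context:
    # false until the left-hand regex matches (the '[MEASUREMENT]'
    # header), then true up to and including the line where the
    # right-hand regex matches (a row of 20+ dashes), then false again.
    # Net effect: skip everything outside the measurement summary.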
    next unless $line =~ /^.{1,3}\s+\[MEASUREMENT\]/ .. $line =~ /^-{20,}$/;

    # Trim leading & trailing whitespace
    $line =~ s/(^\s+|\s+$)//g;

    # FIXME: we don't actually emit this. What would be a good way to do so?
    if ($line =~ /^Ran\s+(\d+)\s+samples/) {
        $n_samples = $1;
    }

    # e.g., [CPU] podman foo:
    elsif ($line =~ /^\[([A-Z]+)\]\s+(\S.*\S):$/) {
        ($type, $testname) = ($1, $2);
    }

    # e.g., 'Fastest Time: 0.265s'
    elsif ($line =~ /^(\S.*?\S):\s+(.*)/) {
        log_result($testname, $type, $1, $2);
    }

    else {
        warn "Cannot grok '$line'\n" if $ENV{DEBUG_PARSELOCALBENCHMARKS};
    }
}
#
# Pass 2: write out CSV
#

# Headings...
print "\"Test Name\"";
printf ", \"%s\"", $_ for @benchmarks;
print "\n";

# ...then data
for my $t (sort keys %results) {
    printf "\"%s\"", $t;
    for my $benchmark (@benchmarks) {
        printf ", \"%s\"", $results{$t}{$benchmark} || '';
    }
    print "\n";
}

exit 0;
################
#  log_result  #  Preserve one record
################
sub log_result {
    my $testname = shift;       # in: test name (eg "podman foo")
    my $type     = shift;       # in: CPU or MEM
    my $name     = shift;       # in: benchmark name (eg "Fastest")
    my $result   = shift;       # in: benchmark value

    my $benchmark = "$type $name";
    $results{$testname}{$benchmark} = $result;

    # Keep an ordered list of benchmark names (as in, the order we
    # encounter them)
    push @benchmarks, $benchmark
        unless grep { $_ eq $benchmark } @benchmarks;

    # Special case: "Average X" may be of the form "xxx ± yyy". Move the
    # standard deviation to its own column.
    if ($name =~ /Average/) {
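        # The s/// below rewrites the stored result to keep only its
        # first whitespace-separated token (the mean), while capturing
        # the mean in $1 and the final token (the standard deviation)
        # in $2. E.g., "0.302s ± 0.018s" is stored as "0.302s", with
        # $sd set to "0.018s".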
        if ($results{$testname}{$benchmark} =~ s/^(\S+)\s+.*\s+(\S+)$/$1/) {
            my ($average, $sd) = ($1, $2);
            log_result($testname, $type, 'StdDev', $sd);

            # Strip off units, so we can express the SD as a percentage
            $average =~ s/[a-z]+$//i;
            $sd =~ s/[a-z]+$//i;
            my $pct = sprintf("%.1f%%", $sd * 100.0 / $average);
            log_result($testname, $type, 'StdDev (Percent)', $pct);
        }
    }
}