mirror of
				https://github.com/containers/podman.git
				synced 2025-10-25 18:25:59 +08:00 
			
		
		
		
	 08741496d8
			
		
	
	08741496d8
	
	
	
		
			
			Go benchmark results include an Average, represented as
    <mean> ± <standard deviation>
This is suboptimal for many reasons:
  * Some web server somewhere in our CI pipeline (Cirrus?
    Google? Gitlab? I have no idea) sends the wrong mime-type
    header, rendering the CSV weird-looking in a browser.
    Not that it's intended for a browser, but we have to
    debug/verify manually once in a while.
  * The spaces and +/- make it less machine-readable.
Solution: split the "Average" field into two: Average, and
Standard Deviation. And, as a courtesy to human readers,
add a new column with SD as a percentage.
Signed-off-by: Ed Santiago <santiago@redhat.com>
		
	
		
			
				
	
	
		
			134 lines
		
	
	
		
			4.0 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			134 lines
		
	
	
		
			4.0 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/perl
 | |
| #
 | |
| # parse-localbenchmarks - convert localbenchmarks output to CSV
 | |
| #
 | |
| # This is a filter. It transforms data from one format to another. Usage:
 | |
| #
 | |
| #    $ make localbenchmarks &> mylogfile
 | |
| #    $ hack/parse-localbenchmarks <mylogfile > benchmarks.csv
 | |
| #
 | |
| # To be more precise, this is a very stupid simpleminded filter. It is
 | |
| # not a complete solution to the benchmarks problem. In particular,
 | |
| # other tools are still needed to:
 | |
| #
 | |
| #    * Actually _run_ the benchmarks in some standard production environment
 | |
| #    * Run this script on the results
 | |
| #    * Save results, with identifying tags (datetime, git hash, PR id, ...)
 | |
| #    * Compare two or more sets of CSVs
 | |
| #
 | |
(our $ME = $0) =~ s|^.*/||;             # script name (basename of $0)

use v5.14;
use utf8;

# FIXME: add --help. Some day. Not urgent.
# Input must be piped or redirected; bail out if stdin is a terminal.
die "$ME: This is a filter, not an interactive tool\n"    if -t *STDIN;

my $n_samples;                          # Number of timing runs (FIXME: unused)
my %results;                            # Timing results: {test name}{column} = value
my @benchmarks;                         # Names of benchmarks (CSV columns), first-seen order
my ($type, $testname);                  # Current context: e.g. 'CPU', 'podman images'

#
# Pass 1: read in timings
#
while (my $line = <STDIN>) {
    # Log will have lots of ginkgo output. The only thing we care about is
    # the summary at the end, which will look something like:
    #
    # * [MEASUREMENT]
    # Podman Benchmark Suite
    # ....
    #  Ran 3 samples:
    #  [CPU] podman images:
    #    Fastest Time: 0.265s
    #    Slowest Time: 0.322s
    #    Average Time: 0.302s ± 0.018s
    #  [MEM] podman images:
    #    Smallest: 44076.0KB
    #    Largest: 44616.0KB
    #    Average: 44338.7KB ± 171.2KB
    #  [CPU] podman push:
    #  ....repeat [CPU] and [MEM] for each test
    #  --------------------------
    #  SSSSSSSSSSSSSSSSSSSSS (and more ginkgo output we don't care about)
    #
    chomp $line;

    # Scalar-context '..' is the flip-flop operator: it turns true on the
    # [MEASUREMENT] line and stays true through the closing line of dashes
    # (inclusive), so everything outside that window is skipped.
    next unless $line =~ /^.{1,3}\s+\[MEASUREMENT\]/ .. $line =~ /^-{20,}$/;

    # Trim leading & trailing whitespace
    $line =~ s/^\s+|\s+$//g;

    # FIXME: we don't actually emit this. What would be a good way to do so?
    if ($line =~ /^Ran\s+(\d+)\s+samples/) {
        $n_samples = $1;
    }

    # e.g., [CPU] podman foo:
    elsif ($line =~ /^\[([A-Z]+)\]\s+(\S.*\S):$/) {
        ($type, $testname) = ($1, $2);
    }

    # e.g., 'Fastest Time: 0.265s'
    # Only meaningful once a "[TYPE] test name:" header has set the context;
    # without it we would record a bogus row/column keyed on empty strings.
    elsif (defined($testname) && defined($type)
           && $line =~ /^(\S.*?\S):\s+(.*)/) {
        log_result($testname, $type, $1, $2);
    }

    else {
        warn "Cannot grok '$line'\n"    if $ENV{DEBUG_PARSELOCALBENCHMARKS};
    }
}
 | |
| 
 | |
| #
 | |
| # Pass 2: write out CSV
 | |
| #
 | |
| 
 | |
# Render one CSV field: wrap it in double quotes, doubling any embedded
# double quote so the field stays well-formed. Undefined (a test with no
# value for this column) becomes an empty field; defined-or is used rather
# than || so that a legitimate value of "0" is not blanked out.
my $csv_field = sub {
    my ($field) = @_;
    $field //= '';
    $field =~ s/"/""/g;
    return qq{"$field"};
};

# Headings...
print join(', ', map { $csv_field->($_) } 'Test Name', @benchmarks), "\n";

# ...then data: one row per test, sorted by test name, columns in the
# order the benchmarks were first encountered.
for my $t (sort keys %results) {
    my @row = ($t, map { $results{$t}{$_} } @benchmarks);
    print join(', ', map { $csv_field->($_) } @row), "\n";
}

exit 0;
 | |
| 
 | |
| ################
 | |
| #  log_result  #  Preserve one record
 | |
| ################
 | |
# Record one measurement in %results and register its column name
# ("<type> <name>") in @benchmarks the first time it is seen.
#
# An "Average..." value of the form "mean ± stddev" is split: the mean stays
# in the Average column, and two extra columns are recorded via recursive
# calls: "<type> StdDev" and "<type> StdDev (Percent)". The percentage is
# left empty when the mean is zero (or not numeric), since the ratio is
# undefined there; the column is still registered so column order is stable.
sub log_result {
    my $testname = shift;               # in: test name (eg "podman foo")
    my $type     = shift;               # in: CPU or MEM
    my $name     = shift;               # in: benchmark name (eg "Fastest")
    my $result   = shift;               # in: benchmark value

    my $benchmark = "$type $name";
    $results{$testname}{$benchmark} = $result;

    # Keep an ordered list of benchmark names (as in, the order we
    # encounter them)
    push @benchmarks, $benchmark
        unless grep { $_ eq $benchmark } @benchmarks;

    # Special case: "Average X" may be of the form "xxx ± yyy". Move the
    # standard deviation to its own column. ('StdDev' names never match
    # /Average/, so the recursion below stops after one level.)
    return unless $name =~ /Average/;
    return unless $result =~ /^(\S+)\s+.*\s+(\S+)$/;
    my ($average, $sd) = ($1, $2);

    $results{$testname}{$benchmark} = $average;
    log_result($testname, $type, 'StdDev', $sd);

    # Strip off units, so we can determine it as a percentage
    $average =~ s/[a-z]+$//i;
    $sd      =~ s/[a-z]+$//i;

    # Guard against division by zero (e.g. "0.000s ± 0.000s")
    my $pct = $average != 0
        ? sprintf("%.1f%%", $sd * 100.0 / $average)
        : '';
    log_result($testname, $type, 'StdDev (Percent)', $pct);

    return;
}
 |