1 files changed, 304 insertions, 0 deletions
diff --git a/test-suite/compare_lnt_benchmarks.pl b/test-suite/compare_lnt_benchmarks.pl
new file mode 100755
index 0000000..f0d5139
--- /dev/null
+++ b/test-suite/compare_lnt_benchmarks.pl
@@ -0,0 +1,304 @@
+#!/usr/bin/env perl
+# This program handles the results of the LLVM test-suite benchmark.
+# There are two stages:
+#   prepare: collates multiple samples into a single log file containing
+#            the arithmetic mean of each test's results.
+#   compare: compares two collated results and produces a report containing
+#            the target values as a percentage of the baseline values, with
+#            the geomean of compile and execution time at the end.
+#
+# Suggested usage:
+#
+# ... point install directory to the unmodified / previous compiler
+# $ ./run.sh -b
+# $ ./compare_lnt_benchmarks.pl prepare sandbox/build > baseline.txt
+# ... change install directory to the modified / current compiler
+# $ ./run.sh -b
+# $ ./compare_lnt_benchmarks.pl prepare sandbox/build > target.txt
+# $ ./compare_lnt_benchmarks.pl compare baseline.txt target.txt > results.txt
+
+use strict;
+use warnings;
+use Scalar::Util qw(looks_like_number);
+use Statistics::Descriptive;
+use Data::Dumper;
+
+my $syntax = "Syntax: $0 <action> [action options]\n".
+             "Actions:\n".
+             "\tprepare: reads all report files in sample-N, collate into one run\n".
+             "\t         options: <sandbox/build> with report.simple.txt or sample-N\n".
+             "\tcompare: compares two runs, taking geomean of all benchmarks\n".
+             "\t         options: <baseline.report.txt> <target.report.txt>\n";
+my $action = $ARGV[0];
+die $syntax unless defined $action;  # no action given: print the usage text and fail
+
+###################################################
+# Dispatch on the action; each branch dies with the usage text if an argument is missing.
+if ($action eq "prepare") {
+  die $syntax unless defined $ARGV[1];
+  &prepare($ARGV[1]);
+} elsif ($action eq "compare") {
+  die $syntax unless defined $ARGV[1] and defined $ARGV[2];
+  &compare($ARGV[1], $ARGV[2]);
+} else {
+  die $syntax;  # unknown action
+}
+exit;
+
+###################################################
+
+# prepare($basedir): print the collated report for one run directory.
+# Re-prints $basedir/report.simple.txt if present, else averages the sample-N/ reports; dies if neither exists.
+sub prepare($) {
+  my ($basedir) = @_;
+  my $logname = "report.simple.txt";
+
+  # Single report, just copy
+  if (-f "$basedir/$logname") {
+    my %results;
+    my $header = &read_file("$basedir/$logname", \%results);
+    &dump(\%results, $header);  # '&' needed: a bare dump is Perl's builtin
+
+  # Multiple reports, collate
+  } elsif (-f "$basedir/sample-0/$logname") {
+    my @results;
+    my $header;
+
+    # List directories named exactly sample-N that hold a report ('or', not '||', so a failed opendir dies)
+    opendir(my $dh, $basedir) or die "Can't open '$basedir': $!\n";
+    my @samples = grep { /\Asample-\d+\z/ && -f "$basedir/$_/$logname" } readdir $dh;
+    closedir $dh;
+    die "Basedir '$basedir' has no txt logs\n" unless scalar @samples;
+
+    # Read every sample; the header of the last one read is used for the output
+    foreach my $s (@samples) {
+      my %sample;
+      $header = &read_file("$basedir/$s/$logname", \%sample);
+      push @results, \%sample;
+    }
+
+    # Collate results
+    my %result;
+    &collate_results(\@results, \%result);
+    &dump(\%result, $header);
+  } else {
+    die "Basedir '$basedir' has no txt logs\n"
+  }
+}
+
+# compare($baseline_file, $target_file): print target-vs-baseline ratios plus the geomean line
+sub compare($$) {
+  my ($baseline_file, $target_file) = @_;
+  # Both inputs must be existing plain files (the baseline is checked first)
+  -f $baseline_file
+    or die "Baseline report file '$baseline_file' doesn't exist or is not a file\n";
+  -f $target_file
+    or die "Target report file '$target_file' doesn't exist or is not a file\n";
+
+  my (%baseline, %target, %result);
+  &read_file($baseline_file, \%baseline);  # the returned header is not needed here
+  &read_file($target_file, \%target);
+  &compare_results(\%baseline, \%target, \%result);
+
+  &dump(\%result, '', 1);  # no header line, but append the GEOMEAN line
+}
+
+###################################################
+
+# read_file($filename, $result): parse one report into %$result, keyed by program name.
+# Also stores the longest program-name length under 'max'. Returns the header line.
+sub read_file($$) {
+  my ($filename, $result) = @_;
+  my $header = '';
+  my $max = 1;
+  # 3-arg open and 'or': with '||' the die was bound to $filename and could never fire
+  open(my $fh, '<', $filename) or die "Can't open $filename: $!\n";
+  while (<$fh>) {
+    chomp();
+    next unless /\S/;  # blank lines would otherwise create an entry with an empty name
+    my ($program, $sep,
+        $cc_pass, $cc_time, $cc_real,
+        $ex_pass, $ex_time, $ex_real) = split /\s+/;
+    # Make sure we have the right file
+    if (!$header) {
+      die "Invalid header in $filename\n"
+        unless $program eq "Program"
+           and $cc_time eq "CC_Time"
+           and $ex_time eq "Exec_Time";
+      $header = $_;
+      next;
+    }
+    # Log each non-header line
+    $result->{$program} = {
+      'cc_pass' => $cc_pass,
+      'cc_time' => $cc_time,
+      'cc_real' => $cc_real,
+      'ex_pass' => $ex_pass,
+      'ex_time' => $ex_time,
+      'ex_real' => $ex_real,
+    };
+    # Track the longest program name seen so far
+    $max = length($program) if length($program) > $max;
+  }
+  close $fh;
+
+  $result->{'max'} = $max;
+
+  return $header;
+}
+
+# pass_diff($baseline, $target): merge two pass/fail statuses into one.
+# "pass" only if both are "pass"; if exactly one side is set (true), that
+# side's status is returned unchanged; every other combination is "fail".
+sub pass_diff($$) {
+  my ($baseline, $target) = @_;
+  my $have_b = $baseline ? 1 : 0;
+  my $have_t = $target ? 1 : 0;
+
+  return "pass" if $have_b and $have_t
+               and $baseline eq "pass" and $target eq "pass";
+  return $target   if $have_t and not $have_b;
+  return $baseline if $have_b and not $have_t;
+  return "fail";
+}
+
+# collate_results($results, $result): average several samples into one report.
+#   $results: array ref of per-sample hashes, as filled in by read_file
+#   $result:  hash ref to fill; per program it gets the mean of each timing
+#             field and the cc_pass/ex_pass statuses merged by pass_diff
+# The 'max' entry (longest program-name length) is carried over as well.
+sub collate_results($$) {
+  my ($results, $result) = @_;
+  my @timings = qw(cc_time cc_real ex_time ex_real);
+
+  foreach my $sample (@$results) {
+    foreach my $prog (keys %$sample) {
+      # Only per-program entries are hashes; this skips the scalar 'max'
+      next unless ref $sample->{$prog} eq "HASH";
+
+      # Set up one statistics accumulator per timing field
+      if (!defined $result->{$prog}) {
+        $result->{$prog}->{$_} = Statistics::Descriptive::Full->new()
+          foreach @timings;
+      }
+      # gather results
+      $result->{$prog}->{$_}->add_data($sample->{$prog}->{$_})
+        foreach @timings;
+      # collate pass status (unset for the first sample, which pass_diff then adopts)
+      foreach my $status (qw(cc_pass ex_pass)) {
+        $result->{$prog}->{$status} =
+          &pass_diff($result->{$prog}->{$status},
+                     $sample->{$prog}->{$status});
+      }
+    }
+    # Keep the largest width seen, in case samples do not list the same programs
+    $result->{'max'} = $sample->{'max'}
+      if !defined $result->{'max'} or $sample->{'max'} > $result->{'max'};
+  }
+
+  # Replace each accumulator with the arithmetic mean of its samples
+  foreach my $prog (keys %$result) {
+    next unless ref $result->{$prog} eq "HASH";
+    $result->{$prog}->{$_} = $result->{$prog}->{$_}->mean()
+      foreach @timings;
+  }
+}
+
+# compare_results($baseline, $target, $result): fill %$result with relative values.
+# Per program: each timing is target/baseline*100, each status goes through pass_diff.
+# Also sets overall cc_pass/ex_pass, geomeans cc_geo/ex_geo (cc_time/ex_time only) and 'max'.
+# Dies if a baseline program is missing from target; the input hashes are not modified.
+sub compare_results($$$) {
+  my ($baseline, $target, $result) = @_;
+  my $cc_geo = Statistics::Descriptive::Full->new();
+  my $ex_geo = Statistics::Descriptive::Full->new();
+
+  $result->{'cc_pass'} = "pass";
+  $result->{'ex_pass'} = "pass";
+  foreach my $prog (keys %$baseline) {
+    next unless ref $baseline->{$prog} eq "HASH";
+    die "Program '$prog' in baseline doesn't exist in target\n"
+      unless defined $target->{$prog};
+
+    # Work on copies (not named $a/$b, which belong to sort) so the caller's data stays intact
+    my %base = %{ $baseline->{$prog} };
+    my %targ = %{ $target->{$prog} };
+    # No zero on divisions: clamp exact numeric zeros to 0.001 on both sides
+    foreach my $fields (\%base, \%targ) {
+      foreach my $k (keys %$fields) {
+        $fields->{$k} = 0.001 if looks_like_number($fields->{$k})
+                              and $fields->{$k} == 0.0;
+      }
+    }
+
+    # Proportional difference, as a percentage of the baseline value
+    $result->{$prog} = {
+      'cc_pass' => &pass_diff($base{'cc_pass'}, $targ{'cc_pass'}),
+      'cc_time' => ($targ{'cc_time'} / $base{'cc_time'}) * 100,
+      'cc_real' => ($targ{'cc_real'} / $base{'cc_real'}) * 100,
+      'ex_pass' => &pass_diff($base{'ex_pass'}, $targ{'ex_pass'}),
+      'ex_time' => ($targ{'ex_time'} / $base{'ex_time'}) * 100,
+      'ex_real' => ($targ{'ex_real'} / $base{'ex_real'}) * 100,
+    };
+    # Add data to statistical model (ignore real)
+    $cc_geo->add_data($result->{$prog}->{'cc_time'});
+    $ex_geo->add_data($result->{$prog}->{'ex_time'});
+    # Update global "pass" status
+    $result->{'cc_pass'} = &pass_diff($result->{'cc_pass'},
+                                       $result->{$prog}->{'cc_pass'});
+    $result->{'ex_pass'} = &pass_diff($result->{'ex_pass'},
+                                       $result->{$prog}->{'ex_pass'});
+  }
+
+  # Get geomean of all differences / max length of prog name
+  $result->{'cc_geo'} = $cc_geo->geometric_mean();
+  $result->{'ex_geo'} = $ex_geo->geometric_mean();
+  $result->{'max'} = $baseline->{'max'};
+}
+
+# dump($result, $header, $geo): print a report to STDOUT in the layout read_file parses.
+#   $header: printed first when true; $geo (optional): when true, append the GEOMEAN line.
+# Call it as &dump(...): a bare dump is Perl's builtin of the same name.
+sub dump($$;$) {
+  my ($result, $header, $geo) = @_;
+  my $max = $result->{'max'};
+
+  # If we have the header, print it
+  if ($header) {
+    print $header."\n";
+  }
+
+  foreach my $prog (sort keys %$result) {
+    # Per-program rows are hashes; scalar entries ('max', geomeans, ...) are skipped
+    next unless ref $result->{$prog} eq "HASH";
+
+    # Dump the program name, padded with spaces to the longest name
+    my $p = $result->{$prog};
+    my $spaces = $max - length($prog);
+    print $prog, ' ' x $spaces, "\t|\t";
+
+    # Compile time
+    print "\t".$p->{'cc_pass'};
+    printf("\t%0.4f", $p->{'cc_time'});
+    printf("\t%0.4f", $p->{'cc_real'});
+
+    # Execution time (print, not printf: the status is data, not a format string)
+    print "\t".$p->{'ex_pass'};
+    printf("\t%0.4f", $p->{'ex_time'});
+    printf("\t%0.4f", $p->{'ex_real'});
+
+    print "\n";
+  }
+
+  # Print the final geomean line
+  if ($geo) {
+    print "GEOMEAN";
+    print ' ' x ($max - 7);
+    print "\t|\t";
+    print "\t".$result->{'cc_pass'};
+    printf("\t%0.2f\t", $result->{'cc_geo'});
+    print "\t".$result->{'ex_pass'};
+    printf("\t%0.2f", $result->{'ex_geo'});
+    print "\n";
+  }
+}