test-suite/compare_lnt_benchmarks.pl


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304

#!/usr/bin/env perl
# This program handles the results of the LLVM test-suite benchmark.
# There are two stages:
#   prepare: collates multiple samples into a single log file containing
#            the arithmetic mean of each test's results.
#   compare: compares two collated results and produces a file containing
#            the relative values between baseline and target runs, with
#            the geomean of compile and execution time at the end.
#
# Suggested usage:
#
# ... point install directory to the unmodified / previous compiler
# $ ./run.sh -b
# $ ./compare_lnt_benchmarks.pl prepare sandbox/build > baseline.txt
# ... change install directory to the modified / current compiler
# $ ./run.sh -b
# $ ./compare_lnt_benchmarks.pl prepare sandbox/build > target.txt
# $ ./compare_lnt_benchmarks.pl compare baseline.txt target.txt > results.txt

use strict;
use warnings;
use Scalar::Util qw(looks_like_number);
use Statistics::Descriptive;
use Data::Dumper;

# Usage text, emitted through die whenever the command line is incomplete
# or names an unknown action. The file name mentioned for "prepare" is the
# one prepare() actually looks for (report.simple.txt).
my $syntax = "Syntax: $0 <action> [action options]\n".
             "Actions:\n".
             "\tprepare: reads all report files in sample-N, collate into one run\n".
             "\t         options: <sandbox/build> with report.simple.txt or sample-N\n".
             "\tcompare: compares two runs, taking geomean of all benchmarks\n".
             "\t         options: <baseline.report.txt> <target.report.txt>\n";
my $action = $ARGV[0];
die $syntax unless defined $action;

###################################################

# Dispatch on the requested action. The subs are defined further down with
# prototypes, so they are called with a leading '&' to avoid "called too
# early to check prototype" warnings.
if ($action eq "prepare") {
  # prepare needs the sandbox build directory
  die $syntax unless defined $ARGV[1];
  &prepare($ARGV[1]);
} elsif ($action eq "compare") {
  # compare needs both the baseline and the target report
  die $syntax unless defined $ARGV[1] and defined $ARGV[2];
  &compare($ARGV[1], $ARGV[2]);
} else {
  die $syntax;
}
exit;

###################################################

# Collates the report(s) found under a build directory and prints the
# outcome through dump().
#   $basedir: directory holding either a single "report.simple.txt", which
#             is read and printed back, or "sample-N" sub-directories that
#             each hold a "report.simple.txt", which are averaged per
#             program by collate_results()
# Dies if neither layout is present or if the directory cannot be listed.
sub prepare($) {
  my ($basedir) = @_;
  my $logname = "report.simple.txt";

  # Single report, just copy
  if (-f "$basedir/$logname") {
    my %results;
    my $header = &read_file("$basedir/$logname", \%results);
    &dump(\%results, $header);

  # Multiple reports, collate
  } elsif (-f "$basedir/sample-0/$logname") {
    my @results;
    my $header;

    # List all sample directories that contain a report. 'or' (not '||')
    # so that a failing opendir is actually reported.
    opendir(my $dh, $basedir) or die "Can't open '$basedir': $!\n";
    my @samples = grep { /sample-\d+/ && -f "$basedir/$_/$logname" } readdir $dh;
    closedir $dh;
    die "Basedir '$basedir' has no txt logs\n" unless scalar @samples;

    # readdir order is unspecified; sort so the processing order (and the
    # header that ends up being printed) is the same on every run
    @samples = sort @samples;

    # For each sample, read&push
    foreach my $s (@samples) {
      my %sample;
      $header = &read_file("$basedir/$s/$logname", \%sample);
      push @results, \%sample;
    }

    # Collate results
    my %result;
    &collate_results(\@results, \%result);
    &dump(\%result, $header);

  } else {
    die "Basedir '$basedir' has no txt logs\n";
  }
}

# Compares a baseline report against a target report and prints the
# per-program relative values followed by the geomean line.
#   $baseline_file, $target_file: paths to collated report files
# Dies if either path is not a plain file (baseline is checked first).
sub compare($$) {
  my ($baseline_file, $target_file) = @_;

  # Validate both inputs before reading anything
  -f $baseline_file
    or die "Baseline report file '$baseline_file' doesn't exist or is not a file\n";
  -f $target_file
    or die "Target report file '$target_file' doesn't exist or is not a file\n";

  my %baseline;
  my %target;
  my %result;

  # The headers returned by read_file are not needed here, since the
  # comparison is dumped without one
  &read_file($baseline_file, \%baseline);
  &read_file($target_file, \%target);

  &compare_results(\%baseline, \%target, \%result);

  # Empty header, geomean line requested
  &dump(\%result, '', 1);
}

###################################################

# Reads a report file, saving every data line indexed by program name.
#   $filename: path of the report to read
#   $result:   hash ref that is filled in; each program name maps to a hash
#              with the keys cc_pass, cc_time, cc_real, ex_pass, ex_time and
#              ex_real, and the extra key 'max' holds the length of the
#              longest program name seen (at least 1)
# Returns the header line (without its newline) for further use, or the
# empty string if the file has no non-blank line.
# Dies if the file cannot be opened or if the first non-blank line is not
# the expected header.
sub read_file($$) {
  my ($filename, $result) = @_;
  my $header = '';
  my $max = 1;

  # Three-argument open with 'or': the file name can never be taken for a
  # mode, and a failure to open is reported instead of silently ignored
  open(my $fh, '<', $filename) or die "Can't open $filename: $!\n";
  while (my $line = <$fh>) {
    chomp $line;

    # Blank lines carry no data and would otherwise create an entry
    # for an empty program name
    next unless $line =~ /\S/;

    # Second column is the '|' separator, which is not kept
    my ($program, undef,
        $cc_pass, $cc_time, $cc_real,
        $ex_pass, $ex_time, $ex_real) = split /\s+/, $line;

    # Make sure we have the right file
    if (!$header) {
      die "Invalid header in $filename\n"
        unless defined $program and $program eq "Program"
           and defined $cc_time and $cc_time eq "CC_Time"
           and defined $ex_time and $ex_time eq "Exec_Time";
      $header = $line;
      next;
    }

    # Log each non-header line
    $result->{$program} = {
      'cc_pass' => $cc_pass,
      'cc_time' => $cc_time,
      'cc_real' => $cc_real,
      'ex_pass' => $ex_pass,
      'ex_time' => $ex_time,
      'ex_real' => $ex_real,
    };

    # Max length prog name
    my $len = length($program);
    $max = $len if $len > $max;
  }
  close $fh;

  $result->{'max'} = $max;

  return $header;
}

# Combines two pass/fail statuses into one.
#   - both are exactly "pass": returns "pass"
#   - exactly one of them is a true value: returns that one
#   - anything else (both true but not both "pass", or both false/undef):
#     returns "fail"
sub pass_diff($$) {
  my ($lhs, $rhs) = @_;

  my $both_pass = defined $lhs && defined $rhs
               && $lhs eq "pass" && $rhs eq "pass";
  return "pass" if $both_pass;

  # Only one side carries a status: it wins
  return $rhs if !$lhs && $rhs;
  return $lhs if $lhs && !$rhs;

  return "fail";
}

# Collates multiple reports into one, by getting the average of each
# timing into each field.
#   $results: array ref of per-sample hashes as filled in by read_file()
#   $result:  hash ref that is filled in; for every program the four timing
#             fields hold the mean() over all samples, and cc_pass/ex_pass
#             hold the statuses folded together with pass_diff()
# The 'max' entry of $result is set to the largest 'max' of all samples, so
# the padding computed by dump() covers every program name even when the
# samples do not list exactly the same programs.
sub collate_results($$) {
  my ($results, $result) = @_;
  my @timings = ('cc_time', 'cc_real', 'ex_time', 'ex_real');

  foreach my $sample (@$results) {
    foreach my $prog (keys %$sample) {
      # Skip bookkeeping entries such as 'max'
      next unless ref $sample->{$prog} eq "HASH";

      # Set up statistics
      if (!defined $result->{$prog}) {
        foreach my $field (@timings) {
          $result->{$prog}->{$field} = Statistics::Descriptive::Full->new();
        }
      }
      my $acc = $result->{$prog};
      my $run = $sample->{$prog};

      # gather results
      foreach my $field (@timings) {
        $acc->{$field}->add_data($run->{$field});
      }

      # collate pass status (undef on the first sample, so the sample's
      # own status is taken as the starting point)
      $acc->{'cc_pass'} = &pass_diff($acc->{'cc_pass'}, $run->{'cc_pass'});
      $acc->{'ex_pass'} = &pass_diff($acc->{'ex_pass'}, $run->{'ex_pass'});
    }

    # Keep the widest program-name column seen so far
    my $width = $sample->{'max'};
    $result->{'max'} = $width
      if defined $width
         and (!defined $result->{'max'} or $width > $result->{'max'});
  }

  # collate results: replace each statistics object by its mean
  foreach my $prog (keys %$result) {
    next unless ref $result->{$prog} eq "HASH";
    foreach my $field (@timings) {
      $result->{$prog}->{$field} = $result->{$prog}->{$field}->mean();
    }
  }
}

# Compares two aggregated logs with absolute values and creates a final
# result with relative values.
#   $baseline, $target: hash refs as filled in by read_file(); they are
#                       only read, never modified
#   $result:            hash ref that is filled in; for every program in
#                       the baseline the four timing fields hold
#                       target / baseline * 100 and cc_pass/ex_pass hold
#                       the combined status. The top-level entries cc_pass,
#                       ex_pass, cc_geo, ex_geo and max describe the run as
#                       a whole.
# Dies if a program of the baseline is missing from the target.
# NOTE(review): a baseline timing that is not numeric is not substituted
# below, so the division would fail on it - confirm that reports never
# carry such values.
sub compare_results($$$) {
  my ($baseline, $target, $result) = @_;
  my $cc_geo = Statistics::Descriptive::Full->new();
  my $ex_geo = Statistics::Descriptive::Full->new();
  my @timings = ('cc_time', 'cc_real', 'ex_time', 'ex_real');

  # No zero on divisions (nor in the geomean): numeric zeros are replaced
  # by a small value on a copy, leaving the caller's data untouched
  my $nonzero = sub {
    my ($value) = @_;
    return 0.001 if looks_like_number($value) and $value == 0.0;
    return $value;
  };

  $result->{'cc_pass'} = "pass";
  $result->{'ex_pass'} = "pass";
  foreach my $prog (keys %$baseline) {
    # Skip bookkeeping entries such as 'max'
    next unless ref $baseline->{$prog} eq "HASH";
    die "Program '$prog' in baseline doesn't exist in target\n"
      unless defined $target->{$prog};

    my ($base, $targ) = ($baseline->{$prog}, $target->{$prog});

    # Proportional difference
    my %relative = (
      'cc_pass' => &pass_diff($base->{'cc_pass'}, $targ->{'cc_pass'}),
      'ex_pass' => &pass_diff($base->{'ex_pass'}, $targ->{'ex_pass'}),
    );
    foreach my $field (@timings) {
      $relative{$field} =
        ($nonzero->($targ->{$field}) / $nonzero->($base->{$field})) * 100;
    }
    $result->{$prog} = \%relative;

    # Add data to statistical model (ignore real)
    $cc_geo->add_data($relative{'cc_time'});
    $ex_geo->add_data($relative{'ex_time'});

    # Update global "pass" status
    $result->{'cc_pass'} = &pass_diff($result->{'cc_pass'},
                                       $relative{'cc_pass'});
    $result->{'ex_pass'} = &pass_diff($result->{'ex_pass'},
                                       $relative{'ex_pass'});
  }

  # Get geomean of all differences / max length of prog name
  $result->{'cc_geo'} = $cc_geo->geometric_mean();
  $result->{'ex_geo'} = $ex_geo->geometric_mean();
  $result->{'max'} = $baseline->{'max'};
}

# Prints a result table through the currently selected output handle, in
# the same column layout that read_file() parses.
#   $result: hash ref; every value that is itself a hash ref is printed as
#            one program row (rows sorted by program name), and 'max' gives
#            the width the program names are padded to
#   $header: printed first, followed by a newline, unless it is empty
#   $geo:    optional; when true a final GEOMEAN line is printed from the
#            cc_pass, cc_geo, ex_pass and ex_geo entries of $result
# This sub shares its name with a Perl built-in, so it has to be called
# as &dump(...).
sub dump($$;$) {
  my ($result, $header, $geo) = @_;
  my $max = defined $result->{'max'} ? $result->{'max'} : 0;

  # Spaces needed to pad a name to $max columns; never negative, since a
  # negative repeat count would only produce a warning
  my $padding = sub {
    my ($text) = @_;
    my $count = $max - length($text);
    return $count > 0 ? ' ' x $count : '';
  };

  # If we have the header, print it
  if ($header) {
    print $header."\n";
  }

  foreach my $prog (sort keys %$result) {
    # Skip the scalar bookkeeping entries (max, geomean, global status)
    next unless ref $result->{$prog} eq "HASH";

    # Dump the program name, with spaces at the end
    my $p = $result->{$prog};
    print $prog.$padding->($prog)."\t|\t";

    # Compile time
    print "\t".$p->{'cc_pass'};
    printf("\t%0.4f", $p->{'cc_time'});
    printf("\t%0.4f", $p->{'cc_real'});

    # Execution time. The status is data, not a format string, so it is
    # written with print rather than printf.
    print "\t".$p->{'ex_pass'};
    printf("\t%0.4f", $p->{'ex_time'});
    printf("\t%0.4f", $p->{'ex_real'});

    print "\n";
  }

  # Print the final geomean line
  if ($geo) {
    print "GEOMEAN".$padding->("GEOMEAN")."\t|\t";
    print "\t".$result->{'cc_pass'};
    printf("\t%0.2f\t", $result->{'cc_geo'});
    print "\t".$result->{'ex_pass'};
    printf("\t%0.2f", $result->{'ex_geo'});
    print "\n";
  }
}