[contrib] validate_failures.py: Add "flaky" statistic.

Count the number of flaky entries in the manifest and remove flaky
tests from the set of failed tests, so that flaky tests no longer
contribute to the "failed" count.
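For example, a manifest entry marked flaky might look like this
(hypothetical test name, assuming the manifest's usual
"attribute | result" line syntax):

    flaky | FAIL: g++.dg/foo.C -std=c++11 (test for excess errors)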

Change-Id: Id3ee1ce5ef60d570cbc0a534744d5197656b3442
diff --git a/contrib/testsuite-management/validate_failures.py b/contrib/testsuite-management/validate_failures.py
index 1d69376..9b8d0f5 100755
--- a/contrib/testsuite-management/validate_failures.py
+++ b/contrib/testsuite-management/validate_failures.py
@@ -252,7 +252,6 @@
         outfile.write(_EXP_LINE_FORMAT % (current_tool, current_exp))
       outfile.write('%s\n' % result)
 
-
   # Check if testsuite of expected_result is present in current results.
   # This is used to compare partial test results against a full manifest.
   def HasTestsuite(self, expected_result):
@@ -506,9 +505,6 @@
   # they are expected failures that are not failing anymore).
   manifest_vs_actual = ResultSet()
   for expected_result in manifest:
-    # Ignore tests marked flaky.
-    if 'flaky' in expected_result.attrs:
-      continue
     # We try to support comparing partial results vs full manifest
     # (e.g., manifest has failures for gcc, g++, gfortran, but we ran only
     # g++ testsuite).  To achieve this we record encountered testsuites in
@@ -571,9 +567,26 @@
     sum_files = results.split()
   return sum_files
 
+def DiscardFlaky(expected, actual):
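+  """Remove flaky tests from EXPECTED and ACTUAL; return the flaky count."""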
+  flaky_list = []
+  for expected_result in expected:
+    if 'flaky' in expected_result.attrs:
+      flaky_list.append(expected_result)
+
+  for expected_result in flaky_list:
+    expected.remove(expected_result)
+    actual.discard(expected_result)
+
+  return len(flaky_list)
+
 
 def PerformComparison(expected, actual):
   stats = ResultsStats()
+  stats.total = actual.total
+  # We need to ignore flaky tests in comparison, so remove them now from
+  # both expected and actual sets.
+  stats.flaky = DiscardFlaky(expected, actual)
   stats.fails = len(actual)
 
   actual_vs_expected, expected_vs_actual = CompareResults(expected, actual)