#!/usr/bin/env python3
# This script greps the JSON files for the buildbots on the LLVM official
# build master by name and prints an HTML page with the links to the bots
# and the status.
#
# Multiple masters can be used, as well as multiple groups of bots and
# multiple bots per group, all in a json file. See linaro.json in this
# repository to have an idea how the config file is.
import sys
import os
import argparse
import json
import tempfile
import logging
import pickle
import shutil
import time
from datetime import datetime, timedelta
# The requests allows HTTP keep-alive which re-uses the same TCP connection
# to download multiple files.
import requests
from textwrap import dedent
from make_table import Table
def ignored(s):
    """Return the entry's "ignore" value (False when the key is absent).

    Config entries marked this way are skipped by every consumer.
    """
    return s["ignore"] if "ignore" in s else False


def not_ignored(s):
    """Filter predicate: True for config entries not marked "ignore"."""
    return not ignored(s)
# Returns the parsed json URL or raises an exception
def wget(session, url):
    """Download *url* via *session* and return its body parsed as JSON.

    Raises an HTTP error (via raise_for_status) for non-2xx responses,
    so callers can treat any exception as "could not fetch".
    """
    response = session.get(url)
    response.raise_for_status()
    return response.json()
# Map from buildbot status codes we want to treat as errors to the color they
# should be shown in. The codes are documented at
# https://docs.buildbot.net/latest/developer/results.html#build-result-codes,
# and these colors match the suggested ones there.
# Codes not listed here (e.g. 0 success, 1 warnings) are not shown as
# failing steps at all -- see get_bot_failing_steps().
RESULT_COLORS = {
    2: "red",  # Error
    4: "purple",  # Exception
    5: "purple",  # Retry
    6: "pink",  # Cancelled
}
def get_bot_failing_steps(session, base_url, buildid):
    """Yield (step name, result code) for every step of build *buildid*
    whose result code is one we display as an error (see RESULT_COLORS).

    Network problems are treated as "no failing steps": the generator
    simply stops, so the page generation carries on (best effort).
    """
    try:
        contents = wget(session, "{}/api/v2/builds/{}/steps".format(base_url, buildid))
    except requests.exceptions.RequestException:
        # Bug fix: this is a generator, so `return ""` did not return a
        # string -- it only attached a misleading value to StopIteration.
        # A bare return expresses the intent (yield nothing) correctly.
        return
    for step in contents["steps"]:
        if step["results"] in RESULT_COLORS:
            yield (step["name"], step["results"])
# Get the status of an individual bot BOT. Returns a dict with the
# information.
def get_bot_status(session, bot, base_url, builder_url, build_url):
    """Summarise the newest completed build of *bot*.

    Returns one of:
      * {"valid": False} when the build list cannot be downloaded;
      * None (implicitly) when the bot has no completed build at all;
      * otherwise a dict with builder/build URLs, timing info, the failing
        steps and, on failure, the start of the failure streak.
    """
    try:
        builds = wget(
            session, "{}/api/v2/{}/{}/{}".format(base_url, builder_url, bot, build_url)
        )
    except requests.exceptions.RequestException as e:
        logging.debug(" Couldn't get builds for bot {}!".format(bot))
        return {"valid": False}
    # Walk the builds newest-first. NOTE: this iterator is shared with the
    # inner loop below, which continues the same walk to find the oldest
    # build of the current failure streak.
    reversed_builds = iter(sorted(builds["builds"], key=lambda b: -b["number"]))
    next_build = None
    for build in reversed_builds:
        if not build["complete"]:
            # Remember the most recent in-progress build so the page can
            # link to it ("Build In Progress" column).
            next_build = build
            continue
        # NOTE(review): assumes complete_at/started_at are Unix timestamps
        # comparable with this host's datetime.now() -- verify against the
        # buildbot master's clock/timezone.
        time_since = int(datetime.now().timestamp()) - int(build["complete_at"])
        duration = int(build["complete_at"]) - int(build["started_at"])
        agent_url = "{}/#/{}/{}".format(base_url, builder_url, build["builderid"])
        status = {
            "builder_url": agent_url,
            "number": build["number"],
            "build_url": "{}/builds/{}".format(agent_url, build["number"]),
            "state": build["state_string"],
            "time_since": timedelta(seconds=time_since),
            "duration": timedelta(seconds=duration),
            # Anything but the stock success message counts as a failure.
            "fail": build["state_string"] != "build successful",
            "next_in_progress": None
            if next_build is None
            else "{}/builds/{}".format(agent_url, next_build["number"]),
        }
        if status["fail"]:
            buildid = build["buildid"]
            status["steps"] = list(get_bot_failing_steps(session, base_url, buildid))
            # find the start of the failure streak: keep walking older
            # builds until we hit a successful one; the build just after it
            # (newer) is the first failure.
            first_fail = build
            for build in reversed_builds:
                if build["state_string"] == "build successful":
                    status["first_fail_number"] = first_fail["number"]
                    status["first_fail_url"] = "{}/builds/{}".format(
                        agent_url, first_fail["number"]
                    )
                    # Occasionally we find a finished build without complete_at,
                    # it may be an intermittent issue on Buildbot's side.
                    complete_at = first_fail.get("complete_at")
                    if complete_at is not None:
                        fail_since = int(datetime.now().timestamp()) - int(complete_at)
                        status["fail_since"] = timedelta(seconds=fail_since)
                    break
                first_fail = build
            else:
                pass  # fails since forever?
        # Only the newest completed build matters -- stop here.
        return status
# Get status for all bots named in the config
# Return a dictionary of (base_url, bot name) -> status info
def get_buildbot_bots_status(config):
    """Query the status of every bot listed in *config*.

    Servers marked "ignore" are skipped. A single HTTP session is reused
    for all requests (keep-alive), and each bot is queried at most once
    even if it appears under several builders.
    """
    session = requests.Session()
    statuses = {}
    for server in filter(not_ignored, config):
        server_url = server["base_url"]
        logging.debug("Parsing server {}...".format(server["name"]))
        for builder in server["builders"]:
            logging.debug(" Parsing builders {}...".format(builder["name"]))
            for bot in builder["bots"]:
                key = (server_url, bot["name"])
                if key in statuses:
                    # Already queried under another builder.
                    continue
                logging.debug("  Parsing bot {}...".format(bot["name"]))
                status = get_bot_status(
                    session,
                    bot["name"],
                    server_url,
                    server["builder_url"],
                    server["build_url"],
                )
                if status is None:
                    # No completed builds at all; leave it out so the page
                    # reports the bot as offline.
                    continue
                if status.get("valid", True):
                    logging.debug(
                        " Bot status: " + ("FAIL" if status["fail"] else "PASS")
                    )
                # Invalid statuses are cached too, so the page can say the
                # bot could not be read instead of re-querying it.
                statuses[key] = status
    return statuses
def write_bot_status(config, output_file, bots_status):
    """Render *bots_status* as an HTML page at *output_file*.

    bots_status maps (base_url, bot name) -> the dict produced by
    get_bot_status(). Bots missing from the map are shown as offline;
    entries with "valid" == False as unreadable.

    The page is written to a temporary file which is then moved over
    *output_file*, so readers never observe a partially-written page.
    """
    temp = tempfile.NamedTemporaryFile(mode="w+", delete=False)
    # NOTE(review): the original wrote dedent("""\<newline>""") here, i.e.
    # an empty string (any HTML preamble appears to have been lost); the
    # no-op write has been dropped.
    column_titles = [
        "Buildbot",
        "Status",
        "T Since",
        "Duration",
        "Latest",
        "Failing steps",
        "Build In Progress",
        "1st Failing",
        "Failing Since",
    ]
    num_columns = len(column_titles)
    # The first table should also say when this was generated.
    # If we were to put this in its own header only table, it would
    # not align with the rest because it has no content.
    first = True
    # Dump all servers / bots
    for server in filter(not_ignored, config):
        with Table(temp) as table:
            table.Border(0).Cellspacing(1).Cellpadding(2)
            table.AddRow().AddCell().Colspan(num_columns)
            if first:
                table.AddRow().AddHeader(
                    "Generated {} ({})".format(
                        datetime.today().ctime(), time.tzname[time.daylight]
                    )
                ).Colspan(num_columns)
                table.AddRow().AddCell().Colspan(num_columns)
                first = False
            table.AddRow().AddHeader(server["name"]).Colspan(num_columns)
            for builder in server["builders"]:
                table.AddRow().AddCell().Colspan(num_columns)
                table.AddRow().AddHeader(builder["name"]).Colspan(num_columns)
                title_row = table.AddRow()
                for title in column_titles:
                    title_row.AddHeader(title)
                table.BeginBody()
                for bot in builder["bots"]:
                    logging.debug("Writing out status for {}".format(bot["name"]))
                    row = table.AddRow()
                    base_url = server["base_url"]
                    try:
                        status = bots_status[(base_url, bot["name"])]
                    except KeyError:
                        # get_bot_status() found no completed builds at all.
                        row.AddCell("{} is offline!".format(bot["name"])).Colspan(
                            num_columns
                        )
                        continue
                    if not status.get("valid", True):
                        row.AddCell(
                            "Could not read status for {}!".format(bot["name"])
                        ).Colspan(num_columns)
                        continue
                    # Bot name linked to its builder page. Bug fix: the
                    # original format string had one placeholder but two
                    # arguments, silently dropping the bot name (and the
                    # link markup this page exists to produce).
                    row.AddCell(
                        '<a href="{}">{}</a>'.format(status["builder_url"], bot["name"])
                    )
                    status_cell = row.AddCell()
                    if status["fail"]:
                        status_cell.Style("color:red").Content("FAIL")
                    else:
                        status_cell.Style("color:green").Content("PASS")
                    time_since_cell = row.AddCell()
                    if "time_since" in status:
                        time_since = status["time_since"]
                        # No build should be taking more than a day
                        if time_since > timedelta(hours=24):
                            time_since_cell.Style("color:red")
                        time_since_cell.Content(time_since)
                    duration_cell = row.AddCell()
                    if "duration" in status:
                        duration_cell.Content(status["duration"])
                    number_cell = row.AddCell()
                    if "number" in status:
                        # Bug fix: build number was dropped (see above).
                        number_cell.Content(
                            '<a href="{}">{}</a>'.format(
                                status["build_url"], status["number"]
                            )
                        )
                    steps_cell = row.AddCell()
                    if "steps" in status and status["steps"]:

                        def render_step(name, result):
                            # Color the step name by its buildbot result
                            # code (see RESULT_COLORS).
                            return '<span style="color:{}">{}</span>'.format(
                                RESULT_COLORS[result], name
                            )

                        step_list = ", ".join(
                            render_step(name, result)
                            for name, result in status["steps"]
                        )
                        steps_cell.Style("text-align:center").Content(step_list)
                    next_in_progress_cell = row.AddCell()
                    if "next_in_progress" in status:
                        next_build = status["next_in_progress"]
                        # Bug fix: "Yes".format(next_build) discarded the
                        # build URL entirely; link "Yes" to the build.
                        next_in_progress_cell.Content(
                            "No"
                            if next_build is None
                            else '<a href="{}">Yes</a>'.format(next_build)
                        )
                    first_fail_cell = row.AddCell()
                    if "first_fail_number" in status:
                        # Bug fix: first-fail number was dropped (see above).
                        first_fail_cell.Content(
                            '<a href="{}">{}</a>'.format(
                                status["first_fail_url"], status["first_fail_number"]
                            )
                        )
                    fail_since_cell = row.AddCell()
                    if "fail_since" in status:
                        fail_since = status["fail_since"]
                        # No build should fail for more than a day
                        if fail_since > timedelta(hours=24):
                            fail_since_cell.Style("color:red")
                        fail_since_cell.Content(fail_since)
                table.EndBody()
    # Move temp to main (atomic change)
    temp.close()
    shutil.move(temp.name, output_file)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"-d", dest="debug", action="store_true", help="show debug log messages"
)
parser.add_argument(
"--cachefile",
required=False,
help="Location of bot status data cache file (a pickled Python object). If it exists use it, "
"if it does not, read the status from the network and write it to this path.",
)
parser.add_argument("config_file", help="Bots description in JSON format")
parser.add_argument("output_file", help="output HTML path")
args = parser.parse_args()
if args.debug:
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
try:
with open(args.config_file, "r") as f:
config = json.load(f)
except IOError as e:
print("error: failed to read {} config file: {}".format(args.config_file, e))
sys.exit(os.EX_CONFIG)
status = None
if args.cachefile and os.path.exists(args.cachefile):
logging.debug("Using cache file {}".format(args.cachefile))
with open(args.cachefile, "rb") as f:
status = pickle.load(f)
else:
status = get_buildbot_bots_status(config)
if args.cachefile:
logging.debug("Writing status to cache file {}".format(args.cachefile))
with open(args.cachefile, "wb") as f:
pickle.dump(status, f)
write_bot_status(config, args.output_file, status)