blob: 3f9924b219c9eb9ca921bbb6f9079bcfbf52aae7 [file] [log] [blame]
#! /usr/bin/env python3
import os
import logging
from datetime import timedelta
from functools import lru_cache
# Base dict order not guaranteed until 3.7+
from collections import OrderedDict
from datetime import datetime
# To convert time stamps
# (use datetime.datetime.fromisoformat in Python3.7+)
import dateutil.parser
from pybuildkite.buildkite import Buildkite, BuildState
# Module-wide Buildkite API client shared by the functions below.
# It is created without credentials; get_buildkite_bots_status() sets
# the access token on it.
buildkite = Buildkite()
def get_buildkite_token():
    """
    Read the Buildkite API token from ~/.buildkite_token.

    We are assuming that this script is run on a machine
    that can only be accessed by users that would already be
    able to generate such tokens.

    We could use an env var but the monitor is run via cron
    which would have to inject the token. So it's going to end
    up on disk anyway.

    The format of this file is just a single line which is the
    token.

    Returns the token (first line of the file, with surrounding
    whitespace stripped) or None if the file could not be read or
    that line was empty. Problems are logged at debug level rather
    than raised, so a missing or unreadable token only disables
    Buildkite status fetching.
    """
    tokenfile_path = os.path.expanduser(os.path.join("~", ".buildkite_token"))
    err_msg = "{}, Buildkite status will not be fetched."

    try:
        with open(tokenfile_path) as f:
            # strip to remove ending newline
            token = f.readline().strip()
    except (OSError, UnicodeDecodeError):
        # OSError covers a missing file as well as one that exists but
        # cannot be opened (permissions, path is a directory, ...).
        logging.debug(err_msg.format("Couldn't read Buildkite API token file"))
        return None

    if not token:
        logging.debug(err_msg.format("Buildkite API token file was empty"))
        return None

    return token
@lru_cache(maxsize=None)
def get_pipeline_builds(organisation, pipeline):
    """
    Fetch every finished build of `pipeline` in `organisation` using
    the module-level Buildkite client.

    Results are memoised per (organisation, pipeline) pair by lru_cache
    with no size limit, so repeated lookups for different jobs on the
    same pipeline reuse the first response.
    """
    builds_api = buildkite.builds()
    # Finished means passed, failed, blocked or cancelled
    wanted_states = [BuildState.FINISHED]
    return builds_api.list_all_for_pipeline(
        organisation, pipeline, states=wanted_states
    )
def get_last_finished_build_status(organisation, job_name, pipeline):
    """
    Report on the first started job called `job_name` found in the
    finished builds of `pipeline`. (A job is one of the pipeline steps.)

    Builds are examined in the order get_pipeline_builds() returns them.
    On success the result is a dict with the keys "builder_url",
    "build_url", "number", "state", "duration", "time_since" and "fail".
    If no suitable job is found the result is {"valid": False}.
    """

    def whole_seconds(delta):
        # Buildkite gives us millisecond resolution but we don't need that
        return timedelta(seconds=int(delta.total_seconds()))

    for candidate in get_pipeline_builds(organisation, pipeline):
        # There are two kinds of builds: those triggered from phab reviews
        # and those run periodically to post-commit build the main and
        # release branches. The status page only wants post-commit builds
        # of main. ph_buildable_revision is set if Phabricator triggered
        # the build.
        post_commit_main = (
            "ph_buildable_revision" not in candidate["env"]
            and candidate["branch"] == "main"
        )
        if not post_commit_main:
            continue

        # Each "job" is a config e.g. AArch64, MacOS, C++20 etc.
        for step in candidate["jobs"]:
            # Some jobs have no name (probably the wait steps between
            # jobs or groups of jobs), hence .get().
            # Jobs that never started, due to failures earlier in the
            # pipeline, have no start time. That isn't our fault so
            # they are skipped too.
            if step.get("name") == job_name and step.get("started_at") is not None:
                # Measure from the start time because that is when the
                # job began running on the agent (waiting time is not
                # included).
                began = dateutil.parser.isoparse(step["started_at"])
                ended = dateutil.parser.isoparse(step["finished_at"])
                # Buildkite's timestamps carry timezone info but
                # datetime.now() has none by default, so ask for the
                # current time in the same timezone.
                current = datetime.now(ended.tzinfo)
                outcome = step["state"]
                return {
                    # 'url' is the API url, we want a clickable link
                    "builder_url": step["agent"]["web_url"],
                    # The job's url takes you to the specific agent,
                    # unlike the build's url
                    "build_url": step["web_url"],
                    "number": candidate["number"],
                    "state": outcome,
                    "duration": whole_seconds(ended - began),
                    "time_since": whole_seconds(current - ended),
                    "fail": outcome != "passed",
                }

    logging.debug(
        'Found no finished builds for job "{}" '
        'on pipeline "{}"'.format(job_name, pipeline)
    )
    return {"valid": False}
def bot_key(job_name):
    """
    Build the (base_url, bot name) tuple used to key the status
    of a Buildkite job.
    """
    base_url = "https://www.buildkite.com"
    return base_url, job_name
# Get status for all Buildkite bots in config
# Return a dictionary of (base_url, bot name) -> status info
def get_buildkite_bots_status(config):
    """
    Collect the status of every bot listed under servers named
    "Buildkite" in `config`; other servers are ignored.

    Returns a dict mapping bot_key(job name) to that job's status info.
    When no API token is available every bot maps to {"valid": False}.

    Raises RuntimeError if a token is available and a Buildkite server
    entry has no "buildkite_org".
    """
    api_token = get_buildkite_token()
    buildkite.set_access_token(api_token)

    results = {}
    buildkite_servers = (entry for entry in config if entry["name"] == "Buildkite")
    for server in buildkite_servers:
        if api_token is None:
            # We cannot query buildkite but we don't want to mark these
            # bots as offline because we don't know that.
            results.update(
                {
                    bot_key(bot["name"]): {"valid": False}
                    for builder in server["builders"]
                    for bot in builder["bots"]
                }
            )
        else:
            # With a token we can get the real status
            organisation = server.get("buildkite_org")
            if organisation is None:
                raise RuntimeError(
                    "Buildkite servers must also have a 'buildkite_org'"
                )

            for builder in server["builders"]:
                for bot in builder["bots"]:
                    name = bot["name"]
                    logging.debug(
                        'Getting status for buildkite job "{}"'.format(name)
                    )
                    bot_status = get_last_finished_build_status(
                        organisation, name, bot["buildkite_pipeline"]
                    )
                    results[bot_key(name)] = bot_status

    return results