blob: 39706c8599b72e4a11526da60d6733a4187e2edc [file] [log] [blame]
David Spickett30a986f2021-04-29 09:37:00 +01001#! /usr/bin/env python3
2
3import os
4import logging
5from datetime import timedelta
6from functools import lru_cache
David Spickett4f932d12023-06-13 12:32:06 +01007
David Spickett30a986f2021-04-29 09:37:00 +01008# Base dict order not guaranteed until 3.7+
9from collections import OrderedDict
10from datetime import datetime
David Spickett4f932d12023-06-13 12:32:06 +010011
David Spickett30a986f2021-04-29 09:37:00 +010012# To convert time stamps
13# (use datetime.datetime.fromisoformat in Python3.7+)
14import dateutil.parser
15from pybuildkite.buildkite import Buildkite, BuildState
16
# Module-level Buildkite API client shared by the functions in this file.
# get_buildkite_bots_status() sets its access token, and
# get_pipeline_builds() uses it to query builds.
buildkite = Buildkite()
18
19
def get_buildkite_token():
    """
    Read the Buildkite API token from ~/.buildkite_token.

    We are assuming that this script is run on a machine
    that can only be accessed by users that would already be
    able to generate such tokens.

    We could use an env var but the monitor is run via cron
    which would have to inject the token. So it's going to end
    up on disk anyway.

    The format of this file is just a single line which is the
    token.

    Returns the token string, or None if the file could not be read
    or its first line is blank. Neither case raises; a debug message
    is logged instead so the caller can carry on without Buildkite
    status.
    """
    tokenfile_path = os.path.expanduser(os.path.join("~", ".buildkite_token"))
    err_msg = "{}, Buildkite status will not be fetched."

    try:
        with open(tokenfile_path) as f:
            # strip to remove ending newline
            token = f.readline().strip()
    except OSError:
        # Covers a missing file as well as one we are not allowed to read
        # or a path that is not a regular file. None of these should take
        # the whole monitor down.
        logging.debug(err_msg.format("Couldn't read Buildkite API token file"))
        return None

    if not token:
        logging.debug(err_msg.format("Buildkite API token file was empty"))
        return None

    return token
48
49
@lru_cache(maxsize=None)
def get_pipeline_builds(organisation, pipeline):
    """
    Fetch the finished builds of the "main" branch for a pipeline.

    The result is memoised per (organisation, pipeline) pair, so
    repeated calls with the same arguments reuse the first response.
    """
    builds_api = buildkite.builds()

    # Finished means passed, failed, blocked or cancelled
    wanted_states = [BuildState.FINISHED]

    # There are two kinds of builds. Those triggered from phab reviews
    # and those run periodically to post-commit build the main and release
    # branches.
    # For the status page we ignore pre-commit and release builds.
    wanted_branch = "main"

    return builds_api.list_all_for_pipeline(
        organisation,
        pipeline,
        states=wanted_states,
        branch=wanted_branch,
    )
David Spickett30a986f2021-04-29 09:37:00 +010063
64
def get_last_finished_build_status(organisation, job_name, pipeline):
    """
    Get information about the last finished build of a particular
    job, where a job is one of the pipeline steps (a config such as
    AArch64, MacOS, C++20 etc.).

    Builds are scanned in the order get_pipeline_builds() returns them
    and the first job with the requested name that was actually started
    is reported.

    Returns a dict with the keys "builder_url", "build_url", "number",
    "state", "duration", "time_since" and "fail", or {"valid": False}
    when no such job is found.
    """

    def whole_seconds(delta):
        # Buildkite gives us mS resolution but we don't need that
        return timedelta(seconds=int(delta.total_seconds()))

    for build in get_pipeline_builds(organisation, pipeline):
        # Some jobs won't have a name (probably the wait steps between
        # jobs or groups of jobs); those can never match.
        candidates = (j for j in build["jobs"] if j.get("name") == job_name)

        for job in candidates:
            # Some jobs were never started due to failures earlier in the
            # pipeline. This isn't our fault so ignore these.
            begin_stamp = job.get("started_at")
            if begin_stamp is None:
                continue

            # We use started because that's when it starts to
            # run on the agent (don't want to include waiting time)
            begin = dateutil.parser.isoparse(begin_stamp)
            end = dateutil.parser.isoparse(job["finished_at"])
            run_time = whole_seconds(end - begin)

            # Time from buildkite ends up as UTC but now() has no
            # timezone info by default, so ask for the same timezone.
            age = whole_seconds(datetime.now(end.tzinfo) - end)

            outcome = job["state"]
            return {
                # 'url' is the API url, we want a clickable link
                "builder_url": job["agent"]["web_url"],
                # We use the job's url because it takes you to the
                # specific agent, instead of the build's url
                "build_url": job["web_url"],
                "number": build["number"],
                "state": outcome,
                "duration": run_time,
                "time_since": age,
                "fail": outcome != "passed",
            }

    logging.debug(
        'Found no finished builds for job "{}" '
        'on pipeline "{}"'.format(job_name, pipeline)
    )
    return {"valid": False}
118
119
def bot_key(job_name):
    """
    Build the status dictionary key for a Buildkite job: a
    (base_url, job name) tuple.
    """
    base_url = "https://www.buildkite.com"
    return base_url, job_name
122
123
124# Get status for all Buildkite bots in config
125# Return a dictionary of (base_url, bot name) -> status info
def get_buildkite_bots_status(config):
    """
    Collect the status of every bot listed under the "Buildkite"
    servers in config.

    Returns a dictionary mapping bot_key(job name) to that job's status
    info. When no API token is available every bot is mapped to
    {"valid": False} instead.

    Raises RuntimeError if a token is available and a Buildkite server
    entry has no "buildkite_org".
    """

    def bots_of(server):
        # Every bot entry of every builder on this server, in config order.
        return (bot for builder in server["builders"] for bot in builder["bots"])

    token = get_buildkite_token()
    buildkite.set_access_token(token)

    status = {}
    for server in config:
        if server["name"] != "Buildkite":
            continue

        if token is None:
            # We cannot query buildkite but we don't want to mark these
            # bots as offline because we don't know that.
            status.update(
                (bot_key(bot["name"]), {"valid": False}) for bot in bots_of(server)
            )
            continue

        # If we have a valid token then we can get the real status
        organisation = server.get("buildkite_org")
        if organisation is None:
            raise RuntimeError("Buildkite servers must also have a 'buildkite_org'")

        for bot in bots_of(server):
            name = bot["name"]
            logging.debug('Getting status for buildkite job "{}"'.format(name))

            status[bot_key(name)] = get_last_finished_build_status(
                organisation, name, bot["buildkite_pipeline"]
            )

    return status
158 return status