blob: 3f9924b219c9eb9ca921bbb6f9079bcfbf52aae7 [file] [log] [blame]
David Spickett30a986f2021-04-29 09:37:00 +01001#! /usr/bin/env python3
2
3import os
4import logging
5from datetime import timedelta
6from functools import lru_cache
David Spickett4f932d12023-06-13 12:32:06 +01007
David Spickett30a986f2021-04-29 09:37:00 +01008# Base dict order not guaranteed until 3.7+
9from collections import OrderedDict
10from datetime import datetime
David Spickett4f932d12023-06-13 12:32:06 +010011
David Spickett30a986f2021-04-29 09:37:00 +010012# To convert time stamps
13# (use datetime.datetime.fromisoformat in Python3.7+)
14import dateutil.parser
15from pybuildkite.buildkite import Buildkite, BuildState
16
# Shared Buildkite API client used by the functions in this file. It is
# created without credentials; get_buildkite_bots_status() sets the access
# token on it before any builds are requested.
buildkite = Buildkite()
18
19
def get_buildkite_token():
    """
    Read the Buildkite API token from ~/.buildkite_token.

    We are assuming that this script is run on a machine
    that can only be accessed by users that would already be
    able to generate such tokens.

    We could use an env var but the monitor is run via cron
    which would have to inject the token. So it's going to end
    up on disk anyway.

    The format of this file is just a single line which is the
    token.

    Returns the first line of the file with surrounding whitespace
    stripped, or None if the file could not be opened or read, or if
    that line is blank. Failures are logged at debug level and never
    raised, so a missing or unreadable token only disables the
    Buildkite status instead of stopping the caller.
    """
    tokenfile_path = os.path.expanduser(os.path.join("~", ".buildkite_token"))
    err_msg = "{}, Buildkite status will not be fetched."

    try:
        with open(tokenfile_path) as f:
            # strip to remove ending newline
            token = f.readline().strip()
    except OSError:
        # OSError rather than just FileNotFoundError: a token file that
        # exists but cannot be read (bad permissions, path is a directory,
        # I/O error) should be treated the same as a missing one.
        logging.debug(err_msg.format("Couldn't read Buildkite API token file"))
        return None

    if not token:
        logging.debug(err_msg.format("Buildkite API token file was empty"))
        return None

    return token
48
49
@lru_cache(maxsize=None)
def get_pipeline_builds(organisation, pipeline):
    """
    Ask Buildkite for the finished builds of one pipeline.

    The result of a successful call is cached per (organisation, pipeline)
    pair with no size limit, so looking up several jobs of the same
    pipeline reuses the first response instead of querying again.
    """
    builds_api = buildkite.builds()
    # Finished means passed, failed, blocked or cancelled
    wanted_states = [BuildState.FINISHED]
    return builds_api.list_all_for_pipeline(
        organisation, pipeline, states=wanted_states
    )
David Spickett30a986f2021-04-29 09:37:00 +010058
59
def get_last_finished_build_status(organisation, job_name, pipeline):
    """
    Get information about the last finished build of a particular
    job (where a job is one of the pipeline steps).

    Walks the builds returned by get_pipeline_builds() in order and
    returns a status dict for the first job called job_name that was
    actually started. The dict has the keys "builder_url", "build_url",
    "number", "state", "duration", "time_since" and "fail".

    If no such job is found, a debug message is logged and
    {"valid": False} is returned.
    """

    def whole_seconds(delta):
        # Buildkite gives us millisecond resolution but we don't need that,
        # so drop the fractional part.
        return timedelta(seconds=int(delta.total_seconds()))

    for build in get_pipeline_builds(organisation, pipeline):
        # There are two kinds of builds. Those triggered from phab reviews
        # and those run periodically to post-commit build the main and
        # release branches.
        # For the status page we ignore pre-commit and release builds.
        # ph_buildable_revision is set if Phabricator triggered this build.
        is_pre_commit = "ph_buildable_revision" in build["env"]
        if is_pre_commit or build["branch"] != "main":
            continue

        # Where each "job" is a config e.g. AArch64, MacOS, C++20 etc.
        for job in build["jobs"]:
            # Some jobs won't have a name. I think these are the wait steps
            # between jobs or groups of jobs. .get() makes those compare
            # unequal instead of raising.
            if job.get("name") != job_name:
                continue

            # Some jobs were never started due to failures earlier in the
            # pipeline. This isn't our fault so ignore these.
            begin_stamp = job.get("started_at")
            if begin_stamp is None:
                continue

            # We measure from the start time because that's when the job
            # begins to run on the agent (don't want to include waiting time).
            began = dateutil.parser.isoparse(begin_stamp)
            ended = dateutil.parser.isoparse(job["finished_at"])
            run_time = whole_seconds(ended - began)

            # Time from buildkite ends up as UTC but now() has no timezone
            # info by default, so ask for "now" in the same timezone.
            age = whole_seconds(datetime.now(ended.tzinfo) - ended)

            outcome = job["state"]
            return {
                # 'url' is the API url, we want a clickable link
                "builder_url": job["agent"]["web_url"],
                # We use the job's url because it takes you to the
                # specific agent, instead of the build's url
                "build_url": job["web_url"],
                "number": build["number"],
                "state": outcome,
                "duration": run_time,
                "time_since": age,
                # Anything other than "passed" counts as a failure.
                "fail": outcome != "passed",
            }

    logging.debug(
        'Found no finished builds for job "{}" on pipeline "{}"'.format(
            job_name, pipeline
        )
    )
    return {"valid": False}
121
122
def bot_key(job_name):
    """
    Build the (base_url, bot name) tuple under which a Buildkite job's
    status is stored.
    """
    base_url = "https://www.buildkite.com"
    return base_url, job_name
125
126
def get_buildkite_bots_status(config):
    """
    Get status for all Buildkite bots in config.

    Returns a dictionary of (base_url, bot name) -> status info. Servers
    whose "name" is not "Buildkite" are ignored.

    If no API token is available every Buildkite bot is reported as
    {"valid": False}. Otherwise each bot's entry is whatever
    get_last_finished_build_status() returns for it.

    Raises RuntimeError if a token is available but a Buildkite server
    has no "buildkite_org".
    """

    def server_jobs(server):
        # Lazily yield every bot of every builder on this server, in the
        # order they appear in the config.
        for builder in server["builders"]:
            yield from builder["bots"]

    token = get_buildkite_token()
    buildkite.set_access_token(token)

    results = {}
    for server in config:
        if server["name"] != "Buildkite":
            continue

        if token is None:
            # We cannot query buildkite but we don't want to mark these
            # bots as offline because we don't know that.
            for job in server_jobs(server):
                results[bot_key(job["name"])] = {"valid": False}
        else:
            # If we have a valid token then we can get the real status
            organisation = server.get("buildkite_org")
            if organisation is None:
                raise RuntimeError("Buildkite servers must also have a 'buildkite_org'")

            for job in server_jobs(server):
                job_name = job["name"]
                logging.debug('Getting status for buildkite job "{}"'.format(job_name))
                results[bot_key(job_name)] = get_last_finished_build_status(
                    organisation, job_name, job["buildkite_pipeline"]
                )

    return results