#! /usr/bin/env python3
2
3import os
4import logging
5from datetime import timedelta
6from functools import lru_cache
7# Base dict order not guaranteed until 3.7+
8from collections import OrderedDict
9from datetime import datetime
10# To convert time stamps
11# (use datetime.datetime.fromisoformat in Python3.7+)
12import dateutil.parser
13from pybuildkite.buildkite import Buildkite, BuildState
14
# Shared Buildkite API client used by the functions below. It is created
# without credentials; get_buildkite_bots_status() hands it the access
# token via set_access_token() before any builds are requested.
buildkite = Buildkite()
16
17
def get_buildkite_token():
    """
    Read the Buildkite API token from ~/.buildkite_token.

    We are assuming that this script is run on a machine
    that can only be accessed by users that would already be
    able to generate such tokens.

    We could use an env var but the monitor is run via cron
    which would have to inject the token. So it's going to end
    up on disk anyway.

    The format of this file is just a single line which is the
    token.

    Returns the token string, or None when the file is missing, cannot
    be opened or read, or its first line is blank. None of those cases
    raise; a debug message is logged instead.
    """
    tokenfile_path = os.path.expanduser(
        os.path.join("~", ".buildkite_token"))
    err_msg = "{}, Buildkite status will not be fetched."

    try:
        with open(tokenfile_path) as f:
            # strip to remove ending newline
            token = f.readline().strip()
    except OSError:
        # Deliberately wider than FileNotFoundError: a file we lack
        # permission for, or a directory at that path, equally means
        # "no usable token" and must not take the whole monitor down.
        logging.debug(err_msg.format(
            "Couldn't read Buildkite API token file"))
        return None

    if not token:
        logging.debug(err_msg.format(
            "Buildkite API token file was empty"))
        return None

    return token
48
49
@lru_cache(maxsize=None)
def get_pipeline_builds(organisation, pipeline):
    """
    Return the finished builds of "pipeline" within "organisation".

    The result is memoised per (organisation, pipeline) pair, so asking
    about several jobs of the same pipeline only runs this lookup once.
    """
    # Finished means passed, failed, blocked or cancelled
    wanted_states = [BuildState.FINISHED]
    builds_api = buildkite.builds()
    return builds_api.list_all_for_pipeline(
        organisation, pipeline, states=wanted_states)
56
57
def _whole_seconds(delta):
    """Return the timedelta "delta" truncated to whole seconds."""
    return timedelta(seconds=int(delta.total_seconds()))


def get_last_finished_build_status(organisation, job_name,
                                   pipeline):
    """
    Get information about the last finished build of a particular
    job. (where a job is one of the pipeline steps)

    Builds are examined in the order get_pipeline_builds() returns them
    (presumably newest first - confirm against the Buildkite API) and
    the first build containing a run of "job_name" wins.

    Returns a dict with the keys 'builder_url', 'build_url', 'number',
    'state', 'duration', 'time_since' and 'fail', plus 'changes' (the
    abbreviated commit hash) when the job did not pass. If no suitable
    build is found, returns {'valid': False}.
    """
    builds = get_pipeline_builds(organisation, pipeline)

    for build in builds:
        # There are two kinds of builds. Those triggered from phab reviews
        # and those run periodically to post-commit build the main branch.
        # For the status page we ignore pre-commit builds.
        # ph_buildable_revision is set if Phabricator triggered this build.
        if 'ph_buildable_revision' in build['env']:
            continue

        # Where each "job" is a config e.g. AArch64, MacOS, C++20 etc.
        for job in build['jobs']:
            # Some jobs won't have a name. I think these are the wait steps
            # between jobs or groups of jobs.
            if job.get('name') != job_name:
                continue

            started_at = job.get('started_at')
            finished_at = job.get('finished_at')
            if not started_at or not finished_at:
                # The build finished but this job never actually ran in
                # it, so there are no timestamps to parse. Keep looking
                # for a build where the job did run instead of crashing
                # on the missing timestamps.
                continue

            # We use started because that's when it starts to
            # run on the agent (don't want to include waiting time)
            started = dateutil.parser.isoparse(started_at)
            finished = dateutil.parser.isoparse(finished_at)
            # Buildkite gives us mS resolution but we don't need that
            duration = _whole_seconds(finished - started)
            # Time from buildkite ends up as UTC but now has no
            # timezone info by default
            now = datetime.now(finished.tzinfo)
            time_since = _whole_seconds(now - finished)

            state = job['state']
            failed = state != 'passed'
            status = {
                # 'url' is the API url, we want a clickable link
                'builder_url': job['agent']['web_url'],
                # We use the job's url because it takes you to the
                # specific agent, instead of the build's url
                'build_url': job['web_url'],
                'number': build['number'],
                'state': state,
                'duration': duration,
                'time_since': time_since,
                'fail': failed,
            }
            if failed:
                # Short commit hash so the page can show what broke
                status['changes'] = build['commit'][:7]
            return status

    logging.debug(
        "Found no finished builds for job \"{}\" "
        "on pipeline \"{}\"".format(job_name, pipeline))
    return dict(valid=False)
116
117
def bot_key(job_name):
    """
    Build the status dictionary key for a Buildkite job: a
    (base url, job name) tuple.
    """
    base_url = "https://www.buildkite.com"
    return (base_url, job_name)
120
121
def get_buildkite_bots_status(config):
    """
    Get status for all Buildkite bots in config.

    Returns a dictionary of (base_url, bot name) -> status info.
    Servers whose name is not "Buildkite" are ignored. Without an API
    token every bot is reported as {'valid': False}.

    Raises RuntimeError if a token is available and a Buildkite server
    entry has no 'buildkite_org'.
    """
    api_token = get_buildkite_token()
    buildkite.set_access_token(api_token)

    results = {}
    for server in config:
        if server['name'] != "Buildkite":
            continue

        if api_token is None:
            # We cannot query buildkite but we don't want to mark these
            # bots as offline because we don't know that.
            results.update(
                (bot_key(bot['name']), {'valid': False})
                for builder in server['builders']
                for bot in builder['bots'])
            continue

        # If we have a valid token then we can get the real status
        organisation = server.get('buildkite_org')
        if organisation is None:
            raise RuntimeError(
                "Buildkite servers must also have a 'buildkite_org'")

        for builder in server['builders']:
            for bot in builder['bots']:
                name = bot['name']
                logging.debug(
                    "Getting status for buildkite job \"{}\"".format(name))

                bot_status = get_last_finished_build_status(
                    organisation, name, bot['buildkite_pipeline'])
                results[bot_key(name)] = bot_status

    return results
158 return status