blob: d2fbab9b5f89ac6eb86591f7404a36f8187cca56 [file] [log] [blame]
David Spickett30a986f2021-04-29 09:37:00 +01001#! /usr/bin/env python3
2
3import os
4import logging
5from datetime import timedelta
6from functools import lru_cache
7# Base dict order not guaranteed until 3.7+
8from collections import OrderedDict
9from datetime import datetime
10# To convert time stamps
11# (use datetime.datetime.fromisoformat in Python3.7+)
12import dateutil.parser
13from pybuildkite.buildkite import Buildkite, BuildState
14
# Module-level Buildkite API client shared by the functions in this file.
# get_buildkite_bots_status() calls set_access_token() on it and
# get_pipeline_builds() issues its queries through it.
buildkite = Buildkite()
16
17
def get_buildkite_token():
    """
    Read the Buildkite API token from ~/.buildkite_token.

    We are assuming that this script is run on a machine
    that can only be accessed by users that would already be
    able to generate such tokens.

    We could use an env var but the monitor is run via cron
    which would have to inject the token. So it's going to end
    up on disk anyway.

    The format of this file is just a single line which is the
    token.

    Returns the token as a string, or None when the file cannot be
    read or its first line is empty. In both of those cases a debug
    message is logged and no exception is raised.
    """
    tokenfile_path = os.path.expanduser(
        os.path.join("~", ".buildkite_token"))
    err_msg = "{}, Buildkite status will not be fetched."

    try:
        with open(tokenfile_path) as f:
            # strip to remove ending newline
            token = f.readline().strip()
    except OSError:
        # OSError rather than just FileNotFoundError: a token file that
        # exists but is unreadable (bad permissions, is a directory, ...)
        # should degrade the same way as a missing one instead of
        # crashing the monitor.
        logging.debug(err_msg.format(
            "Couldn't read Buildkite API token file"))
        return None

    if not token:
        logging.debug(err_msg.format(
            "Buildkite API token file was empty"))
        return None

    return token
48
49
@lru_cache(maxsize=None)
def get_pipeline_builds(organisation, pipeline):
    """
    Fetch the finished builds of one pipeline from Buildkite.

    The result is cached per (organisation, pipeline) pair with no size
    limit, so repeated lookups for different jobs of the same pipeline
    reuse the first response.
    """
    builds_api = buildkite.builds()
    # Finished means passed, failed, blocked or cancelled
    wanted_states = [BuildState.FINISHED]
    return builds_api.list_all_for_pipeline(
        organisation, pipeline, states=wanted_states)
56
57
def get_last_finished_build_status(organisation, job_name,
                                   pipeline):
    """
    Report on the last finished run of a particular job, where a job
    is one of the pipeline steps (a config e.g. AArch64, MacOS, C++20).

    Builds are examined in the order get_pipeline_builds() yields them
    (presumably newest first - confirm against the Buildkite API) and
    the first job with a matching name that actually started is used.

    Returns a dict with the keys 'builder_url', 'build_url', 'number',
    'state', 'duration', 'time_since' and 'fail', plus 'changes' (the
    first 7 characters of the build's commit) when the job's state is
    anything other than 'passed'. If no suitable job is found the
    result is {'valid': False}.
    """
    def whole_seconds(delta):
        # Buildkite gives us millisecond resolution but we don't need that
        return timedelta(seconds=int(delta.total_seconds()))

    for build in get_pipeline_builds(organisation, pipeline):
        # There are two kinds of builds. Those triggered from phab reviews
        # and those run periodically to post-commit build the main branch.
        # The status page only shows the latter. ph_buildable_revision is
        # set if Phabricator triggered this build, so skip those.
        if 'ph_buildable_revision' in build['env']:
            continue

        for job in build['jobs']:
            # Some jobs won't have a name. I think these are the wait
            # steps between jobs or groups of jobs.
            is_wanted = job.get('name') == job_name
            # Some jobs were never started due to failures earlier in the
            # pipeline. This isn't our fault so ignore these.
            if not is_wanted or job.get('started_at') is None:
                continue

            # Measure from 'started_at' because that's when the job begins
            # to run on the agent (don't want to include waiting time)
            began = dateutil.parser.isoparse(job['started_at'])
            ended = dateutil.parser.isoparse(job['finished_at'])
            run_time = whole_seconds(ended - began)
            # Time from buildkite ends up as UTC but now has no
            # timezone info by default, so borrow the parsed tzinfo
            current = datetime.now(ended.tzinfo)
            age = whole_seconds(current - ended)

            job_state = job['state']
            did_fail = job_state != 'passed'
            report = {
                # 'url' is the API url, we want a clickable link
                'builder_url': job['agent']['web_url'],
                # The job's url takes you to the specific agent,
                # unlike the build's url
                'build_url': job['web_url'],
                'number': build['number'],
                'state': job_state,
                'duration': run_time,
                'time_since': age,
                'fail': did_fail,
            }
            if did_fail:
                report['changes'] = build['commit'][:7]
            return report

    logging.debug(
        'Found no finished builds for job "{}" '
        'on pipeline "{}"'.format(job_name, pipeline))
    return {'valid': False}
122
123
def bot_key(job_name):
    """
    Build the (base_url, bot name) tuple under which a Buildkite job's
    status is stored.
    """
    base_url = "https://www.buildkite.com"
    return base_url, job_name
126
127
def get_buildkite_bots_status(config):
    """
    Get status for all Buildkite bots in config.

    Only servers whose 'name' is "Buildkite" are considered. Returns a
    dictionary of (base_url, bot name) -> status info.

    Without an API token every bot is reported as {'valid': False}.
    With a token, each Buildkite server must provide 'buildkite_org',
    otherwise RuntimeError is raised.
    """
    token = get_buildkite_token()
    buildkite.set_access_token(token)
    have_token = token is not None

    results = {}
    for server in config:
        if server['name'] != "Buildkite":
            continue

        organisation = None
        if have_token:
            # Only needed (and only checked) when we can actually query
            organisation = server.get('buildkite_org')
            if organisation is None:
                raise RuntimeError(
                    "Buildkite servers must also have a 'buildkite_org'")

        all_jobs = (job
                    for builder in server['builders']
                    for job in builder['bots'])
        for job in all_jobs:
            name = job['name']

            if not have_token:
                # We cannot query buildkite but we don't want to mark
                # these bots as offline because we don't know that.
                results[bot_key(name)] = {'valid': False}
                continue

            logging.debug(
                'Getting status for buildkite job "{}"'.format(name))
            results[bot_key(name)] = get_last_finished_build_status(
                organisation, name, job['buildkite_pipeline'])

    return results
164 return status