David Spickett | 30a986f | 2021-04-29 09:37:00 +0100 | [diff] [blame] | 1 | #! /usr/bin/env python3 |
| 2 | |
| 3 | import os |
| 4 | import logging |
| 5 | from datetime import timedelta |
| 6 | from functools import lru_cache |
David Spickett | 4f932d1 | 2023-06-13 12:32:06 +0100 | [diff] [blame] | 7 | |
David Spickett | 30a986f | 2021-04-29 09:37:00 +0100 | [diff] [blame] | 8 | # Base dict order not guaranteed until 3.7+ |
| 9 | from collections import OrderedDict |
| 10 | from datetime import datetime |
David Spickett | 4f932d1 | 2023-06-13 12:32:06 +0100 | [diff] [blame] | 11 | |
David Spickett | 30a986f | 2021-04-29 09:37:00 +0100 | [diff] [blame] | 12 | # To convert time stamps |
| 13 | # (use datetime.datetime.fromisoformat in Python3.7+) |
| 14 | import dateutil.parser |
| 15 | from pybuildkite.buildkite import Buildkite, BuildState |
| 16 | |
# Single Buildkite API client shared by every function in this module.
# It is created without credentials; get_buildkite_bots_status supplies
# the access token before any builds are requested.
buildkite = Buildkite()
| 18 | |
| 19 | |
def get_buildkite_token():
    """
    Read the Buildkite API token from ~/.buildkite_token.

    We are assuming that this script is run on a machine
    that can only be accessed by users that would already be
    able to generate such tokens.

    We could use an env var but the monitor is run via cron
    which would have to inject the token. So it's going to end
    up on disk anyway.

    The format of this file is just a single line which is the
    token.

    Returns the token as a string, or None if the file is missing,
    cannot be read, or its first line is empty. A debug message is
    logged in each of the None cases.
    """
    tokenfile_path = os.path.expanduser(os.path.join("~", ".buildkite_token"))
    err_msg = "{}, Buildkite status will not be fetched."

    try:
        with open(tokenfile_path) as f:
            # Only the first line matters, strip to remove ending newline
            token = f.readline().strip()
    except OSError:
        # OSError covers a missing file (FileNotFoundError) but also a file
        # we are not allowed to read or a directory sitting at that path.
        # None of those should stop the rest of the monitor from running.
        logging.debug(err_msg.format("Couldn't read Buildkite API token file"))
        return None

    if not token:
        logging.debug(err_msg.format("Buildkite API token file was empty"))
        return None

    return token
| 48 | |
| 49 | |
@lru_cache(maxsize=None)
def get_pipeline_builds(organisation, pipeline):
    """
    Ask Buildkite for all the finished builds of one pipeline.

    The result is cached on the (organisation, pipeline) arguments, so
    repeated calls with the same pair reuse the first response instead
    of querying the API again.
    """
    builds_api = buildkite.builds()
    # Finished means passed, failed, blocked or cancelled
    wanted_states = [BuildState.FINISHED]
    return builds_api.list_all_for_pipeline(
        organisation, pipeline, states=wanted_states
    )
David Spickett | 30a986f | 2021-04-29 09:37:00 +0100 | [diff] [blame] | 58 | |
| 59 | |
def get_last_finished_build_status(organisation, job_name, pipeline):
    """
    Report on the last finished run of a single job, where a job is
    one of the steps of a pipeline.

    Builds are examined in the order get_pipeline_builds returns them
    and the first suitable job wins (presumably newest first, confirm
    against the Buildkite API).

    Returns a dictionary with the keys "builder_url", "build_url",
    "number", "state", "duration", "time_since" and "fail". If no
    suitable job is found, returns {"valid": False} instead.
    """
    for build in get_pipeline_builds(organisation, pipeline):
        # There are two kinds of builds. Those triggered from phab reviews
        # and those run periodically to post-commit build the main and
        # release branches.
        # For the status page we ignore pre-commit and release builds.
        # ph_buildable_revision is set if Phabricator triggered this build.
        is_precommit = "ph_buildable_revision" in build["env"]
        if is_precommit or build["branch"] != "main":
            continue

        # Where each "job" is a config e.g. AArch64, MacOS, C++20 etc.
        for job in build["jobs"]:
            # Some jobs won't have a name. I think these are the wait steps
            # between jobs or groups of jobs. Using get() means they simply
            # fail to match.
            if job.get("name") != job_name:
                continue

            # Some jobs were never started due to failures earlier in the
            # pipeline. This isn't our fault so ignore these.
            started_at = job.get("started_at")
            if started_at is None:
                continue

            # We measure from the start time because that's when the job
            # begins to run on the agent (don't want to include waiting time)
            begin = dateutil.parser.isoparse(started_at)
            end = dateutil.parser.isoparse(job["finished_at"])

            # Buildkite gives us millisecond resolution but we don't need
            # that, so both intervals are truncated to whole seconds.
            run_seconds = int((end - begin).total_seconds())
            # Time from buildkite ends up as UTC but now() has no timezone
            # info by default, so ask for now in the same timezone.
            current_time = datetime.now(end.tzinfo)
            age_seconds = int((current_time - end).total_seconds())

            job_state = job["state"]
            return {
                # 'url' is the API url, we want a clickable link
                "builder_url": job["agent"]["web_url"],
                # We use the job's url because it takes you to the
                # specific agent, instead of the build's url
                "build_url": job["web_url"],
                "number": build["number"],
                "state": job_state,
                "duration": timedelta(seconds=run_seconds),
                "time_since": timedelta(seconds=age_seconds),
                # Anything other than a pass counts as a failure
                "fail": job_state != "passed",
            }

    logging.debug(
        'Found no finished builds for job "{}" '
        'on pipeline "{}"'.format(job_name, pipeline)
    )
    return {"valid": False}
| 121 | |
| 122 | |
def bot_key(job_name):
    """
    Build the status dictionary key for a Buildkite job.

    The key is a (base_url, bot name) tuple. All Buildkite jobs share
    the same base URL.
    """
    base_url = "https://www.buildkite.com"
    return (base_url, job_name)
| 125 | |
| 126 | |
def get_buildkite_bots_status(config):
    """
    Get status for all Buildkite bots in config.

    config is iterated as a sequence of server dictionaries. Only those
    whose "name" is "Buildkite" are looked at, and each of their
    "builders" supplies a list of "bots" (the Buildkite jobs).

    Returns a dictionary of (base_url, bot name) -> status info. When
    no API token is available every bot gets {"valid": False}, since
    we cannot tell what state it is in.

    Raises RuntimeError if a token is available and a Buildkite server
    entry has no "buildkite_org".
    """
    status = {}

    token = get_buildkite_token()
    if token is not None:
        # Only hand over a real token. pybuildkite is understood to reject
        # a missing one (NOTE(review): confirm it raises), which would stop
        # us before we reach the fallback for the no token case below.
        buildkite.set_access_token(token)

    for server in config:
        if server["name"] != "Buildkite":
            continue

        if token is None:
            # We cannot query buildkite but we don't want to mark these
            # bots as offline because we don't know that.
            for builder in server["builders"]:
                for job in builder["bots"]:
                    status[bot_key(job["name"])] = {"valid": False}
            continue

        # If we have a valid token then we can get the real status
        organisation = server.get("buildkite_org")
        if organisation is None:
            raise RuntimeError("Buildkite servers must also have a 'buildkite_org'")

        for builder in server["builders"]:
            for job in builder["bots"]:
                job_name = job["name"]
                logging.debug('Getting status for buildkite job "{}"'.format(job_name))

                status[bot_key(job_name)] = get_last_finished_build_status(
                    organisation, job_name, job["buildkite_pipeline"]
                )

    return status