David Spickett | 30a986f | 2021-04-29 09:37:00 +0100 | [diff] [blame] | 1 | #! /usr/bin/env python3 |
| 2 | |
| 3 | import os |
| 4 | import logging |
| 5 | from datetime import timedelta |
| 6 | from functools import lru_cache |
David Spickett | 4f932d1 | 2023-06-13 12:32:06 +0100 | [diff] [blame] | 7 | |
David Spickett | 30a986f | 2021-04-29 09:37:00 +0100 | [diff] [blame] | 8 | # Base dict order not guaranteed until 3.7+ |
| 9 | from collections import OrderedDict |
| 10 | from datetime import datetime |
David Spickett | 4f932d1 | 2023-06-13 12:32:06 +0100 | [diff] [blame] | 11 | |
David Spickett | 30a986f | 2021-04-29 09:37:00 +0100 | [diff] [blame] | 12 | # To convert time stamps |
| 13 | # (use datetime.datetime.fromisoformat in Python3.7+) |
| 14 | import dateutil.parser |
| 15 | from pybuildkite.buildkite import Buildkite, BuildState |
| 16 | |
# Module-wide Buildkite API client. get_buildkite_bots_status() hands it the
# access token before any builds are requested through it.
buildkite = Buildkite()
| 18 | |
| 19 | |
def get_buildkite_token():
    """
    Read the Buildkite API token from ~/.buildkite_token.

    We are assuming that this script is run on a machine
    that can only be accessed by users that would already be
    able to generate such tokens.

    We could use an env var but the monitor is run via cron
    which would have to inject the token. So it's going to end
    up on disk anyway.

    The format of this file is just a single line which is the
    token.

    Returns the token as a string, or None (after logging a debug
    message) when the file is missing, cannot be read, or its first
    line is blank.
    """
    tokenfile_path = os.path.expanduser(os.path.join("~", ".buildkite_token"))
    err_msg = "{}, Buildkite status will not be fetched."

    try:
        with open(tokenfile_path) as f:
            # strip to remove ending newline
            token = f.readline().strip()
    except OSError:
        # Covers a missing file as well as permission problems or the path
        # being a directory. None of these should take down the monitor,
        # we just carry on without Buildkite status.
        logging.debug(err_msg.format("Couldn't read Buildkite API token file"))
        return None

    if not token:
        logging.debug(err_msg.format("Buildkite API token file was empty"))
        return None

    return token
| 48 | |
| 49 | |
@lru_cache(maxsize=None)
def get_pipeline_builds(organisation, pipeline):
    """
    Ask Buildkite for the finished builds of a pipeline's main branch.

    The result is cached per (organisation, pipeline) pair, so looking up
    several jobs from the same pipeline only queries Buildkite once.
    """
    builds_api = buildkite.builds()
    # "Finished" covers passed, failed, blocked and cancelled builds.
    #
    # Builds are either triggered from phab reviews (pre-commit) or run
    # periodically to post-commit build the main and release branches.
    # The status page only shows post-commit builds of main, hence the
    # branch filter.
    return builds_api.list_all_for_pipeline(
        organisation, pipeline, states=[BuildState.FINISHED], branch="main"
    )
David Spickett | 30a986f | 2021-04-29 09:37:00 +0100 | [diff] [blame] | 63 | |
| 64 | |
def get_last_finished_build_status(organisation, job_name, pipeline):
    """
    Describe the last finished run of one job in a pipeline.
    (a job is one of the pipeline steps, a config such as AArch64,
    MacOS, C++20 etc.)

    Builds are searched in the order get_pipeline_builds() returns them
    and the first started job called job_name is used. This presumably
    relies on Buildkite listing the newest builds first.

    Returns a dict with the keys builder_url, build_url, number, state,
    duration, time_since and fail. If no such job is found, a debug
    message is logged and {"valid": False} is returned instead.
    """
    for build in get_pipeline_builds(organisation, pipeline):
        for job in build["jobs"]:
            # Two kinds of job are skipped:
            # * Ones with a different name. This includes jobs with no name
            #   at all, which are probably the wait steps between jobs or
            #   groups of jobs.
            # * Ones that never started because something earlier in the
            #   pipeline failed. That isn't our fault so we ignore them.
            if job.get("name") != job_name or job.get("started_at") is None:
                continue

            # Measure from "started" since that is when the job begins
            # running on the agent, we don't want to count queueing time.
            begin = dateutil.parser.isoparse(job["started_at"])
            end = dateutil.parser.isoparse(job["finished_at"])
            # Buildkite reports milliseconds, whole seconds are plenty.
            run_time = timedelta(seconds=int((end - begin).total_seconds()))
            # Buildkite's times carry a timezone (UTC) but now() is naive
            # unless we give it one.
            current = datetime.now(end.tzinfo)
            age = timedelta(seconds=int((current - end).total_seconds()))

            job_state = job["state"]
            return {
                # 'url' is the API url, we want something you can click on
                "builder_url": job["agent"]["web_url"],
                # The job's url leads to the specific agent's output,
                # the build's url would not.
                "build_url": job["web_url"],
                "number": build["number"],
                "state": job_state,
                "duration": run_time,
                "time_since": age,
                "fail": job_state != "passed",
            }

    logging.debug(
        'Found no finished builds for job "{}" '
        'on pipeline "{}"'.format(job_name, pipeline)
    )
    return {"valid": False}
| 118 | |
| 119 | |
def bot_key(job_name):
    """Return the (base_url, bot name) tuple used as the status key for a Buildkite job."""
    base_url = "https://www.buildkite.com"
    return base_url, job_name
| 122 | |
| 123 | |
def get_buildkite_bots_status(config):
    """
    Get status for all Buildkite bots in config.

    Only config entries named "Buildkite" are looked at. Returns a
    dictionary of (base_url, bot name) -> status info.

    Without an API token every bot is reported as {"valid": False}.
    With a token, a Buildkite entry lacking "buildkite_org" raises
    RuntimeError.
    """
    api_token = get_buildkite_token()
    buildkite.set_access_token(api_token)
    have_token = api_token is not None

    results = {}
    for entry in config:
        if entry["name"] != "Buildkite":
            continue

        org = None
        if have_token:
            # Only needed (and only checked) when we can actually query.
            org = entry.get("buildkite_org")
            if org is None:
                raise RuntimeError("Buildkite servers must also have a 'buildkite_org'")

        for builder in entry["builders"]:
            for bot in builder["bots"]:
                name = bot["name"]

                if not have_token:
                    # We can't ask Buildkite anything, but that doesn't
                    # mean the bot is offline, so don't claim that it is.
                    results[bot_key(name)] = {"valid": False}
                    continue

                logging.debug('Getting status for buildkite job "{}"'.format(name))
                results[bot_key(name)] = get_last_finished_build_status(
                    org, name, bot["buildkite_pipeline"]
                )

    return results