llvmbot monitor: Add Libcxx buildkite bots This adds the 6 libcxx buildkite bots to the status page. These bots build pre-commit changes from Phabricator and scheduled post commit builds of the main llvm branch. For this page we will only be showing the status of the last post commit build. If there's something wrong with pre-commit builds that isn't the changes themselves, libcxx maintainers can ping us. To get this status we're using the buildkite API (https://buildkite.com/docs/apis/rest-api) via the pybuildkite module (https://pypi.org/project/pybuildkite/). Although this is a fairly thin wrapper around requests I think it'll insulate us some from future API changes. Plus it's not difficult to install, the main dependency is requests itself. To authenticate we are using a token I have generated from my buildkite account. Any account that has been made a member of the llvm-project organisation can generate a token to use. This token should be placed in a file ~/.buildkite_token for the script to access it. We're making the assumption that the host machine is only accessible to users who would be able to generate their own tokens anyway so having it on disk isn't a big deal. For more info see: https://linaro.atlassian.net/wiki/spaces/TCWG/pages/22405546190/Buildkite+Bot+Maintenance If the script cannot find it or the file is empty, it will just carry on and show buildbot status only. I have copied the info schema from the buildbot status values for the most part. If you set the "name" of a section to "Buildkite" it will be treated as such. I have added a "valid" key in the buildkite info so we can differentiate between having no API access, and having API access but not finding any build info. Valid empty entry means we don't have access, empty and not valid means we failed to read the status. Each builder listed for buildkite isn't a builder but a job that an agent will pick up. In our case, the Arm jobs from the libcxx build. 
(the same way that the Armv7 buildbots are served by multiple agents) Note that the link in the "Bot" column requires a buildkite login and links to the actual agent that ran the job. "Build" links will always point to the same build for all bots but each one will open the specific section for that configuration. Change-Id: Icdc02305551cd8a585e06f33bce2fb7386063daf

commit: 30a986f409ee01d8f01dd5eb7bb8552ca77fb487 [log] [tgz]
author: David Spickett <david.spickett@linaro.org> Thu Apr 29 09:37:00 2021 +0100
committer: David Spickett <david.spickett@linaro.org> Mon Sep 13 10:14:48 2021 +0100
tree: 7e5c5487d66c39b1a30d6b0da05b2bf1cef832b1
parent: 19ba7f735bda3cb2f84f827e973e3c89efe53c78 [diff] [blame]
diff --git a/monitor/buildkite_status.py b/monitor/buildkite_status.py
new file mode 100755
index 0000000..07a848f
--- /dev/null
+++ b/monitor/buildkite_status.py

@@ -0,0 +1,158 @@
+#! /usr/bin/env python3
+
+import os
+import logging
+from datetime import timedelta
+from functools import lru_cache
+# Base dict order not guaranteed until 3.7+
+from collections import OrderedDict
+from datetime import datetime
+# To convert time stamps
+# (use datetime.datetime.fromisoformat in Python3.7+)
+import dateutil.parser
+from pybuildkite.buildkite import Buildkite, BuildState
+
+# Module-wide Buildkite API client. It is created here without
+# credentials; get_buildkite_bots_status() sets the access token on it
+# before any builds are queried.
+buildkite = Buildkite()
+
+
+def get_buildkite_token():
+    """
+    Read the Buildkite API token from ~/.buildkite_token.
+
+    We are assuming that this script is run on a machine
+    that can only be accessed by users that would already be
+    able to generate such tokens.
+
+    We could use an env var but the monitor is run via cron
+    which would have to inject the token. So it's going to end
+    up on disk anyway.
+
+    The format of this file is just a single line which is the
+    token.
+
+    Returns the token as a string, or None if the file could not be
+    read or its first line was empty. In the None case a debug message
+    is logged and no exception is raised, so the caller can carry on
+    without Buildkite status.
+    """
+    tokenfile_path = os.path.expanduser(
+        os.path.join("~", ".buildkite_token"))
+    err_msg = "{}, Buildkite status will not be fetched."
+
+    try:
+        with open(tokenfile_path) as f:
+            # strip to remove ending newline
+            token = f.readline().strip()
+    except OSError:
+        # OSError covers a missing file (FileNotFoundError) as well as
+        # one we are not allowed to read or that is not a regular file.
+        # None of these should stop the rest of the monitor.
+        logging.debug(err_msg.format(
+            "Couldn't read Buildkite API token file"))
+        return None
+
+    if not token:
+        logging.debug(err_msg.format(
+            "Buildkite API token file was empty"))
+        return None
+
+    return token
+
+
+@lru_cache(maxsize=None)
+def get_pipeline_builds(organisation, pipeline):
+    """
+    Fetch all finished builds of a pipeline.
+
+    Results are cached per (organisation, pipeline) pair, so asking
+    about several jobs of the same pipeline only queries the API once.
+    """
+    # Finished means passed, failed, blocked or cancelled
+    wanted_states = [BuildState.FINISHED]
+    builds_api = buildkite.builds()
+    return builds_api.list_all_for_pipeline(
+        organisation, pipeline, states=wanted_states)
+
+
+def get_last_finished_build_status(organisation, job_name,
+                                   pipeline):
+    """
+    Get information about the last finished build of a particular
+    job. (where a job is one of the pipeline steps)
+
+    Builds are examined in the order get_pipeline_builds() returns
+    them and only builds of the 'main' branch are considered. The
+    first job named job_name that has both a start and a finish time
+    is reported.
+
+    Returns a dict with the keys 'builder_url', 'build_url', 'number',
+    'state', 'duration', 'time_since' and 'fail', plus 'changes' (the
+    abbreviated commit hash) when the job did not pass. If no suitable
+    job is found, returns {'valid': False}.
+    """
+    builds = get_pipeline_builds(organisation, pipeline)
+
+    for build in builds:
+        # There are two kinds of builds. Ones triggered from phab reviews
+        # and ones run periodically to post commit build the main branch.
+        # For the status page failures in pre-commit aren't very useful
+        # so we ignore them.
+        if build['branch'] != 'main':
+            continue
+
+        # Where each "job" is a config e.g. AArch64, MacOS, C++20 etc.
+        for job in build['jobs']:
+            # Some jobs won't have a name. I think these are the wait steps
+            # between jobs or groups of jobs.
+            if job.get('name') != job_name:
+                continue
+
+            # A finished build can still contain jobs that never ran
+            # (e.g. the build was cancelled first). Those have no time
+            # stamps to parse, so keep looking for one that did run.
+            started_at = job.get('started_at')
+            finished_at = job.get('finished_at')
+            if started_at is None or finished_at is None:
+                continue
+
+            # We use started because that's when it starts to
+            # run on the agent (don't want to include waiting time)
+            started = dateutil.parser.isoparse(started_at)
+            finished = dateutil.parser.isoparse(finished_at)
+            # Buildkite gives us ms resolution but we don't need that
+            duration = timedelta(seconds=int(
+                (finished - started).total_seconds()))
+            # Time from buildkite ends up as UTC but now has no
+            # timezone info by default
+            now = datetime.now(finished.tzinfo)
+            time_since = timedelta(
+                seconds=int((now - finished).total_seconds()))
+
+            state = job['state']
+            failed = state != 'passed'
+            status = {
+                # 'url' is the API url, we want a clickable link
+                'builder_url': job['agent']['web_url'],
+                # We use the job's url because it takes you to the
+                # specific agent, instead of the build's url
+                'build_url': job['web_url'],
+                'number': build['number'],
+                'state': state,
+                'duration': duration,
+                'time_since': time_since,
+                'fail': failed,
+            }
+            if failed:
+                # Short hash of the commit that was built
+                status['changes'] = build['commit'][:7]
+            return status
+
+    logging.debug(
+        "Found no finished builds for job \"{}\" "
+        "on pipeline \"{}\"".format(job_name, pipeline))
+    return dict(valid=False)
+
+
+def bot_key(job_name):
+    """
+    Build the (base_url, bot name) key used to index a Buildkite
+    job in the status dictionary.
+    """
+    base_url = "https://www.buildkite.com"
+    return (base_url, job_name)
+
+
+# Get status for all Buildkite bots in config
+# Return a dictionary of (base_url, bot name) -> status info
+def get_buildkite_bots_status(config):
+    """
+    Collect the status of every job listed under a server whose
+    'name' is "Buildkite"; all other servers in config are ignored.
+
+    Without an API token every job is recorded as {'valid': False}.
+    With a token, each job's entry is whatever
+    get_last_finished_build_status() returns for it.
+
+    Raises RuntimeError if a token is available but a Buildkite
+    server has no 'buildkite_org'.
+    """
+    api_token = get_buildkite_token()
+    buildkite.set_access_token(api_token)
+    have_token = api_token is not None
+
+    results = {}
+
+    for server in config:
+        if server['name'] != "Buildkite":
+            continue
+
+        if have_token:
+            # If we have a valid token then we can get the real status,
+            # which needs to know the organisation to query.
+            org = server.get('buildkite_org')
+            if org is None:
+                raise RuntimeError(
+                    "Buildkite servers must also have a 'buildkite_org'")
+
+        for builder in server['builders']:
+            for job in builder['bots']:
+                name = job['name']
+
+                if not have_token:
+                    # We cannot query buildkite but we don't want to
+                    # mark these bots as offline because we don't
+                    # know that.
+                    results[bot_key(name)] = {'valid': False}
+                    continue
+
+                logging.debug("Getting status for buildkite job \"{}\"".format(
+                    name))
+
+                results[bot_key(name)] = get_last_finished_build_status(
+                    org, name,
+                    job['buildkite_pipeline'])
+
+    return results
commit	30a986f409ee01d8f01dd5eb7bb8552ca77fb487	[log] [tgz]
author	David Spickett <david.spickett@linaro.org>	Thu Apr 29 09:37:00 2021 +0100
committer	David Spickett <david.spickett@linaro.org>	Mon Sep 13 10:14:48 2021 +0100
tree	7e5c5487d66c39b1a30d6b0da05b2bf1cef832b1
parent	19ba7f735bda3cb2f84f827e973e3c89efe53c78 [diff] [blame]