diff options
-rw-r--r-- | buildbot/README.txt | 5 | ||||
-rwxr-xr-x | buildbot/bot | 17 | ||||
-rwxr-xr-x | buildbot/llvmbot | 57 | ||||
-rwxr-xr-x | buildbot/start | 49 | ||||
-rw-r--r-- | monitor/.git-blame-ignore-revs | 2 | ||||
-rw-r--r-- | monitor/LICENSE | 21 | ||||
-rw-r--r-- | monitor/README.txt | 180 | ||||
-rwxr-xr-x | monitor/bot-status | 243 | ||||
-rwxr-xr-x | monitor/bot-status.py | 359 | ||||
-rw-r--r-- | monitor/fail.ico | bin | 1406 -> 0 bytes | |||
-rwxr-xr-x | monitor/install.sh | 30 | ||||
-rw-r--r-- | monitor/linaro.json | 122 | ||||
-rw-r--r-- | monitor/make_table.py | 132 | ||||
-rw-r--r-- | monitor/ok.ico | bin | 2462 -> 0 bytes |
14 files changed, 641 insertions, 576 deletions
diff --git a/buildbot/README.txt b/buildbot/README.txt deleted file mode 100644 index 20ad3c1..0000000 --- a/buildbot/README.txt +++ /dev/null @@ -1,5 +0,0 @@ -LLVM Buildbot Slave/Master Scripts -================================== - -These scripts are meant to be installed on a buildslave to help with the -setup, from starting Chromebook chroots to restarting services at boot. diff --git a/buildbot/bot b/buildbot/bot deleted file mode 100755 index d20d677..0000000 --- a/buildbot/bot +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env bash - -if [[ $1 != 'start' && $1 != 'stop' ]]; then - echo "Syntax: $0 <start|stop> [bot-dir]" - exit -1 -fi -ACTION=$1 -BOTDIR=/opt/buildbot -if [[ $2 != '' ]]; then - BOTDIR=$2 -fi - -if [[ $ACTION = 'start' ]]; then - sudo -u buildbot buildslave stop $BOTDIR -fi - -sudo -u buildbot buildslave $ACTION $BOTDIR diff --git a/buildbot/llvmbot b/buildbot/llvmbot deleted file mode 100755 index f7c5f0b..0000000 --- a/buildbot/llvmbot +++ /dev/null @@ -1,57 +0,0 @@ -#! /bin/sh - -### BEGIN INIT INFO -# Provides: LLVM Buildbot Slave -# Required-Start: $local_fs $remote_fs $buildslave -# Required-Stop: -# X-Start-Before: -# Default-Start: 2 3 4 5 -# Default-Stop: -# Short-Description: Runs tests specified by buildmaster -# Description: Runs tests specified by buildmaster -### END INIT INFO - -. /lib/lsb/init-functions - -N=/etc/init.d/llvmbot -USER=linaro -DIR=/home/linaro/devel/buildslave -PID=twistd.pid - -set -e - -start() { - if [ ! 
-f $DIR/$PID ]; then - su -l -c "buildslave start $DIR" $USER - else - echo "Buildslave on $DIR already started" - fi -} - -stop() { - if [ -f $DIR/$PID ]; then - su -l -c "buildslave stop $DIR" $USER - else - echo "Buildslave on $DIR not started" - fi -} - -case "$1" in - start) start;; - stop) stop;; - restart) - stop; start;; - status) - if [ -f $DIR/twisted.pid ]; then - echo "Buildslave on $DIR active" - else - echo "Buildslave on $DIR stopped" - fi - ;; - *) - echo "Usage: $N {start|stop|restart|status}" >&2 - exit 1 - ;; -esac - -exit 0 diff --git a/buildbot/start b/buildbot/start deleted file mode 100755 index 19346ce..0000000 --- a/buildbot/start +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -function psg() { - ps awwux | grep -v grep | grep $1 -} -function botdo() { - sudo -u buildbot $* -} - -if [[ $UID != 0 ]]; then - echo "Not root? Run 'sudo $0'" - exit 1 -fi - -mounted=`mount | grep external` -if [[ $mounted = '' ]]; then - echo "Mounting /external" - in_fstab=`grep external /etc/fstab` - if [[ $in_fstab != '' ]]; then - mount /external - else - mount -t ext4 /dev/sda1 /external - fi -fi - -running=`/etc/init.d/ssh status | grep 'is running'` -if [[ $running = '' ]]; then - echo "Starting SSH daemon" - iptables -P INPUT ACCEPT - /etc/init.d/ssh start -fi - -running=`psg powerd` -if [[ $running != '' ]]; then - echo "Stopping powerd" - stop powerd -fi - -buildbot_root=/external/buildbot -running=`psg buildslave` -if [[ $running = '' ]]; then - echo "Starting buildbot" - if [ -f $buildbot_root/twistd.pid ]; then - botdo buildslave stop $buildbot_root - fi - botdo buildslave start $buildbot_root -fi - -echo "System initialized correctly" diff --git a/monitor/.git-blame-ignore-revs b/monitor/.git-blame-ignore-revs new file mode 100644 index 0000000..4192104 --- /dev/null +++ b/monitor/.git-blame-ignore-revs @@ -0,0 +1,2 @@ +# black reformatting +4f932d15d3b0ca1c3827c84517b74188e084d004 diff --git a/monitor/LICENSE b/monitor/LICENSE new file mode 100644 
index 0000000..8c0c644 --- /dev/null +++ b/monitor/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Linaro + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/monitor/README.txt b/monitor/README.txt index 652c4eb..fd1e187 100644 --- a/monitor/README.txt +++ b/monitor/README.txt @@ -1,126 +1,94 @@ -Monitoring Tools for LLVM Development -===================================== +LLVM Buildbot Monitor +===================== -These tools are not meant to be used for development or testing, but to be -left running on a server or desktop as monitoring for your buildbots. They -are also meant to be used in conjunction, not as a replacement, to Nagios -and other hardware-level monitoring tools. +This is to be left running on a server or desktop as monitoring for your buildbots. +It purely reports the status of the builds. If you want hardware monitoring, look +elsewhere. 
-Currently we only have one: bot-monitor, which I keep running on Linaro's -public server (people.linaro.org) and keep it as a bookmark to quickly check -the bot status. It's also a helpful bookmark for all bots we care. +It supports Buildbot (as used by LLVM). It does not support LLVM Green Dragon +(https://green.lab.llvm.org/green/). -JSON Documentation ------------------- +Currently we have one monitor running at http://llvm.validation.linaro.org/. +Bookmark this if you have a need to check bot status at a glance. -The JSON file should be self-explanatory, but just in case, here's a few -of the behaviours it exhibits when rendered by the current version of the -bot-monitor. +JSON Format +----------- + +The JSON file describes the bots we want to monitor and which master/build service +they connect to. The base structure is a list of masters, which has a few properties and a list -of builder groups, which in turn also have some properties and a list of slaves. +of builder groups, which in turn also have some properties and a list of bots +(which in Buildbot terms are actually called "Builders" but we ended up calling +them bots here). Master properties: - "name": "Name of the master, which will appear in bold big letters", - "base_url": "http://SERVER:PORT/BASE", - "builder_url": "part of the URL that refers to the list of builders", - "build_url": "part of the URL that refers to the list of builds", - "ignore" : "true | false, shows or hide the entire master from the page" - "builders": [ ... ] - -Builder properties: - - "name": "Name of this group (fast bots, self-hosting, etc)", - "ignore" : "true | false, shows or hide the entire builder from the page" - "bots": [ ... ] + "name": Name of the master, which will appear as the section title. + "base_url": The base URL of the master, which will be used to make API calls. + For example for LLVM this might be "https://lab.llvm.org/buildbot". + "builder_url": The part of the URL that refers to the list of builders. 
+ Will be added to base_url when making API calls. + "build_url": Part of the URL that refers to the list of builds. Added to base_url + when making API calls. + "ignore" : Set to "true" to hide the master from the page. + "builders": [ ...a list of builder groups as detailed below... ] -Bots properties: +Builder group properties: - "name": "Exact name of the buildbot (becomes part of the URL)", - "ignore": "true | false, to ignore or not failures in this bot" + "name": Name of this group. "fast bots", "self-hosting", etc. Used as the section title. + "ignore" : Set to "true" to hide this builder group from the page. + "bots": [ ...a list of bots as described below... ] -Note that "ignore" has two different behaviour: +Bot properties: - * On masters and builders, it omits the entire class from the output - * On bots, it still shows them, but ignores their status + "name": The exact name of the buildbot. This will be used to build URLs for API calls. + "ignore": Set to "true" to ignore the status of this bot. -Note on bots: +Notes on bots: + * Bots may be repeated across builder groups if they fall into multiple categories + (this does not slow down the monitor as results are cached). + * The same bot name on 2 different masters refers to 2 different bots. - * You can repeat bots across builders, if they belong to multiple classes, for - example "self-hosting" and "test-suite". The script will cache the results - and simply re-print them, so this is *only* for visualisation / organisation - purposes. - * Using the same bot name on different masters means *different* bots. It may - be the same configuration on two different masters, or it may be completely - different bots. Beware. +Note that "ignore" has two different behaviours: + * On masters and builder groups, it omits the entire section from the output. + * On bots it shows the bot but ignores its status, meaning that an ignored bot failing + does not make the overall page status failed. 
HTML Page --------- -For now, there's only HTML output, but there's nothing stopping we to develop -more forms of communication (email, IRC bots, etc). - -The HTML page is separated into blocks: Masters, Builder Groups, Bots. It also -has a date on the top, to make sure you're looking at an up-to-date page, and -it changes the page icon from green to red if at least one (non-ignored) bot -is broken. - -Bots offline are considered broken, as they may require attention. But when the -admin restarts the master, that kills all buildslaves, and this show up as -"slave lost". You don't need to do anything, just wait for the next successful -build. - -Each buildbot has four columns: - - * Name & link: The bot name with a link to its page on its master. Good for - easy access to buildbots and masters. - * Status: Can only be "PASS" or "FAIL", but contains additional information - if it fails, ex. "slave lost" or "build stage 1" or "test-suite". These are - the name of the stages that failed. - * Build number: The build number, to help identify if there is a change from - a specific number. Not very useful, but there just for reference. - * Commit range: The range of commits that were tested on that build. This is - very helpful to identify if a slow bot is failing because it hasn't yet - reached the commit range on a fast bot that is passing, or not. - - -LLVM Masters ------------- - -There are a number of masters in the LLVM upstream infrastructure, and we may -need to monitor bots in all of those, or switch between them, depending on the -need. - -* LLVM Upstream main master: http://lab.llvm.org:8011/ - -This is the main master that spams everyone every time one of the bots break. -Unless there is any specific concern, bots should be in this master. - -* LLVM Upstream silent master: http://lab.llvm.org:8014/ - -Exactly the same as above, but no emails are sent. 
This master is usually empty -except for the bots that may be noise temporarily, in active development, or -being a bot that doesn't track compiler regressions, but performance regressions -which is monitored on another page (http://llvm.org/perf/) - -* LLVM Japan master: http://bb.pgr.jp/ - -A side master built by Nakamura Takumi with some x86 and x86_64 buildbots. We -rarely need to monitor anything there, but it's good to know it's there. - -* Linaro Downstream master: http://buildmaster.tcwglab.linaro.org/ - -Our local master, that we use for development. Individual developers can have -their own containers, in which case, the masters will be in different ports. - -These bots should always be ignored for their global status, or we'll generate -a lot of noise to ourselves. Unless, of course, they're in their way upstream -and going through staging deployment. - -* Green Dragon bots: http://lab.llvm.org:8080/green/ - -This is not a buildbot master, but Jenkins. We don't monitor those in our page -but they do have IRC bots in the #llvm channel and are already quite good at -displaying success and failures. +The script will generate an HTML page. This page is separated into blocks: + * Masters which contain... + * Builder Groups which contain... + * Bots + +The date is printed at the top of the page so you know when the results were generated. + +Bots that are offline or partially fail to read via the API will show up with a message +along the lines of "<bot name> is offline!". The page should still update correctly +for the rest of the bots. + +Each listed bot has these columns: + + * "Buildbot": This shows the name and a link to the master's web interface for the bot. + * "Status": The status of the last finished build. PASS or FAIL (currently cancelled + is also treated as a failure). + * "T Since": The time since the last build finished. This is useful for spotting bots + that have gotten disconnected. 
If this time is greater than 24 hours, it will be shown + in red. + * "Duration": The length of the last build. + * "Latest": The build number of the last finished build, which itself will be a link + to the results page for that build. + * "Failing steps": The failed build steps, if it was a failed build. + * "Build In Progress": This will be "Yes" if there is a build currently running, and + the text will link to the build. Or "No" if there is not. If we cannot determine this, + it will be left blank and you should check the builder's status page instead. + * "1st Failing": The number of the first failed build, if the bot fails for several + builds. + * "Failing Since": The time since the first failed build finished. This is useful for + spotting bots that fail for a long time. + +Note: "finished" here refers to the build ending be that by success, cancellation or +failure. diff --git a/monitor/bot-status b/monitor/bot-status deleted file mode 100755 index d1a4986..0000000 --- a/monitor/bot-status +++ /dev/null @@ -1,243 +0,0 @@ -#!/usr/bin/env perl - -# This script greps the JSON files for the buildbots on -# the LLVM official build master by name and prints an -# HTML page with the links to the bots and the status. -# -# Multiple masters can be used, as well as multiple groups of bots -# and multiple bots per group, all in a json file. See linaro.json -# in this repository to have an idea how the config file is. -# -# Module JSON needs to be installed, either from cpan or packages. - -push @INC, `dirname $0`; - -use strict; -use warnings; -# Core modules -use File::Temp qw/tempfile/; -use File::Copy; -# This is not part of core, but you really *need* it. -use JSON; -# This can be replaced by `wget/curl` -use LWP; -use LWP::UserAgent; -# We don't have DateTime everywhere... 
-my $date = `date`; -# DEBUG -my $DEBUG = 0; - -######################################################### Initialisation -# Option checking -my $syntax = "$0 config-file.json output-file.html\n"; -die $syntax unless (scalar @ARGV == 2); -# Read config file -my ($config, $error) = &read_file($ARGV[0]); -die $error if ($error); -($config, $error) = &decode($config); -die $error if ($error); - -# Setup HTML output file -my $output = $ARGV[1]; -my ($temp, $tempname) = tempfile(); - - -######################################################### Main Logic -# Get status for all bots -my %bot_cache; -my $fail = 0; -foreach my $server (@$config) { - next if (defined $server->{'ignore'} and $server->{'ignore'} eq "true"); - my ($BASE_URL, $BUILDER_URL, $BUILD_URL) = - ($server->{'base_url'}, $server->{'builder_url'}, $server->{'build_url'}); - &debug("Parsing server ".$server->{'name'}."...\n"); - foreach my $builder (@{$server->{'builders'}}) { - &debug(" Parsing builder ".$builder->{'name'}."...\n"); - foreach my $bot (@{$builder->{'bots'}}) { - &debug(" Parsing bot ".$bot->{'name'}."...\n"); - next if defined $bot_cache{$bot->{'name'}}; - my $status = &get_status($bot->{'name'}, $BASE_URL, $BUILDER_URL, $BUILD_URL); - if (!defined $bot->{'ignore'} or $bot->{'ignore'} ne "true") { - $fail = 1 if ($status->{'fail'}); - } else { - &debug(" Ignoring...\n"); - } - &debug($status->{'fail'} ? " FAIL\n" : " PASS\n"); - $bot_cache{$BASE_URL.'/'.$bot->{'name'}} = $status; - } - } -} - -# Dump all servers / bots -foreach my $server (@$config) { - next if (defined $server->{'ignore'} and $server->{'ignore'} eq "true"); - my ($BASE_URL, $BUILDER_URL, $BUILD_URL) = - ($server->{'base_url'}, $server->{'builder_url'}, $server->{'build_url'}); - # Favicon - my $favicon = $fail ? 
"fail.ico" : "ok.ico"; - print $temp "<link rel=\"shortcut icon\" href=\"$favicon\" type=\"image/x-icon\"/>\n"; - # Header - print $temp "<table cellspacing=1 cellpadding=2>\n"; - print $temp "<tr><td colspan=5> </td><tr>\n"; - print $temp "<tr><th colspan=5>$server->{'name'} @ $date</td><tr>\n"; - ## Main loop - foreach my $builder (@{$server->{'builders'}}) { - print $temp "<tr><td colspan=5> </td><tr>\n"; - print $temp "<tr><th colspan=5>$builder->{'name'}</td><tr>\n"; - print $temp "<tr><th>Buildbot</th><th>Status</th><th>Comments</th>". - "<th>Build #</th><th>Commits</th><th>Time (minutes)</th></tr>\n"; - foreach my $bot (@{$builder->{'bots'}}) { - print $temp "<tr>\n"; - my $status = $bot_cache{$BASE_URL.'/'.$bot->{'name'}}; - my $url = "$BASE_URL/$BUILDER_URL/$bot->{'name'}"; - print $temp " <td><a href='$url'>$bot->{'name'}</a></td>\n"; - if ($status->{'fail'}) { - print $temp " <td><font color='red'>FAIL</font></td>\n". - " <td>$status->{'fail'}</td>\n"; - } else { - print $temp " <td><font color='green'>PASS</font></td>\n". 
- " <td> </td>\n"; - } - if (defined $status->{'build'}) { - my $build_url = $url."/builds/".$status->{'build'}; - print $temp " <td><a href='$build_url'>$status->{'build'}</a></td>\n"; - } else { - print $temp " <td> </td>\n"; - } - if (defined $status->{'from'} and - defined $status->{'to'}) { - print $temp " <td>$status->{'from'}-$status->{'to'}</td>\n"; - } else { - print $temp " <td> </td>\n"; - } - if (defined $status->{'time'}) { - my $time = sprintf("%.0f", $status->{'time'} / 60); - print $temp " <td>$time</td>\n"; - } else { - print $temp " <td> </td>\n"; - } - print $temp "</tr>\n"; - } - } - # Footer - print $temp "</table>\n"; -} -close $temp; - -# Move temp to main (atomic change) -move($tempname, $output); -exit; - -######################################################### Subs - -# GET STATUS: get the status of an individual bot -# (botname, base url, builder url, build url) -> (status) -sub get_status() { - my ($bot, $BASE_URL, $BUILDER_URL, $BUILD_URL) = @_; - my ($err, $contents, $json); - my %status; - - # Get buildbot main JSON - ($contents, $err) = wget("$BASE_URL/json/$BUILDER_URL/$bot"); - $status{'fail'} = $err; - return \%status if $err; - ($json, $err) = decode($contents); - $status{'fail'} = $err; - return \%status if $err; - - # Find recent builds - my $cached_builds = scalar @{$json->{'cachedBuilds'}}; - my $running_builds = scalar @{$json->{'currentBuilds'}}; - my $last_build = $json->{'cachedBuilds'}[$cached_builds - $running_builds - 1]; - return \%status if (not defined $last_build); - - # Get most recent build - ($contents, $err) = wget("$BASE_URL/json/$BUILDER_URL/$bot/$BUILD_URL/$last_build"); - $status{'fail'} = $err; - return \%status if $err; - ($json, $err) = decode($contents); - $status{'fail'} = $err; - return \%status if $err; - - # Build number - $status{'build'} = $json->{'number'}; - - # Status of the last build - # "text" : [ "build", "successful" ], - # "text" : [ "failed", "svn-llvm" ], - my $failed = 0; - foreach 
(@{$json->{'text'}}) { - $status{'fail'} .= $_." " if ($failed); - $failed = 1 if (/failed|exception/); - } - $status{'fail'} =~ s/ $//; - - # Commit range. All LLVM repositories are in git now, so truncate the hashes - # to 8 characters for display. - my @commits = @{$json->{'sourceStamp'}->{'changes'}}; - my $first_rev = $commits[0]->{'revision'}; - my $last_rev = $commits[-1]->{'revision'}; - $status{'from'} = substr($first_rev, 0, 8); - $status{'to'} = substr($last_rev, 0, 8); - - # Elapsed time of the last build. - $status{'time'} = $json->{'times'}[1] - $json->{'times'}[0]; - - return \%status; -} - -# WGET: uses LWP to get an URL, returns contents (or error). -# (url) -> (contents, error) -sub wget() { - my ($url) = @_; - my ($contents, $error) = ("", ""); - - my $ua = LWP::UserAgent->new; - $ua->agent("LLVM BotMonitor/0.1"); - my $req = HTTP::Request->new(GET => $url); - my $res = $ua->request($req); - - if ($res->is_success) { - $contents = $res->content; - } else { - $error = $res->status_line; - } - return ($contents, $error); -} - -# READ FILE: Reads a local file, returns contents -# (filename) -> (contents) -sub read_file() { - my ($file) = @_; - my ($contents, $error) = ("", ""); - if (open FH, $file) { - while (<FH>) { $contents .= $_; } - close FH; - } else { - $error = "Can't open config file $file: $!"; - } - return ($contents, $error); -} - -# DECODE: Reads contents, returns JSON output (or error) -# (contents) -> (JSON, error) -sub decode() { - my ($contents) = @_; - my ($json, $error) = ("", ""); - eval { $json = decode_json($contents); }; - if ($@) { - if ($DEBUG) { - $error = $@; - } else { - $error = "JSON error"; - } - } - return ($json, $error); -} - -# DEBUG: Prints debug messages if debug enabled -# (msg) -> () -sub debug () { - my ($msg) = @_; - print STDERR $msg if ($DEBUG); -} diff --git a/monitor/bot-status.py b/monitor/bot-status.py new file mode 100755 index 0000000..6e37def --- /dev/null +++ b/monitor/bot-status.py @@ -0,0 +1,359 
@@ +#!/usr/bin/env python3 + +# This script greps the JSON files for the buildbots on the LLVM official +# build master by name and prints an HTML page with the links to the bots +# and the status. +# +# Multiple masters can be used, as well as multiple groups of bots and +# multiple bots per group, all in a json file. See linaro.json in this +# repository to have an idea how the config file is. + +import sys +import os +import argparse +import json +import tempfile +import logging +import pickle +import shutil +import time +from datetime import datetime, timedelta + +# The requests library allows HTTP keep-alive which re-uses the same TCP connection +# to download multiple files. +import requests +from textwrap import dedent +from make_table import Table + +def ignored(s): + return "ignore" in s and s["ignore"] + + +def not_ignored(s): + return not ignored(s) + + +# Returns the parsed JSON fetched from the URL or raises an exception +def wget(session, url): + got = session.get(url) + got.raise_for_status() + return got.json() + + +# Map from buildbot status codes we want to treat as errors to the color they +# should be shown in. The codes are documented at +# https://docs.buildbot.net/latest/developer/results.html#build-result-codes, +# and these colors match the suggested ones there. +RESULT_COLORS = { + 2: "red", # Error + 4: "purple", # Exception + 5: "purple", # Retry + 6: "pink", # Cancelled +} + + +def get_bot_failing_steps(session, base_url, buildid): + try: + contents = wget(session, "{}/api/v2/builds/{}/steps".format(base_url, buildid)) + except requests.exceptions.RequestException: + return "" + + for step in contents["steps"]: + if step["results"] in RESULT_COLORS: + yield (step["name"], step["results"]) + + +# Get the status of an individual bot BOT. Returns a dict with the +# information. 
+def get_bot_status(session, bot, base_url, builder_url, build_url): + try: + builds = wget( + session, "{}/api/v2/{}/{}/{}".format(base_url, builder_url, bot, build_url) + ) + except requests.exceptions.RequestException as e: + logging.debug(" Couldn't get builds for bot {}!".format(bot)) + return {"valid": False} + + reversed_builds = iter(sorted(builds["builds"], key=lambda b: -b["number"])) + next_build = None + for build in reversed_builds: + if not build["complete"]: + next_build = build + continue + + time_since = int(datetime.now().timestamp()) - int(build["complete_at"]) + duration = int(build["complete_at"]) - int(build["started_at"]) + agent_url = "{}/#/{}/{}".format(base_url, builder_url, build["builderid"]) + + status = { + "builder_url": agent_url, + "number": build["number"], + "build_url": "{}/builds/{}".format(agent_url, build["number"]), + "state": build["state_string"], + "time_since": timedelta(seconds=time_since), + "duration": timedelta(seconds=duration), + "fail": build["state_string"] != "build successful", + "next_in_progress": None + if next_build is None + else "{}/builds/{}".format(agent_url, next_build["number"]), + } + + if status["fail"]: + buildid = build["buildid"] + status["steps"] = list(get_bot_failing_steps(session, base_url, buildid)) + + # find the start of the failure streak + first_fail = build + for build in reversed_builds: + if build["state_string"] == "build successful": + status["first_fail_number"] = first_fail["number"] + status["first_fail_url"] = "{}/builds/{}".format( + agent_url, first_fail["number"] + ) + # Occasionally we find a finished build without complete_at, + # it may be an intermittent issue on Buildbot's side. + complete_at = first_fail.get("complete_at") + if complete_at is not None: + fail_since = int(datetime.now().timestamp()) - int(complete_at) + status["fail_since"] = timedelta(seconds=fail_since) + break + first_fail = build + else: + pass # fails since forever? 
+ + return status + + +# Get status for all bots named in the config +# Return a dictionary of (base_url, bot name) -> status info +def get_buildbot_bots_status(config): + session = requests.Session() + bot_cache = {} + + for server in filter(not_ignored, config): + base_url = server["base_url"] + logging.debug("Parsing server {}...".format(server["name"])) + for builder in server["builders"]: + logging.debug(" Parsing builders {}...".format(builder["name"])) + for bot in builder["bots"]: + bot_key = (base_url, bot["name"]) + if bot_key in bot_cache: + continue + + logging.debug(" Parsing bot {}...".format(bot["name"])) + status = get_bot_status( + session, + bot["name"], + base_url, + server["builder_url"], + server["build_url"], + ) + if status is not None: + if status.get("valid", True): + logging.debug( + " Bot status: " + ("FAIL" if status["fail"] else "PASS") + ) + bot_cache[bot_key] = status + + return bot_cache + + +def write_bot_status(config, output_file, bots_status): + temp = tempfile.NamedTemporaryFile(mode="w+", delete=False) + + temp.write( + dedent( + """\ + <!DOCTYPE html> + <style> + /* Combine the border between cells to prevent 1px gaps + in the row background colour. */ + table, td, th { + border-collapse: collapse; + } + /* Colour every other row in a table body grey. */ + tbody tr:nth-child(even) td { + background-color: #ededed; + } + </style>""" + ) + ) + + column_titles = [ + "Buildbot", + "Status", + "T Since", + "Duration", + "Latest", + "Failing steps", + "Build In Progress", + "1st Failing", + "Failing Since", + ] + num_columns = len(column_titles) + + # The first table should also say when this was generated. + # If we were to put this in its own header only table, it would + # not align with the rest because it has no content. 
+ first = True + + # Dump all servers / bots + for server in filter(not_ignored, config): + with Table(temp) as table: + table.Border(0).Cellspacing(1).Cellpadding(2) + + table.AddRow().AddCell().Colspan(num_columns) + + if first: + table.AddRow().AddHeader( + "Generated {} ({})".format( + datetime.today().ctime(), time.tzname[time.daylight] + ) + ).Colspan(num_columns) + table.AddRow().AddCell().Colspan(num_columns) + first = False + + table.AddRow().AddHeader(server["name"]).Colspan(num_columns) + + for builder in server["builders"]: + table.AddRow().AddCell().Colspan(num_columns) + table.AddRow().AddHeader(builder["name"]).Colspan(num_columns) + title_row = table.AddRow() + for title in column_titles: + title_row.AddHeader(title) + + table.BeginBody() + + for bot in builder["bots"]: + logging.debug("Writing out status for {}".format(bot["name"])) + + row = table.AddRow() + base_url = server["base_url"] + try: + status = bots_status[(base_url, bot["name"])] + except KeyError: + row.AddCell("{} is offline!".format(bot["name"])).Colspan( + num_columns + ) + continue + else: + if not status.get("valid", True): + row.AddCell( + "Could not read status for {}!".format(bot["name"]) + ).Colspan(num_columns) + continue + + row.AddCell( + "<a href='{}'>{}</a>".format(status["builder_url"], bot["name"]) + ) + + status_cell = row.AddCell() + if status["fail"]: + status_cell.Style("color:red").Content("FAIL") + else: + status_cell.Style("color:green").Content("PASS") + + time_since_cell = row.AddCell() + if "time_since" in status: + time_since = status["time_since"] + # No build should be taking more than a day + if time_since > timedelta(hours=24): + time_since_cell.Style("color:red") + time_since_cell.Content(time_since) + + duration_cell = row.AddCell() + if "duration" in status: + duration_cell.Content(status["duration"]) + + number_cell = row.AddCell() + if "number" in status: + number_cell.Content( + "<a href='{}'>{}</a>".format( + status["build_url"], status["number"] 
+ ) + ) + + steps_cell = row.AddCell() + if "steps" in status and status["steps"]: + + def render_step(name, result): + return "<font color='{}'>{}</font>".format( + RESULT_COLORS[result], name + ) + + step_list = ", ".join( + render_step(name, result) + for name, result in status["steps"] + ) + steps_cell.Style("text-align:center").Content(step_list) + + next_in_progress_cell = row.AddCell() + if "next_in_progress" in status: + next_build = status["next_in_progress"] + next_in_progress_cell.Content( + "No" + if next_build is None + else "<a href='{}'>Yes</a>".format(next_build) + ) + + first_fail_cell = row.AddCell() + if "first_fail_number" in status: + first_fail_cell.Content( + "<a href='{}'>{}</a>".format( + status["first_fail_url"], status["first_fail_number"] + ) + ) + + fail_since_cell = row.AddCell() + if "fail_since" in status: + fail_since = status["fail_since"] + # No build should fail for more than a day + if fail_since > timedelta(hours=24): + fail_since_cell.Style("color:red") + fail_since_cell.Content(fail_since) + + table.EndBody() + + # Move temp to main (atomic change) + temp.close() + shutil.move(temp.name, output_file) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "-d", dest="debug", action="store_true", help="show debug log messages" + ) + parser.add_argument( + "--cachefile", + required=False, + help="Location of bot status data cache file (a pickled Python object). 
If it exists use it, " + "if it does not, read the status from the network and write it to this path.", + ) + parser.add_argument("config_file", help="Bots description in JSON format") + parser.add_argument("output_file", help="output HTML path") + args = parser.parse_args() + + if args.debug: + logging.basicConfig(stream=sys.stderr, level=logging.DEBUG) + + try: + with open(args.config_file, "r") as f: + config = json.load(f) + except IOError as e: + print("error: failed to read {} config file: {}".format(args.config_file, e)) + sys.exit(os.EX_CONFIG) + + status = None + if args.cachefile and os.path.exists(args.cachefile): + logging.debug("Using cache file {}".format(args.cachefile)) + with open(args.cachefile, "rb") as f: + status = pickle.load(f) + else: + status = get_buildbot_bots_status(config) + if args.cachefile: + logging.debug("Writing status to cache file {}".format(args.cachefile)) + with open(args.cachefile, "wb") as f: + pickle.dump(status, f) + + write_bot_status(config, args.output_file, status) diff --git a/monitor/fail.ico b/monitor/fail.ico Binary files differdeleted file mode 100644 index 263676d..0000000 --- a/monitor/fail.ico +++ /dev/null diff --git a/monitor/install.sh b/monitor/install.sh index 7f503a5..db8e10d 100755 --- a/monitor/install.sh +++ b/monitor/install.sh @@ -15,7 +15,7 @@ # Full path BASE=$(readlink -fn -- "$0") BASE=$(dirname "$BASE") -if [ ! -x "$BASE/bot-status" ]; then +if [ ! -x "$BASE/bot-status.py" ]; then echo "Make sure the install script is in the monitor directory" exit 1 fi @@ -42,37 +42,25 @@ if [ $MANY -eq 1 ]; then JSON="" fi -# Checking for required Perl modules -if ! perl -v > /dev/null; then - echo "Please, install Perl" - exit 1 -fi -if ! perl -e "File::Temp" > /dev/null; then - echo "Please, install Perl's File module" - exit 1 +# Checking for required Python3 modules +if ! python3 --version > /dev/null; then + echo 'Python3 missing' fi -if ! 
perl -e "use JSON" > /dev/null; then - echo "Please, install Perl's JSON module" - exit 1 -fi -if ! perl -e "use LWP" > /dev/null; then - echo "Please, install Perl's LWP module" - exit 1 +if ! python3 -c 'import requests' > /dev/null; then + echo 'python3-requests module missing' fi ############################# Install # Creates bin for bot-status mkdir -p "$ROOT/bin" -ln -sf "$BASE/bot-status" "$ROOT/bin/bot-status" +ln -sf "$BASE/bot-status.py" "$ROOT/bin/bot-status.py" if [ "$JSON" != "" ]; then ln -sf "$BASE/$JSON" "$ROOT/bin/$JSON" fi -# Creates html dir for page and icons +# Creates html dir for page mkdir -p "$ROOT/html" -ln -sf "$BASE/fail.ico" "$ROOT/html/fail.ico" -ln -sf "$BASE/ok.ico" "$ROOT/html/ok.ico" touch "$ROOT/html/index.html" echo @@ -99,5 +87,5 @@ echo # Crontab echo " * To run the application every five minutes, add this line to your crontab:" -echo " */5 * * * * $ROOT/bin/bot-status $ROOT/bin/$JSON $ROOT/html/index.html" +echo " */5 * * * * $ROOT/bin/bot-status.py $ROOT/bin/$JSON $ROOT/html/index.html" echo diff --git a/monitor/linaro.json b/monitor/linaro.json index ef71046..637062c 100644 --- a/monitor/linaro.json +++ b/monitor/linaro.json @@ -1,113 +1,79 @@ [ { "name": "LLVM Lab", - "base_url": "http://lab.llvm.org:8011", + "base_url": "https://lab.llvm.org/buildbot", "builder_url": "builders", "build_url": "builds", "builders": [ { - "name": "Fast Bots", + "name": "Quick", "bots": [ - { "name": "clang-cmake-armv7-quick" }, - { "name": "clang-cmake-aarch64-quick" } + { "name": "clang-armv8-quick" }, + { "name": "clang-aarch64-quick" } ] }, { - "name": "Full Bots (LLVM, Clang, RT)", + "name": "Full (LLVM, Clang, Compiler-RT, Testsuite, 2-Stage)", "bots": [ - { "name": "clang-cmake-armv7-full" }, - { "name": "clang-cmake-thumbv7-full-sh" }, - { "name": "clang-cmake-aarch64-lld" }, - { "name": "clang-cmake-aarch64-full" } + { "name": "clang-aarch64-lld-2stage" }, + { "name": "clang-armv8-lld-2stage" }, + { "name": 
"clang-aarch64-full-2stage" } ] }, { - "name": "Self Hosting Bots", + "name": "2-stage (w/o Full Bots)", "bots": [ - { "name": "clang-cmake-armv7-selfhost" }, - { "name": "clang-cmake-armv7-selfhost-neon" }, - { "name": "clang-cmake-thumbv7-full-sh" }, - { "name": "clang-cmake-armv8-lld" }, - { "name": "clang-cmake-aarch64-full" }, - { "name": "clang-cmake-aarch64-lld" } + { "name": "clang-armv7-2stage" }, + { "name": "clang-armv7-vfpv3-2stage" }, + { "name": "clang-arm64-windows-msvc-2stage" }, + { "name": "clang-aarch64-sve-vla-2stage" }, + { "name": "clang-aarch64-sve-vls-2stage" } ] }, { - "name": "Test-Suite Bots", + "name": "Test-Suite (w/o Full Bots)", "bots": [ - { "name": "clang-cmake-armv7-lnt" }, - { "name": "clang-cmake-armv7-global-isel"}, - { "name": "clang-cmake-armv8-lld" }, - { "name": "clang-cmake-armv7-full" }, - { "name": "clang-cmake-thumbv7-full-sh" }, - { "name": "clang-cmake-aarch64-quick" }, - { "name": "clang-cmake-aarch64-full" }, - { "name": "clang-cmake-aarch64-lld" }, - { "name": "clang-cmake-aarch64-global-isel"} + { "name": "clang-armv7-lnt" }, + { "name": "clang-armv7-global-isel" }, + { "name": "clang-aarch64-global-isel" }, + { "name": "clang-aarch64-sve-vla" }, + { "name": "clang-aarch64-sve-vls" } ] }, { - "name": "Library Bots", + "name": "Windows", "bots": [ - { "name": "libcxx-libcxxabi-libunwind-armv7-linux" }, - { "name": "libcxx-libcxxabi-libunwind-armv7-linux-noexceptions" }, - { "name": "libcxx-libcxxabi-libunwind-armv8-linux" }, - { "name": "libcxx-libcxxabi-libunwind-armv8-linux-noexceptions" }, - { "name": "libcxx-libcxxabi-libunwind-aarch64-linux" }, - { "name": "libcxx-libcxxabi-libunwind-aarch64-linux-noexceptions" } + { "name": "clang-arm64-windows-msvc" }, + { "name": "clang-arm64-windows-msvc-2stage" }, + { "name": "lldb-aarch64-windows" } ] }, { - "name": "LLD Bots", + "name": "LLDB", "bots": [ - { "name": "clang-cmake-armv8-lld" }, - { "name": "clang-cmake-aarch64-lld" } + { "name": "lldb-aarch64-ubuntu" }, + 
{ "name": "lldb-arm-ubuntu" }, + { "name": "lldb-aarch64-windows" } ] }, { - "name": "Cross-Compilation Bots", - "ignore" : "true", - "bots": [ - ] - }, - { - "name": "Benchmarking Bots", - "bots": [ - { "name": "clang-native-arm-lnt-perf" } - ] - } - ] - }, - { - "name": "LLVM Silent Master", - "base_url": "http://lab.llvm.org:8014", - "builder_url": "builders", - "build_url": "builds", - "builders": [ - { - "name": "Benchmarking Bots", - "ignore": "true", - "bots": [ - ] - } - ] - }, - { - "name": "Linaro Lab", - "base_url": "http://buildmaster.tcwglab.linaro.org", - "builder_url": "builders", - "build_url": "builds", - "ignore" : "true", - "builders": [ - { - "name": "Benchmarking Bots", - "bots": [ - { "name": "clang-native-arm-lnt-perf", "ignore": "true" } - ] - }, - { - "name": "Experimental", + "name": "Flang", "bots": [ + { "name": "clang-aarch64-full-2stage" }, + { "name": "clang-arm64-windows-msvc" }, + { "name": "clang-arm64-windows-msvc-2stage" }, + { "name": "clang-aarch64-sve-vla" }, + { "name": "clang-aarch64-sve-vla-2stage" }, + { "name": "clang-aarch64-sve-vls" }, + { "name": "clang-aarch64-sve-vls-2stage" }, + { "name": "flang-aarch64-dylib" }, + { "name": "flang-aarch64-sharedlibs" }, + { "name": "flang-aarch64-out-of-tree" }, + { "name": "flang-aarch64-release" }, + { "name": "flang-aarch64-debug-reverse-iteration" }, + { "name": "flang-aarch64-rel-assert" }, + { "name": "flang-aarch64-latest-gcc" }, + { "name": "flang-aarch64-libcxx" } ] } ] diff --git a/monitor/make_table.py b/monitor/make_table.py new file mode 100644 index 0000000..2f115b5 --- /dev/null +++ b/monitor/make_table.py @@ -0,0 +1,132 @@ +# This file contains a basic "builder" style API for making HTML tables +# and writing them to a file once finished. +# +# Use it as follows: +# with Table(outfile) as table: +# table.AddRow().AddCell("foo") +# +# To get: +# <table> +# <td>foo</td> +# </table> +# +# Methods return a reference to self, or to the new thing you added. 
# This means you can keep chaining calls to build what you want.
#
# table.AddRow().AddCell("foo").Colspan(1).Style("mystyle")


class TableCell(object):
    """One table cell, rendered as ``<name ...>content</name>``.

    ``name`` is the tag name used on both the opening and closing tag
    (the subclasses below pass "td" and "th"). ``content`` is formatted
    with ``str.format``, so any object with a string form is accepted.
    All setters return ``self`` so calls can be chained.
    """

    def __init__(self, name, content=None):
        self.name = name
        self.content = content
        self.style = None
        self.colspan = None

    def Style(self, style):
        """Set the value of the ``style`` attribute and return self."""
        self.style = style
        return self

    def Colspan(self, colspan):
        """Set the value of the ``colspan`` attribute and return self."""
        self.colspan = colspan
        return self

    def Content(self, content):
        """Replace the cell content and return self."""
        self.content = content
        return self

    def __str__(self):
        # Attributes that were never set are omitted from the tag entirely.
        style_attr = (
            "" if self.style is None else ' style="{}"'.format(self.style)
        )
        colspan_attr = (
            "" if self.colspan is None else " colspan={}".format(self.colspan)
        )
        # NOTE(review): the placeholder for an empty cell and the leading
        # indentation inside these literals are reproduced as they appear in
        # the extracted patch, where whitespace/entities may have been
        # normalised - confirm against the upstream file.
        body = " " if self.content is None else self.content
        return " <{}{}{}>{}</{}>".format(
            self.name, style_attr, colspan_attr, body, self.name
        )


class Cell(TableCell):
    """A data cell (``<td>``)."""

    def __init__(self, content=None):
        super().__init__("td", content)


class Header(TableCell):
    """A header cell (``<th>``)."""

    def __init__(self, content=None):
        super().__init__("th", content)


class Row(object):
    """A table row (``<tr>``) holding an ordered list of cells."""

    def __init__(self):
        self.cells = []

    def AddCell(self, content=None):
        """Append a new ``Cell`` and return it (not the row) for chaining."""
        cell = Cell(content)
        self.cells.append(cell)
        return cell

    def AddHeader(self, content=None):
        """Append a new ``Header`` and return it (not the row) for chaining."""
        header = Header(content)
        self.cells.append(header)
        return header

    def __str__(self):
        # One line for the opening tag, one per cell, one for the closing tag.
        lines = [" <tr>"]
        lines.extend(str(cell) for cell in self.cells)
        lines.append(" </tr>")
        return "\n".join(lines)


class TableBody(object):
    """Marker rendered as ``<tbody>`` or, when ``close`` is true, ``</tbody>``.

    Instances are stored in ``Table.rows`` alongside ``Row`` objects so the
    body tags are emitted at the position where they were added.
    """

    def __init__(self, close=False):
        self.close = close

    def __str__(self):
        return "<{}tbody>".format("/" if self.close else "")


class Table(object):
    """Builder for an HTML ``<table>`` that is written to ``out`` on exit.

    Intended to be used as a context manager: the table is rendered and
    written to ``out`` (any object with a ``write(str)`` method) when the
    ``with`` block ends.
    """

    def __init__(self, out):
        self.out = out
        self.rows = []
        self.border = None
        self.cellspacing = None
        self.cellpadding = None
        # Not read anywhere in this module; kept so existing attribute
        # access from callers keeps working.
        self.body_begins = None

    def __enter__(self):
        return self

    def __exit__(self, *args):
        # The table is written whether or not the with-body raised; nothing
        # truthy is returned, so an in-flight exception still propagates.
        self.out.write("\n" + str(self))

    def __str__(self):
        def attribute(name, value):
            # Unset attributes are left out of the opening tag.
            return "" if value is None else " {}={}".format(name, value)

        open_tag = "<table{}{}{}>".format(
            attribute("border", self.border),
            attribute("cellspacing", self.cellspacing),
            attribute("cellpadding", self.cellpadding),
        )
        lines = [open_tag]
        lines.extend(str(row) for row in self.rows)
        lines.append("</table>")
        return "\n".join(lines)

    def AddRow(self):
        """Append a new ``Row`` and return it for chaining."""
        row = Row()
        self.rows.append(row)
        return row

    def Border(self, border):
        """Set the ``border`` attribute and return self."""
        self.border = border
        return self

    def Cellspacing(self, cellspacing):
        """Set the ``cellspacing`` attribute and return self."""
        self.cellspacing = cellspacing
        return self

    def Cellpadding(self, cellpadding):
        """Set the ``cellpadding`` attribute and return self."""
        self.cellpadding = cellpadding
        return self

    def BeginBody(self):
        """Append an opening ``<tbody>`` marker and return self.

        Returning self keeps this method chainable like the other setters.
        """
        self.rows.append(TableBody())
        return self

    def EndBody(self):
        """Append a closing ``</tbody>`` marker and return self."""
        self.rows.append(TableBody(close=True))
        return self


# --- remainder of the patch page (not part of make_table.py) ---
# diff --git a/monitor/ok.ico b/monitor/ok.ico
# Binary files differ
# deleted file mode 100644
# index 1acfa53..0000000
# --- a/monitor/ok.ico
# +++ /dev/null