Update lava-boot.py interface.

The new version now spawns several LAVA jobs as a workaround for board reliability problems. If at least one of the jobs succeeds, it returns OK; otherwise it reports failure. Change-Id: Ic43e8ded679f07042af700f1fee36f8d5b7c8779
author: Christophe Lyon <christophe.lyon@linaro.org> 2018-02-15 13:23:36 +0000
committer: Christophe Lyon <christophe.lyon@linaro.org> 2018-02-15 13:26:34 +0000
commit: 65d798014d2d141cdff98c745e3b36d131157b18 (patch)
tree: 561740976a6dc486f7297c41166283a8f7195afc
parent: 28caee1a8d68271a39d8f80ce0b5662a3e831c3a (diff)
6 files changed, 261 insertions, 199 deletions
diff --git a/build-linux.sh b/build-linux.sh
index 622dc81..734391e 100755
--- a/build-linux.sh
+++ b/build-linux.sh
@@ -105,11 +105,6 @@ lava_boot()
     config=$2
     job=
 
-    # Use an external file to pass lava_job_id, to avoid having to
-    # parse various possible error messages from linaro-cp.py,
-    # yaml-to-json.py, lava-tool, ...
-    echo > lava_job_id
-
     case ${arch}:${config} in
 	arm:multi_v7_defconfig)
 	    DEVICE_TYPE=beaglebone-black
@@ -123,27 +118,14 @@ lava_boot()
 	    ;;
 	*)
 	    echo "$0: Unsupported config: ${config}"
-	    echo 0 > lava_job_id
-	    return
+	    return 0
 	    ;;
     esac
 
     # lava_boot.py works only when started from its own dir
     pushd ${mydir}
-    python3 ./lava-boot.py --level DEBUG --board ${DEVICE_TYPE} --linux-image app_linux/linux/$KERNEL --linux-dtb app_linux/linux/$DTB starttest | tee lava.stdout || true
-    result=$(grep "INFO job" lava.stdout)
-    popd
-
-    case "$result" in
-	*"INFO job"*)
-	    job=`echo $result | awk '{print $5;}'`
-	    ;;
-	*)
-	    echo "Error submitting job: $result"
-	    ;;
-    esac
-
-    echo $job > lava_job_id
+    python3 ./lava-boot.py --level DEBUG --board ${DEVICE_TYPE} --name "TCWG GCC testing" app_linux/linux/ &
+    return $!
 }
 
 status=0
@@ -166,8 +148,7 @@ do
     # Handle boot test via lava, if requested. Put the job in the lava
     # queue, we collect the results later
     if $boot ; then
-	lava_boot ${arch} ${config}
-	job=$(cat lava_job_id)
+	job=$(lava_boot ${arch} ${config})
 	case ${job} in
 	    "")
 		status=1
@@ -193,21 +174,25 @@ if $boot ; then
     for job in $jobs
     do
 	this_lava_result=0
-	python3 ${mydir}/lava-boot.py --level DEBUG --job_id ${job} wait || this_lava_result=$?
-
-	# If the job completed, check that the individual tests were successful
-	if [ "$this_lava_result" = "0" ]; then
-	    rm -f csv
-	    wget https://${LAVA_SERVER}/results/${job}/csv
-	    # linux-linaro-ubuntu-lsb_release fails because lsb_release is not
-	    # in the rootfs, so ignore this failure
-	    cat csv | grep -v linux-linaro-ubuntu-lsb_release | awk -F, '{print $3;}' | grep -i fail && this_lava_result=1
-	fi
-
-	if [ "$this_lava_result" = "1" ]; then
-	    global_lava_result=FAILED
-	    failed_jobs="${failed_jobs} $job"
-	fi
+	wait $job || this_lava_result=$?
+
+	case $this_lava_result in
+	    0)
+		# If the job completed, check that the individual tests were successful
+		rm -f csv
+		wget https://${LAVA_SERVER}/results/${job}/csv
+		# linux-linaro-ubuntu-lsb_release fails because lsb_release is not
+		# in the rootfs, so ignore this failure
+		cat csv | grep -v linux-linaro-ubuntu-lsb_release | awk -F, '{print $3;}' | grep -i fail && this_lava_result=1
+		;;
+	    1)
+		global_lava_result=FAILED
+		failed_jobs="${failed_jobs} $job"
+		;;
+	    2)
+		# Infrastructure error. Ignore for the moment.
+		;;
+	esac
     done
 
     echo "LAVA agregated results: $global_lava_result"
diff --git a/lava-boot.py b/lava-boot.py
index c2f4c6b..34c70d3 100644
--- a/lava-boot.py
+++ b/lava-boot.py
@@ -1,12 +1,9 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import argparse
-import contextlib
 import logging
 import os
 import requests
-import shutil
-import subprocess
 import sys
 import time
 from urllib.parse import urlparse
@@ -16,22 +13,23 @@ import yaml
 from jinja2 import Environment, FileSystemLoader
 
 
-LOG_FORMAT = "%(asctime)-15s %(levelname)7s %(message)s"
-
+###########
+# Constants
+###########
 BOARDS = {
-    "arndale" :         {"arch": "arm",
-                         "path": {"dtb": "arch/arm/boot/dts/exynos5250-arndale.dtb",
+    "arndale" :         {"path": {"dtb": "arch/arm/boot/dts/exynos5250-arndale.dtb",
                                  "kernel": "arch/arm/boot/zImage"}},
-    "beaglebone-black": {"arch": "arm",
-                         "path": {"dtb": "arch/arm/boot/dts/am335x-boneblack.dtb",
+    "beaglebone-black": {"path": {"dtb": "arch/arm/boot/dts/am335x-boneblack.dtb",
                                   "kernel": "arch/arm/boot/zImage"}},
-    "juno":             {"arch": "arm64",
-                         "path": {"dtb": "arch/arm64/boot/dts/arm/juno.dtb",
+    "juno":             {"path": {"dtb": "arch/arm64/boot/dts/arm/juno.dtb",
                                   "kernel": "arch/arm64/boot/Image"}},
-    "pandaboard" :      {"arch": "arm",
-                         "path": {"dtb": "arch/arm/boot/dts/omap4-panda.dtb",
+    "pandaboard" :      {"path": {"dtb": "arch/arm/boot/dts/omap4-panda.dtb",
                                   "kernel": "arch/arm/boot/zImage"}},
 }
+LOG_FORMAT = "%(asctime)-15s %(levelname)7s %(message)s"
+
+ERROR_INFRA = 2
+ERROR_JOB = 1
 
 
 ##################
@@ -43,34 +41,21 @@ LOG = logging.getLogger("root")
 #########
 # Helpers
 #########
-
-def build_lava_job_uri(uri, job_id):
-    """
-    Build the url to the lava scheduler job
-    """
-    url = urlparse(uri)
-    host = url.netloc
-    if "@" in url.netloc:
-        host = url.netloc.split("@")[1]
-
-    return url.scheme + "://" + host + "/scheduler/job/%s" % job_id
-
-
 class RequestsTransport(xmlrpc.client.Transport):
 
     def __init__(self, scheme):
         super().__init__()
         self.scheme = scheme
 
-    def request(self, host, handler, data, verbose=False):
+    def request(self, host, handler, request_body, verbose=False):
         headers = {"User-Agent": self.user_agent,
                    "Content-Type": "text/xml",
                    "Accept-Encoding": "gzip"}
         url = "%s://%s%s" % (self.scheme, host, handler)
         try:
             response = None
-            response = requests.post(url, data=data, headers=headers,
-                                     timeout=20.0)
+            response = requests.post(url, data=request_body,
+                                     headers=headers, timeout=20.0)
             response.raise_for_status()
             return self.parse_response(response)
         except requests.RequestException as exc:
@@ -80,42 +65,43 @@ class RequestsTransport(xmlrpc.client.Transport):
                 raise xmlrpc.client.ProtocolError(url, response.status_code,
                                                   str(exc), response.headers)
 
-    def parse_response(self, resp):
+    def parse_response(self, response):
         """
         Parse the xmlrpc response.
         """
         p, u = self.getparser()
-        p.feed(resp.text)
+        p.feed(response.text)
         p.close()
         return u.close()
 
 
-###########
-# Functions
-###########
-
-def render(device_type, context):
+def artifactorial(url, token, filename):
     try:
-        # TODO: not going to work if changing the directories layout
-        env = Environment(loader=FileSystemLoader(["share"]), trim_blocks=True)
-        template = env.get_template("%s.jinja2" % device_type)
-        return template.render(**context)
-    except Exception as exc:
-        LOG.exception(exc)
+        ret = requests.post(url, data={"token": token},
+                            files={"path": open(filename, "rb")})
+    except OSError as exc:
+        LOG.error("---> Unable to upload artifact")
+        LOG.debug(exc)
         return None
-
-
-def artifactorial(url, token, filename):
-    # TODO: check ret.status_code!
-    ret = requests.post(url, data={"token": token},
-                        files={"path": open(filename, "rb")})
     if ret.status_code != 200:
-        LOG.error("Unable to upload to artifactorial")
+        LOG.error("---> Unable to upload artifact")
         LOG.debug(ret.text)
         return None
     return ret.text
 
 
+def build_lava_job_uri(uri, job_id):
+    """
+    Build the url to the lava scheduler job
+    """
+    url = urlparse(uri)
+    host = url.netloc
+    if "@" in url.netloc:
+        host = url.netloc.split("@")[1]
+
+    return url.scheme + "://" + host + "/scheduler/job/%s" % job_id
+
+
 def load_configuration(options):
     if options.config is not None:
         return yaml.load(options.config.read())
@@ -131,6 +117,17 @@ def load_configuration(options):
             return None
 
 
+def render(device_type, context):
+    try:
+        # TODO: not going to work if changing the directories layout
+        env = Environment(loader=FileSystemLoader(["share"]), trim_blocks=True)
+        template = env.get_template("%s.jinja2" % device_type)
+        return template.render(**context)
+    except Exception as exc:
+        LOG.exception(exc)
+        return None
+
+
 def setup_logging(options):
     """
     Setup the log handler and the log level
@@ -148,102 +145,96 @@ def setup_logging(options):
         LOG.setLevel(logging.ERROR)
 
 
-def start_test(options, config):
+################
+# Main functions
+################
+def upload(options, config):
     # Post artifacts
     artifactorial_url = "%s/artifacts%s/" % (config["artifactorial"]["uri"],
                                              config["artifactorial"]["directory"])
     LOG.debug("-> artifactorial: %s", artifactorial_url)
 
-    ctx = {"job_name": "testing gcc"}
-    BOARDS[options.board]["path"]["dtb"] = options.linux_dtb
-    BOARDS[options.board]["path"]["kernel"] = options.linux_image
-    
+    ctx = {}
     for key, path in BOARDS[options.board]["path"].items():
-        LOG.debug("--> uploading %s at %s", path, key)
+        LOG.debug("--> uploading %s as %s", path, key)
         url = artifactorial(artifactorial_url, config["artifactorial"]["token"],
-                            path)
+                            os.path.join(options.linux, path))
         if url is None:
-            return 1
+            return None
         # HACK: should be removed when the lab proxy is fixed
         ctx[key] = url.replace("http://", "https://")
 
-    # Create the job definition
-    definition = render(options.board, ctx)
-    if definition is None:
-        LOG.error("-> unable to create the job definition")
-        return 1
+    return ctx
 
-    # Create the xmlrpc proxy
-    transport = RequestsTransport(urlparse(config["lava"]["uri"]).scheme)
-    proxy = xmlrpc.client.ServerProxy(config["lava"]["uri"], allow_none=True,
-                                      transport=transport)
 
-    job_id = None
-    try:
-        job_id = proxy.scheduler.jobs.submit(definition)
-        LOG.info("job %s", job_id)
-        LOG.debug("--> %s", build_lava_job_uri(config["lava"]["uri"], job_id))
-        return job_id
-
-        # Cancel the job when the script is canceled
-    except KeyboardInterrupt:
-        if job_id is not None:
-            LOG.warning("-> Canceling")
-            proxy.scheduler.jobs.cancel(job_id)
-        return 1
-
-def wait(options, config, job_id):
+def test(options, config, ctx):
     # Create the xmlrpc proxy
     transport = RequestsTransport(urlparse(config["lava"]["uri"]).scheme)
     proxy = xmlrpc.client.ServerProxy(config["lava"]["uri"], allow_none=True,
                                       transport=transport)
 
-    while True:
-        job = proxy.scheduler.jobs.show(job_id)
-        if job["state"] == "Finished":
-            break
-
-        LOG.debug("--> %s", job["state"])
-        time.sleep(10)
-        job = proxy.scheduler.jobs.show(job_id)
-
-    LOG.info("-> Result: %s", job["health"])
-
-    LOG.info("Final state: %s", job["state"])
-
-    return job["health"] != "Complete"
-
+    # TODO: add an handler for Ctrl+C
+    for i in range(0, options.retries):
+        LOG.info("Try n°%d", i)
+        LOG.info("-> submitting %d jobs", options.parallel)
+        job_ids = []
+        for j in range(0, options.parallel):
+            # Create the job definition
+            ctx["job_name"] = "%s [%d.%d/%d]" % (options.name, i, j, options.parallel)
+            definition = render(options.board, ctx)
+            if definition is None:
+                LOG.error("-> unable to create the job definition")
+                return 1
+
+            job_id = proxy.scheduler.jobs.submit(definition)
+            LOG.info("--> %s", job_id)
+            job_ids.append(job_id)
+
+        LOG.info("-> waiting for:")
+        jobs = {}
+        for job_id in job_ids:
+            LOG.info("--> %s", build_lava_job_uri(config["lava"]["uri"], job_id))
+            while True:
+                job = proxy.scheduler.jobs.show(job_id)
+                if job["state"] == "Finished":
+                    break
+
+                LOG.debug("---> %s", job["state"])
+                time.sleep(10)
+                job = proxy.scheduler.jobs.show(job_id)
+
+            LOG.info("--> Result: %s", job["health"])
+            jobs[job_id] = job
+
+        # Count the success/failures
+        for job_id in jobs:
+            if jobs[job_id]["health"] == "Complete":
+                return 0
+
+    # TODO: understand why this is failing
+    return 1
+
+
+#############
+# Entry point
+#############
 def main():
     # Setup the parser
     parser = argparse.ArgumentParser()
-    parser.add_argument("actions", type=str, nargs="*", default="all",
-                        choices=["all", "test", "starttest", "wait"],
-                        help="actions to execute")
-    parser.add_argument("--config", type=argparse.FileType("r"),
-                        default=None,
-                        help="configuration file")
-    parser.add_argument("--board", type=str,
-                        help="device type")
-
-    parser.add_argument("--linux-image", type=str,
-                        help="Linux image or zImage")
-    parser.add_argument("--linux-dtb", type=str,
-                        help="Linux DTB")
-
-    parser.add_argument("--job_id", type=int, default=None,
-                        help="job ID to wait for")
-
-    parser.add_argument("--level", type=str, default="INFO",
+    parser.add_argument("--level", type=str, default="DEBUG",
                         choices=["DEBUG", "ERROR", "INFO", "WARN"],
                         help="Log level, default to INFO")
+    parser.add_argument("--config", type=argparse.FileType("r"),
+                        default=None, help="configuration file")
+    parser.add_argument("--board", type=str, required=True, help="device type")
+    parser.add_argument("--name", type=str, required=True, help="job name")
+    parser.add_argument("--parallel", type=int, default=2, help="number of jobs to run in parallel")
+    parser.add_argument("--retries", type=int, default=3, help="number of retries to run")
+
+    parser.add_argument("linux", type=str, help="path to the linux build")
 
     # Parse the command line
     options = parser.parse_args()
-    if options.actions == "all":
-        options.actions = ["test"]
-
-    if "test" in options.actions:
-        options.actions == ["startest", "wait"]
 
     # Setup logging
     setup_logging(options)
@@ -251,29 +242,15 @@ def main():
     # Load the configuration
     config = load_configuration(options)
     if config is None:
-        return 1
-
-    if "starttest" in options.actions:
-        if options.board is None:
-            LOG.error("Missing --board option")
-            return 1
-        if options.linux_image is None:
-            LOG.error("Missing --linux-image option")
-            return 1
-        if options.linux_dtb is None:
-            LOG.error("Missing --linux-dtb option")
-            return 1
-
-        LOG.info("Starting test")
-        options.job_id=start_test(options, config)
-
-    if "wait" in options.actions:
-        if options.job_id is None:
-            LOG.error("Invalid job_id: %s", options.job_id)
-            return 1
-
-        LOG.info("Waiting for job %s", options.job_id)
-        return wait(options, config, options.job_id)
+        return ERROR_JOB
+
+    # Upload artifacts
+    ctx = upload(options, config)
+    if ctx is None:
+        return ERROR_INFRA
+
+    return test(options, config, ctx)
+
 
 if __name__ == "__main__":
     sys.exit(main())
diff --git a/share/arndale.jinja2 b/share/arndale.jinja2
index ecfdfcf..026eced 100644
--- a/share/arndale.jinja2
+++ b/share/arndale.jinja2
@@ -45,11 +45,21 @@ actions:
     - '\(initramfs\)'
     timeout:
       minutes: 5
+
 - test:
-    timeout:
-      minutes: 5
     definitions:
-    - repository: http://git.linaro.org/lava-team/lava-functional-tests.git
-      from: git
-      path: lava-test-shell/smoke-tests-basic.yaml
-      name: smoke-tests
+    - from: inline
+      name: smoke
+      path: smoke.yaml
+      repository:
+        metadata:
+          description: Basic system test command for Linaro Ubuntu images
+          format: Lava-Test Test Definition 1.0
+          name: smoke
+        run:
+          steps:
+          - uname -a
+          - cat /proc/cpuinfo
+          - cat /proc/meminfo
+    timeout:
+      minutes: 2
diff --git a/share/beaglebone-black.jinja2 b/share/beaglebone-black.jinja2
index 2beea46..afe4c0c 100644
--- a/share/beaglebone-black.jinja2
+++ b/share/beaglebone-black.jinja2
@@ -44,10 +44,19 @@ actions:
       minutes: 2
 
 - test:
-    timeout:
-      minutes: 5
     definitions:
-    - repository: http://git.linaro.org/lava-team/lava-functional-tests.git
-      from: git
-      path: lava-test-shell/smoke-tests-basic.yaml
-      name: smoke-tests
+    - from: inline
+      name: smoke
+      path: smoke.yaml
+      repository:
+        metadata:
+          description: Basic system test command for Linaro Ubuntu images
+          format: Lava-Test Test Definition 1.0
+          name: smoke
+        run:
+          steps:
+          - uname -a
+          - cat /proc/cpuinfo
+          - cat /proc/meminfo
+    timeout:
+      minutes: 2
diff --git a/share/juno.jinja2 b/share/juno.jinja2
new file mode 100644
index 0000000..61e38ba
--- /dev/null
+++ b/share/juno.jinja2
@@ -0,0 +1,71 @@
+device_type: juno
+
+job_name: {{ job_name }}
+timeouts:
+  job:
+    minutes: 60
+priority: medium
+visibility: public
+
+context:
+  # Default bootloader prompt is VExpress64
+  # Override for Juno device type
+  bootloader_prompt: juno#
+
+actions:
+# Deploy the recovery image
+- deploy:
+    namespace: recovery
+    timeout:
+      minutes: 10
+    to: vemsd
+    recovery_image:
+      url:  http://releases.linaro.org/members/arm/platforms/17.04/juno-latest-oe-uboot.zip
+      compression: zip
+
+- deploy:
+    namespace: busybox
+    timeout:
+      minutes: 2
+    to: tftp
+    os: oe
+    kernel:
+      url: {{ kernel }}
+      type: image
+    dtb:
+      url: {{ dtb }}
+    ramdisk:
+      url: http://snapshots.linaro.org/components/optee/build/juno/10/ramdisk.img
+      compression: gz
+      header: u-boot
+
+- boot:
+    namespace: busybox
+    connection-namespace: recovery
+    method: u-boot
+    commands: ramdisk
+    auto_login:
+      login_prompt: 'Please press Enter to activate this console.'
+      # simulate pressing enter because username is required by lava
+      username: ''
+    prompts:
+    - root@(.*):/
+
+- test:
+    namespace: busybox
+    definitions:
+    - from: inline
+      name: smoke
+      path: smoke.yaml
+      repository:
+        metadata:
+          description: Basic system test command for Linaro Ubuntu images
+          format: Lava-Test Test Definition 1.0
+          name: smoke
+        run:
+          steps:
+          - uname -a
+          - cat /proc/cpuinfo
+          - cat /proc/meminfo
+    timeout:
+      minutes: 2
diff --git a/share/pandaboard.jinja2 b/share/pandaboard.jinja2
index 6da19cc..5af6d97 100644
--- a/share/pandaboard.jinja2
+++ b/share/pandaboard.jinja2
@@ -39,11 +39,21 @@ actions:
     - '\(initramfs\)'
     timeout:
       minutes: 4
+
 - test:
-    timeout:
-      minutes: 5
     definitions:
-    - repository: http://git.linaro.org/lava-team/lava-functional-tests.git
-      from: git
-      path: lava-test-shell/smoke-tests-basic.yaml
-      name: smoke-tests
+    - from: inline
+      name: smoke
+      path: smoke.yaml
+      repository:
+        metadata:
+          description: Basic system test command for Linaro Ubuntu images
+          format: Lava-Test Test Definition 1.0
+          name: smoke
+        run:
+          steps:
+          - uname -a
+          - cat /proc/cpuinfo
+          - cat /proc/meminfo
+    timeout:
+      minutes: 2
author	Christophe Lyon <christophe.lyon@linaro.org>	2018-02-15 13:23:36 +0000
committer	Christophe Lyon <christophe.lyon@linaro.org>	2018-02-15 13:26:34 +0000
commit	65d798014d2d141cdff98c745e3b36d131157b18 (patch)
tree	561740976a6dc486f7297c41166283a8f7195afc
parent	28caee1a8d68271a39d8f80ce0b5662a3e831c3a (diff)