diff options
author | Sergei Trofimov <sergei.trofimov@arm.com> | 2018-02-28 10:24:56 +0000 |
---|---|---|
committer | Marc Bonnici <marc.bonnici@arm.com> | 2018-03-08 11:18:12 +0000 |
commit | 6fe31d6cad594571587bfe0679f84bd252d7175c (patch) | |
tree | 8927469ac0286a36d320f7afa5abf6aa86e646cb | |
parent | fdb872d9cd86f5a7d7dbfb17ee6a2549b29f0d8f (diff) |
fw/execution: Handle unresponsive targets
If a target error occurs, check whether the target has become unresponsive.
If it has, attempt a hard reset when the target supports one; otherwise,
terminate execution gracefully.
-rw-r--r-- | wa/framework/execution.py | 34 | ||||
-rw-r--r-- | wa/framework/instrument.py | 7 | ||||
-rw-r--r-- | wa/framework/job.py | 9 | ||||
-rw-r--r-- | wa/framework/target/manager.py | 5 |
4 files changed, 40 insertions, 15 deletions
diff --git a/wa/framework/execution.py b/wa/framework/execution.py index 28e2f12d..82b18065 100644 --- a/wa/framework/execution.py +++ b/wa/framework/execution.py @@ -22,7 +22,8 @@ from datetime import datetime import wa.framework.signal as signal from wa.framework import instrument from wa.framework.configuration.core import Status -from wa.framework.exception import HostError, WorkloadError +from wa.framework.exception import TargetError, HostError, WorkloadError,\ + TargetNotRespondingError, TimeoutError from wa.framework.job import Job from wa.framework.output import init_job_output from wa.framework.output_processor import ProcessorManager @@ -375,17 +376,18 @@ class Runner(object): self.send(signal.RUN_INITIALIZED) while self.context.job_queue: - try: - with signal.wrap('JOB_EXECUTION', self, self.context): - self.run_next_job(self.context) - except KeyboardInterrupt: - self.context.skip_remaining_jobs() + with signal.wrap('JOB_EXECUTION', self, self.context): + self.run_next_job(self.context) + + except KeyboardInterrupt as e: + log.log_error(e, self.logger) + self.logger.info('Skipping remaining jobs.') + self.context.skip_remaining_jobs() except Exception as e: - self.context.add_event(e.message) - if (not getattr(e, 'logged', None) and - not isinstance(e, KeyboardInterrupt)): - log.log_error(e, self.logger) - e.logged = True + message = e.message if e.message else str(e) + log.log_error(e, self.logger) + self.logger.error('Skipping remaining jobs due to "{}".'.format(e)) + self.context.skip_remaining_jobs() raise e finally: self.finalize_run() @@ -429,6 +431,10 @@ class Runner(object): if not getattr(e, 'logged', None): log.log_error(e, self.logger) e.logged = True + if isinstance(e, ExecutionError): + raise e + elif isinstance(e, TargetError): + context.tm.verify_target_responsive() finally: self.logger.info('Completing job {}'.format(job.id)) self.send(signal.JOB_COMPLETED) @@ -467,6 +473,8 @@ class Runner(object): if not getattr(e, 'logged', None): 
log.log_error(e, self.logger) e.logged = True + if isinstance(e, TargetError) or isinstance(e, TimeoutError): + context.tm.verify_target_responsive() raise e finally: try: @@ -474,8 +482,10 @@ class Runner(object): job.process_output(context) self.pm.process_job_output(context) self.pm.export_job_output(context) - except Exception: + except Exception as e: job.set_status(Status.PARTIAL) + if isinstance(e, TargetError) or isinstance(e, TimeoutError): + context.tm.verify_target_responsive() raise except KeyboardInterrupt: diff --git a/wa/framework/instrument.py b/wa/framework/instrument.py index 053a8fc7..7d5c3d8d 100644 --- a/wa/framework/instrument.py +++ b/wa/framework/instrument.py @@ -105,7 +105,7 @@ from collections import OrderedDict from wa.framework import signal from wa.framework.plugin import Plugin from wa.framework.exception import (WAError, TargetNotRespondingError, TimeoutError, - WorkloadError) + WorkloadError, TargetError) from wa.utils.log import log_error from wa.utils.misc import isiterable from wa.utils.types import identifier, enum, level @@ -263,6 +263,9 @@ class ManagedCallback(object): def __call__(self, context): if self.instrument.is_enabled: try: + if not context.tm.is_responsive: + logger.debug("Target unreponsive; skipping callback {}".format(self.callback)) + return self.callback(context) except (KeyboardInterrupt, TargetNotRespondingError, TimeoutError): # pylint: disable=W0703 raise @@ -274,6 +277,8 @@ class ManagedCallback(object): context.add_event(e.message) if isinstance(e, WorkloadError): context.set_status('FAILED') + elif isinstance(e, TargetError) or isinstance(e, TimeoutError): + context.tm.verify_target_responsive() else: if context.current_job: context.set_status('PARTIAL') diff --git a/wa/framework/job.py b/wa/framework/job.py index 4d6d0ee3..d6b2a56a 100644 --- a/wa/framework/job.py +++ b/wa/framework/job.py @@ -115,6 +115,9 @@ class Job(object): self.run_time = datetime.utcnow() - start_time def process_output(self, 
context): + if not context.tm.is_responsive: + self.logger.info('Target unresponsive; not processing job output.') + return self.logger.info('Processing output for job {} [{}]'.format(self.id, self.iteration)) if self.status != Status.FAILED: with signal.wrap('WORKLOAD_RESULT_EXTRACTION', self, context): @@ -124,11 +127,17 @@ class Job(object): self.workload.update_output(context) def teardown(self, context): + if not context.tm.is_responsive: + self.logger.info('Target unresponsive; not tearing down.') + return self.logger.info('Tearing down job {} [{}]'.format(self.id, self.iteration)) with signal.wrap('WORKLOAD_TEARDOWN', self, context): self.workload.teardown(context) def finalize(self, context): + if not context.tm.is_responsive: + self.logger.info('Target unresponsive; not finalizing.') + return self.logger.info('Finalizing job {} [{}]'.format(self.id, self.iteration)) with signal.wrap('WORKLOAD_FINALIZED', self, context): self.workload.finalize(context) diff --git a/wa/framework/target/manager.py b/wa/framework/target/manager.py index fadb614c..7229d233 100644 --- a/wa/framework/target/manager.py +++ b/wa/framework/target/manager.py @@ -1,7 +1,7 @@ import logging from wa.framework import signal -from wa.framework.exception import ExecutionError, TargetError +from wa.framework.exception import ExecutionError, TargetError, TargetNotRespondingError from wa.framework.plugin import Parameter from wa.framework.target.descriptor import (get_target_description, instantiate_target, @@ -90,8 +90,9 @@ class TargetManager(object): self.logger.info('Target unresponsive; performing hard reset') self.target.reboot(hard=True) self.is_responsive = True + raise ExecutionError('Target became unresponsive but was recovered.') else: - raise ExecutionError('Target unresponsive and hard reset not supported; bailing.') + raise TargetNotRespondingError('Target unresponsive and hard reset not supported; bailing.') def _init_target(self): tdesc = 
get_target_description(self.target_name) |