Karsten Tausche | 2c59069 | 2018-06-19 15:57:46 +0200 | [diff] [blame] | 1 | import logging |
Karsten Tausche | dce1008 | 2019-01-14 16:32:32 +0100 | [diff] [blame] | 2 | import os.path |
Karsten Tausche | 2c59069 | 2018-06-19 15:57:46 +0200 | [diff] [blame] | 3 | import re |
| 4 | import shutil |
| 5 | import subprocess |
Karsten Tausche | 9fc6444 | 2019-01-22 11:30:22 +0100 | [diff] [blame] | 6 | import sys |
Karsten Tausche | 2c59069 | 2018-06-19 15:57:46 +0200 | [diff] [blame] | 7 | import time |
Karsten Tausche | dce1008 | 2019-01-14 16:32:32 +0100 | [diff] [blame] | 8 | from typing import Dict |
Karsten Tausche | 2c59069 | 2018-06-19 15:57:46 +0200 | [diff] [blame] | 9 | |
Karsten Tausche | 19d0648 | 2019-01-21 14:16:41 +0100 | [diff] [blame] | 10 | sys.path.insert(0, "../../../lib/") |
| 11 | from py_util_lib import call_shell_lib # nopep8 |
| 12 | |
Karsten Tausche | 2c59069 | 2018-06-19 15:57:46 +0200 | [diff] [blame] | 13 | |
class Device:
    """Handle to a single Android device that is driven through adb/fastboot.

    On construction a background `adb logcat` process is started whose output
    is written to the given file. The remaining methods check whether the
    device is reachable, try to re-establish a lost connection, reflash the
    userdata partition and synchronize with a LAVA worker when one is in use.
    """

    # Addresses of the form "<a>.<b>.<c>.<d>:<port>" are treated as adb-over-
    # TCP/IP devices; everything else is treated as a local serial number.
    tcpip_device_re = re.compile(
        r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}$"
    )
    # True when the `lava-send` executable is found on PATH.
    EXEC_IN_LAVA = shutil.which("lava-send") is not None

    def __init__(
        self,
        serial_or_address,
        logcat_output_filename,
        worker_job_id=None,
        userdata_image_file=None,
    ):
        """Open the logcat output file and start `adb logcat` for the device.

        Arguments:
            serial_or_address: adb serial number or "<ip>:<port>" address.
            logcat_output_filename: File that receives the logcat output; it
                is opened for writing (truncating existing content).
            worker_job_id: Identifier used in the LAVA sync messages sent by
                worker_handshake(); None disables the handshake.
            userdata_image_file: Image flashed by userdata_reset(); None
                disables the reset.
        """
        self.serial_or_address = serial_or_address
        self.is_tcpip_device = bool(
            Device.tcpip_device_re.match(self.serial_or_address)
        )
        self.worker_job_id = worker_job_id
        self.worker_handshake_iteration = 1
        self.userdata_image_file = userdata_image_file
        self._is_available = True

        self.logcat_output_file = open(logcat_output_filename, "w")
        try:
            self.logcat = self._start_logcat()
        except Exception:
            # Do not leak the file handle when adb cannot be started.
            self.logcat_output_file.close()
            raise

    def _start_logcat(self):
        """Spawn `adb logcat` for this device, writing to the logcat file."""
        return subprocess.Popen(
            ["adb", "-s", self.serial_or_address, "logcat"],
            stdout=self.logcat_output_file,
        )

    def _stop_logcat(self):
        """Kill the running logcat process and reap it."""
        self.logcat.kill()
        # Collect the exit status so that no zombie process is left behind.
        self.logcat.wait()

    def ensure_available(self, logger, timeout_secs=30):
        """
        High level function that encapsulates all logic for ensuring that a device is accessible.
        Returns a boolean indicating if this function succeeded. This function will only return once
        the device is available or no other options for reestablishing a connection are known.

        Keyword arguments:
        logger -- logging.getLogger() object to paste some debug information
        timeout_secs -- timeout in seconds for the initial availability check
        """
        if self.check_available(timeout_secs=timeout_secs):
            self._is_available = True
            logger.info("adb device %s is alive", self.serial_or_address)
            # Tell the hosting worker that everything is fine
            self.worker_handshake("continue")
            return self._is_available

        self._is_available = False

        logger.debug(
            "adb connection to %s lost! Trying to reconnect...",
            self.serial_or_address,
        )

        # Tell the hosting worker that something is broken
        # This call will only return once the device is up and running again, if possible.
        self.worker_handshake("reconnect")

        if not self.try_reconnect():
            logger.warning(
                "adb connection to %s lost and reconnect failed!",
                self.serial_or_address,
            )
            return self._is_available

        logger.debug("Successfully reconnected to %s!", self.serial_or_address)

        # TODO should check if TradeFed detected the device.

        self._is_available = True
        return self._is_available

    def is_available(self):
        """
        High level function that checks if the last ensure_available()
        invocation led to a positive result.
        """
        return self._is_available

    def check_available(self, timeout_secs=30):
        """Run a trivial `adb shell echo` on the device.

        Returns True if the command exits with 0 within timeout_secs seconds,
        False if it exits with another code or times out.
        """
        try:
            return (
                subprocess.run(
                    [
                        "adb",
                        "-s",
                        self.serial_or_address,
                        "shell",
                        "echo",
                        "%s:" % self.serial_or_address,
                        "OK",
                    ],
                    timeout=timeout_secs,
                ).returncode
                == 0
            )
        except subprocess.TimeoutExpired as e:
            print(e)
            return False

    def try_reconnect(self, reconnectTimeoutSecs=60):
        """Try to bring a lost device back; return True on success.

        Local devices are rebooted out of fastboot and awaited via the shell
        library. TCP/IP devices are disconnected and reconnected through adb,
        after which the logcat process is restarted.
        """
        # NOTE: When running inside LAVA, self.is_tcpip_device == (self.worker_job_id is not None).
        # However, when running this script directly, there is no such thing as a remote worker ID,
        # and reconnect attempts to remote devices may still be useful.
        if not self.is_tcpip_device:
            # On local devices, we can currently only try to recover from fastboot.
            # This would be a good point for a hard reset.
            # NOTE: If the boot/reboot process takes longer than the specified timeout, this
            # function will return failure, but the device can still become accessible in the next
            # iteration of device availability checks.

            # `fastboot devices` prints in some versions more debug information
            # than `fastboot reboot`, e.g., missing udev rules.
            subprocess.run(["fastboot", "devices"])

            # There is no point in waiting longer for `fastboot reboot`:
            fastbootRebootTimeoutSecs = 10
            try:
                subprocess.run(
                    ["fastboot", "-s", self.serial_or_address, "reboot"],
                    timeout=fastbootRebootTimeoutSecs,
                )
            except subprocess.TimeoutExpired:
                # Blocking `fastboot reboot` does not necessarily indicate a
                # failure.
                pass

            subprocess.run(["fastboot", "devices"])

            # Spend the rest of the budget (but at least 10 s) on booting.
            bootTimeoutSecs = max(
                10, int(reconnectTimeoutSecs) - fastbootRebootTimeoutSecs
            )
            return self._call_shell_lib(
                "wait_boot_completed {}".format(bootTimeoutSecs)
            )

        # adb may not yet have realized that the connection is broken
        subprocess.run(["adb", "disconnect", self.serial_or_address])
        # adb connect ~often~ fails when called ~directly~ after disconnect.
        time.sleep(5)

        try:
            if (
                subprocess.run(
                    ["adb", "connect", self.serial_or_address],
                    timeout=reconnectTimeoutSecs,
                ).returncode
                != 0
            ):
                return False
        except subprocess.TimeoutExpired:
            return False

        if not self.check_available():
            return False

        # Ensure that the device screen is on during test runs.
        if not self._call_shell_lib("disable_suspend"):
            print("WARNING: Disabling device suspend may have failed.")

        # reestablish logcat connection
        self._stop_logcat()
        self.logcat = self._start_logcat()
        return True

    def userdata_reset(self, commandTimeoutSecs=60, reconnectTimeoutSecs=900):
        """Reset the device to a clean state. This is equivalent to resetting to
        factory settings and applying CTS set-up steps.

        Returns True when the reset was skipped (no image configured or image
        file missing) or when flashing and reconnecting succeeded, and False
        when flashing timed out or the device could not be reconnected.
        """
        if not self.userdata_image_file:
            print("WARNING: Skipping userdata_reset; no image file provided.")
            return True
        if not os.path.isfile(self.userdata_image_file):
            print(
                "WARNING: Skipping userdata_reset; image file not found: %s"
                % self.userdata_image_file
            )
            # Actually skip, as announced, instead of rebooting the device
            # into the bootloader and flashing a file that does not exist.
            return True

        print("Resetting userdata partition on %s" % self.serial_or_address)

        # Reflash the userdata partition.
        if self.is_tcpip_device:
            self.worker_handshake("userdata_reset")
        else:
            try:
                subprocess.run(
                    [
                        "adb",
                        "-s",
                        self.serial_or_address,
                        "reboot",
                        "bootloader",
                    ],
                    timeout=commandTimeoutSecs,
                )
            except subprocess.TimeoutExpired:
                # Blocking `adb reboot` does not necessarily indicate a failure.
                pass
            try:
                subprocess.run(
                    [
                        "fastboot",
                        "-s",
                        self.serial_or_address,
                        "flash",
                        "userdata",
                        self.userdata_image_file,
                    ],
                    timeout=commandTimeoutSecs,
                )
            except subprocess.TimeoutExpired as e:
                print(e)
                return False

        # Reconnect as usual.
        if not self.try_reconnect(reconnectTimeoutSecs=reconnectTimeoutSecs):
            return False

        # Report success explicitly; falling off the end would return None,
        # which callers checking the result would read as a failure.
        return True

    def release(self):
        """Stop logcat, close its output file and notify the worker."""
        self._stop_logcat()
        self.logcat_output_file.close()
        self.worker_handshake("release")

    def worker_handshake(self, command):
        """
        This function implements the counterpart of wait-and-keep-local-device-accessible.yaml
        It is basically a no-op when running outside LAVA.

        Sends `command` via lava-send and, for every command except "release",
        waits for the matching worker message via lava-wait. The handshake
        iteration counter is incremented on every call. Always returns True.
        """

        # Nothing to do for local devices and nothing to do when not called by LAVA.
        if self.worker_job_id is None or not Device.EXEC_IN_LAVA:
            self.worker_handshake_iteration += 1
            return True

        # All commands except release are followed by a lava-send from the worker side.
        wait_for_acc = command != "release"

        subprocess.run(
            [
                "lava-send",
                "master-sync-%s-%s"
                % (self.worker_job_id, str(self.worker_handshake_iteration)),
                "command=%s" % command,
            ]
        )
        if wait_for_acc:
            subprocess.run(
                [
                    "lava-wait",
                    "worker-sync-%s-%s"
                    % (
                        self.worker_job_id,
                        str(self.worker_handshake_iteration),
                    ),
                ]
            )
            # TODO could check result variable from MultiNode cache
        self.worker_handshake_iteration += 1
        return True

    def _call_shell_lib(self, command: str) -> bool:
        """Call a function implemented in the (Android) shell library.
        Ensure that device-specific commands are executed on `self`.

        Arguments:
            command: Function defined in sh-test-lib or android-test-lib to
                call, including its parameters.
        Return:
            True if the executed shell exits with 0, False otherwise.
        """
        return call_shell_lib(command, device=self.serial_or_address) == 0
| 284 | |
Karsten Tausche | 2c59069 | 2018-06-19 15:57:46 +0200 | [diff] [blame] | 285 | |
class RetryCheck:
    """Bookkeeping that decides whether another retry should be attempted.

    Retrying stops once either the total number of posted results reaches
    `total_max_retries` or the same result value has been posted
    `retries_if_unchanged` times in a row.
    """

    def __init__(self, total_max_retries, retries_if_unchanged):
        """Store both limits and reset all counters."""
        self.total_max_retries = total_max_retries
        self.retries_if_unchanged = retries_if_unchanged
        self.current_retry = 0
        self.current_unchanged = 0
        self.last_value = None

    def post_result(self, value):
        """Record the outcome of one attempt."""
        self.current_retry += 1
        if value == self.last_value:
            # Same outcome as last time: extend the current streak.
            self.current_unchanged += 1
            return
        # A new outcome starts a fresh streak of length one.
        self.current_unchanged = 1
        self.last_value = value

    def should_continue(self):
        """Return True while neither limit has been reached."""
        if not (self.current_retry < self.total_max_retries):
            return False
        return self.current_unchanged < self.retries_if_unchanged
| 306 | |
| 307 | |
class ResultSummary:
    """Condensed outcome of one test run.

    Holds the failure count, the number of completed and total modules (all
    converted with int()) and a timestamp that is stored unchanged. Two
    summaries compare equal when all of their attributes are equal.
    """

    def __init__(
        self, failure_count, modules_completed, modules_total, timestamp
    ):
        """Store the counters as integers and the timestamp as given.

        Raises whatever int() raises if a counter cannot be converted.
        """
        self.failure_count = int(failure_count)
        self.modules_completed = int(modules_completed)
        self.modules_total = int(modules_total)
        self.timestamp = timestamp

    def was_successful(self):
        """Return True if there were no failures and every module completed."""
        return self.failure_count == 0 and self.all_modules_completed()

    def all_modules_completed(self):
        """Return True if the completed module count equals the total."""
        return self.modules_completed == self.modules_total

    def __eq__(self, other):
        """Compare attribute-wise with another ResultSummary.

        Returns NotImplemented for any other type, so that comparing with
        unrelated objects (including ones without a __dict__) yields False
        instead of raising AttributeError.
        """
        if isinstance(other, ResultSummary):
            return self.__dict__ == other.__dict__
        return NotImplemented