Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

twister: DeviceHandler DUT selection improvements and fixes #75548

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 41 additions & 29 deletions scripts/pylib/twister/twisterlib/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,33 +460,40 @@ def monitor_serial(self, ser, halt_event, harness):
def device_is_available(self, instance):
device = instance.platform.name
fixture = instance.testsuite.harness_config.get("fixture")
dut_found = False
duts_found = []

for d in self.duts:
if fixture and fixture not in map(lambda f: f.split(sep=':')[0], d.fixtures):
continue
if d.platform != device or (d.serial is None and d.serial_pty is None):
continue
dut_found = True
duts_found.append(d)

if not duts_found:
raise TwisterException(f"No device to serve as {device} platform.")

# Select an available DUT with less failures
for d in sorted(duts_found, key=lambda _dut: _dut.failures):
d.lock.acquire()
avail = False
if d.available:
d.available = 0
d.counter += 1
d.counter_increment()
avail = True
logger.debug(f"Retain DUT:{d.platform}, Id:{d.id}, "
f"counter:{d.counter}, failures:{d.failures}")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pre my experiences, failure count is not that matter, instead the error count matters.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both are necessary, as hardware failure oft shows as test timeouts, which are failed statuses, not errors.
At the same time, I'd rank them separately; a separate failures and errors counters, which would be used in sorting, with errors given a higher priority (sorted()'s key accepts tuples)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The proposed change counts both 'error' and 'failed' test instance statuses for each DUT as a failure situation happened there not going deeper into details of what was the origin. It is a single counter to avoid overcomplication, but to stimulate retry attempts on different DUTs choosing less 'suspicious' one as a candidate for the next round. This way it works for any root cause not known at the moment - either at a board, a harness, or a test, or any their combination.

d.lock.release()
if avail:
return d

if not dut_found:
raise TwisterException(f"No device to serve as {device} platform.")

return None

def make_device_available(self, serial):
for d in self.duts:
if serial in [d.serial_pty, d.serial]:
d.available = 1
def make_dut_available(self, dut):
if self.instance.status in [TwisterStatus.ERROR, TwisterStatus.FAIL]:
dut.failures_increment()
logger.debug(f"Release DUT:{dut.platform}, Id:{dut.id}, "
f"counter:{dut.counter}, failures:{dut.failures}")
dut.available = 1

@staticmethod
def run_custom_script(script, timeout):
Expand Down Expand Up @@ -586,7 +593,7 @@ def _terminate_pty(self, ser_pty, ser_pty_process):
logger.debug(f"Terminated serial-pty:'{ser_pty}'")
#

def _create_serial_connection(self, serial_device, hardware_baud,
def _create_serial_connection(self, dut, serial_device, hardware_baud,
flash_timeout, serial_pty, ser_pty_process):
try:
ser = serial.Serial(
Expand All @@ -599,28 +606,34 @@ def _create_serial_connection(self, serial_device, hardware_baud,
timeout=max(flash_timeout, self.get_test_timeout())
)
except serial.SerialException as e:
self.instance.status = TwisterStatus.FAIL
self.instance.reason = "Serial Device Error"
logger.error("Serial device error: %s" % (str(e)))

self.instance.add_missing_case_status(TwisterStatus.BLOCK, "Serial Device Error")
if serial_pty and ser_pty_process:
self._terminate_pty(serial_pty, ser_pty_process)

if serial_pty:
self.make_device_available(serial_pty)
else:
self.make_device_available(serial_device)
self._handle_serial_exception(e, dut, serial_pty, ser_pty_process)
raise

return ser


def _handle_serial_exception(self, exception, dut, serial_pty, ser_pty_process):
self.instance.status = TwisterStatus.FAIL
self.instance.reason = "Serial Device Error"
logger.error("Serial device error: %s" % (str(exception)))

self.instance.add_missing_case_status(TwisterStatus.BLOCK, "Serial Device Error")
if serial_pty and ser_pty_process:
self._terminate_pty(serial_pty, ser_pty_process)

self.make_dut_available(dut)


def get_hardware(self):
hardware = None
try:
hardware = self.device_is_available(self.instance)
in_waiting = 0
while not hardware:
time.sleep(1)
in_waiting += 1
if in_waiting%60 == 0:
logger.debug(f"Waiting for a DUT to run {self.instance.name}")
hardware = self.device_is_available(self.instance)
except TwisterException as error:
self.instance.status = TwisterStatus.FAIL
Expand Down Expand Up @@ -659,7 +672,7 @@ def handle(self, harness):
hardware = self.get_hardware()
if hardware:
self.instance.dut = hardware.id
if not hardware:
else:
return

runner = hardware.runner or self.options.west_runner
Expand Down Expand Up @@ -688,6 +701,7 @@ def handle(self, harness):

try:
ser = self._create_serial_connection(
hardware,
serial_port,
hardware.baud,
flash_timeout,
Expand Down Expand Up @@ -748,7 +762,8 @@ def handle(self, harness):
logger.debug(f"Attach serial device {serial_device} @ {hardware.baud} baud")
ser.port = serial_device
ser.open()
except serial.SerialException:
except serial.SerialException as e:
self._handle_serial_exception(e, hardware, serial_pty, ser_pty_process)
return

if not flash_error:
Expand Down Expand Up @@ -780,10 +795,7 @@ def handle(self, harness):
if post_script:
self.run_custom_script(post_script, 30)

if serial_pty:
self.make_device_available(serial_pty)
else:
self.make_device_available(serial_device)
self.make_dut_available(hardware)


class QEMUHandler(Handler):
Expand Down
25 changes: 22 additions & 3 deletions scripts/pylib/twister/twisterlib/hardwaremap.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def __init__(self,
self.serial_pty = serial_pty
self._counter = Value("i", 0)
self._available = Value("i", 1)
self._failures = Value("i", 0)
self.connected = connected
self.pre_script = pre_script
self.id = id
Expand Down Expand Up @@ -96,9 +97,27 @@ def counter(self, value):
with self._counter.get_lock():
self._counter.value = value

def counter_increment(self, value=1):
with self._counter.get_lock():
self._counter.value += value

@property
def failures(self):
with self._failures.get_lock():
return self._failures.value

@failures.setter
def failures(self, value):
with self._failures.get_lock():
self._failures.value = value

def failures_increment(self, value=1):
with self._failures.get_lock():
self._failures.value += value

def to_dict(self):
d = {}
exclude = ['_available', '_counter', 'match']
exclude = ['_available', '_counter', '_failures', 'match']
v = vars(self)
for k in v.keys():
if k not in exclude and v[k]:
Expand Down Expand Up @@ -204,10 +223,10 @@ def discover(self):
def summary(self, selected_platforms):
print("\nHardware distribution summary:\n")
table = []
header = ['Board', 'ID', 'Counter']
header = ['Board', 'ID', 'Counter', 'Failures']
for d in self.duts:
if d.connected and d.platform in selected_platforms:
row = [d.platform, d.id, d.counter]
row = [d.platform, d.id, d.counter, d.failures]
table.append(row)
print(tabulate(table, headers=header, tablefmt="github"))

Expand Down
12 changes: 6 additions & 6 deletions scripts/pylib/twister/twisterlib/harness.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ def configure(self, instance: TestInstance):
self.source_dir = instance.testsuite.source_dir
self.report_file = os.path.join(self.running_dir, 'report.xml')
self.pytest_log_file_path = os.path.join(self.running_dir, 'twister_harness.log')
self.reserved_serial = None
self.reserved_dut = None
self._output = []

def pytest_run(self, timeout):
Expand All @@ -366,10 +366,10 @@ def pytest_run(self, timeout):
self.status = TwisterStatus.FAIL
self.instance.reason = str(pytest_exception)
finally:
if self.reserved_serial:
self.instance.handler.make_device_available(self.reserved_serial)
self.instance.record(self.recording)
self._update_test_status()
self.instance.record(self.recording)
self._update_test_status()
if self.reserved_dut:
self.instance.handler.make_dut_available(self.reserved_dut)

def generate_command(self):
config = self.instance.testsuite.harness_config
Expand Down Expand Up @@ -438,7 +438,7 @@ def _generate_parameters_for_hardware(self, handler: Handler):
# update the instance with the device id to have it in the summary report
self.instance.dut = hardware.id

self.reserved_serial = hardware.serial_pty or hardware.serial
self.reserved_dut = hardware
if hardware.serial_pty:
command.append(f'--device-serial-pty={hardware.serial_pty}')
else:
Expand Down
Loading
Loading