From 8e377e511a0d3ed8f2b6b1bf437097c393773d0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Kundr=C3=A1t?= Date: Wed, 14 May 2025 21:41:18 +0200 Subject: [PATCH 1/9] tridonic: debug: fix a missing f-string Since the f-string defaults to calling __repr__ when the {foo=} form is used, and because Frame is an object, this would print the object address by default, which is why we have to ask for the __str__ version to get usable data. Fixes: c72374c ("Update the async Tridonic DALI-USB driver to work with 24-bit frames") --- dali/driver/hid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dali/driver/hid.py b/dali/driver/hid.py index 9a4c79e..29f0ee2 100644 --- a/dali/driver/hid.py +++ b/dali/driver/hid.py @@ -447,7 +447,7 @@ async def _send_raw(self, command): response = None while outstanding_transmissions or response is None: self._log.debug(f"waiting for {outstanding_transmissions=} " - "{response=}") + f"{response=!s}") if len(messages) == 0: await event.wait() event.clear() From b9632cad9c8884bcbddac8ee434caad26ea3703d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Kundr=C3=A1t?= Date: Wed, 14 May 2025 21:58:55 +0200 Subject: [PATCH 2/9] tridonic: debug: unify the log prefix Both _bus_watch and _send_raw methods are "waiting for something", so let's ensure that at least one of them has a nice log prefix. This should make the debug log more readable and hopefully make me help debug issue #124. --- dali/driver/hid.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dali/driver/hid.py b/dali/driver/hid.py index 29f0ee2..bf3933e 100644 --- a/dali/driver/hid.py +++ b/dali/driver/hid.py @@ -498,13 +498,13 @@ async def _bus_watch(self): # Wait for data if len(self._bus_watch_data) == 0: if current_command: - self._log.debug("Bus watch waiting with timeout") + self._log.debug("bus_watch waiting with timeout") try: await asyncio.wait_for(self._bus_watch_data_available.wait(), 0.2) except asyncio.TimeoutError: pass else: - self._log.debug("Bus watch waiting for data, no timeout") + self._log.debug("bus_watch waiting for data, no timeout") await self._bus_watch_data_available.wait() self._bus_watch_data_available.clear() From b230634a65e44c797c19e822ad35e72575fdd7e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Kundr=C3=A1t?= Date: Thu, 15 May 2025 10:36:56 +0200 Subject: [PATCH 3/9] tridonic: debug: don't hardcode the size of _resptmpl --- dali/driver/hid.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dali/driver/hid.py b/dali/driver/hid.py index bf3933e..a63ec63 100644 --- a/dali/driver/hid.py +++ b/dali/driver/hid.py @@ -332,6 +332,8 @@ class tridonic(hid): # Responses received from the interface # Decodes to mode, response type, frame, interval, seq _resptmpl = struct.Struct(">BB4sHB55x") + # length of the "useful part", without the trailing padding + _resptmpl_useful_size = _resptmpl.size - 55 _MODE_INFO = 0x01 # Response to an init command _MODE_OBSERVE = 0x11 # Other traffic observed on the bus _MODE_RESPONSE = 0x12 # Response to a send command @@ -515,7 +517,7 @@ async def _bus_watch(self): else: timeout = False message = self._bus_watch_data.pop(0) - self._log.debug("bus_watch message %s", _hex(message[0:9])) + self._log.debug("bus_watch message %s", _hex(message[0:self._resptmpl_useful_size])) origin, rtype, raw_frame, interval, seq = self._resptmpl.unpack(message) if origin not in (self._MODE_OBSERVE, self._MODE_RESPONSE): self._log.warning("bus_watch: unexpected packet mode, ignoring") @@ -625,7 +627,7 @@ async def _bus_watch(self): # self._log.debug("End of loop") def _handle_read(self, data): - self._log.debug("_handle_read %s", _hex(data[0:9])) + self._log.debug("_handle_read %s", _hex(data[0:self._resptmpl_useful_size])) if data[0] == self._MODE_INFO: # Response to initialisation command if not self.firmware_version: From eb62d7934aa31fe5a2effe6a4ff6ef29cf094d6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Kundr=C3=A1t?= Date: Thu, 15 May 2025 10:07:39 +0200 Subject: [PATCH 4/9] tridonic: debug: dump all USB writes --- dali/driver/hid.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/dali/driver/hid.py b/dali/driver/hid.py index a63ec63..d8bc7c3 100644 --- a/dali/driver/hid.py +++ b/dali/driver/hid.py @@ -293,6 +293,8 @@ class tridonic(hid): # Commands sent to the interface # cmd, seq, ctrl, mode, frame (4 bytes), dtr, prio, devtype _cmdtmpl = struct.Struct(">4B4s3B53x") + # length of the "useful part", without the trailing padding + _cmdtmpl_useful_size = _cmdtmpl.size - 53 # _CMD send in byte 0 _CMD_INIT = 0x01 @@ -386,9 +388,13 @@ def __init__(self, *args, **kwargs): self._bus_watch_data_available = asyncio.Event() self._bus_watch_data = [] + def _raw_write(self, buf: bytes): + self._log.debug(f"_raw_write: {_hex(buf[0:self._cmdtmpl_useful_size])}") + os.write(self._f, buf) + def _initialise_device(self): # Read firmware version; pick up the reply in _handle_read - os.write(self._f, self._cmd( + self._raw_write(self._cmd( tridonic._CMD_INIT, tridonic._CMD_INIT_READVERSION)) async def _power_supply(self, supply_on): @@ -398,7 +404,7 @@ async def _power_supply(self, supply_on): self._POWER_SUPPLY_ON if supply_on else self._POWER_SUPPLY_OFF) try: - os.write(self._f, data) + self._raw_write(data) except OSError: # The device has failed. Disconnect, schedule a # reconnection, and report this command as failed. @@ -437,7 +443,7 @@ async def _send_raw(self, command): mode=self._command_mode(frame), frame=frame.pack_len(4)) try: - os.write(self._f, data) + self._raw_write(data) except OSError: # The device has failed. Disconnect, schedule a # reconnection, and report this command as failed. @@ -633,7 +639,7 @@ def _handle_read(self, data): if not self.firmware_version: self.firmware_version = f"{data[3]}.{data[4]}" # Now read the serial number - os.write(self._f, self._cmd( + self._raw_write(self._cmd( tridonic._CMD_INIT, tridonic._CMD_INIT_READSERIAL)) elif not self.serial: self.serial = _hex(data[1:5]) @@ -724,7 +730,7 @@ async def _send_raw(self, command): async with self._command_lock: times = 2 if command.sendtwice else 1 for rep in range(times): - os.write(self._f, frame.pack_len(2)) + self._raw_write(frame.pack_len(2)) # Earlier commands may have left a response available that # we need to ignore. We're only interested in responses # that become available in the future. From f2e0a584c5cc942ef6acfbfd94e97519ea9edc52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Kundr=C3=A1t?= Date: Fri, 16 May 2025 23:40:07 +0200 Subject: [PATCH 5/9] tridonic: debug: unify `rtype` and `seq` printing style --- dali/driver/hid.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dali/driver/hid.py b/dali/driver/hid.py index d8bc7c3..82fea07 100644 --- a/dali/driver/hid.py +++ b/dali/driver/hid.py @@ -429,7 +429,7 @@ async def _send_raw(self, command): await self.connected.wait() async with self._command_semaphore: seq = next(self._cmd_seq) - self._log.debug("Sending with seq %x", seq) + self._log.debug(f"Sending with {seq=:02x}") event = asyncio.Event() messages = [] # If seq is in self._outstanding this means we've wrapped @@ -482,7 +482,7 @@ async def _send_raw(self, command): elif rtype == self._RESPONSE_NO_FRAME: response = "no" else: - self._log.debug(f"didn't understand {rtype=}") + self._log.debug(f"didn't understand {rtype=:02x}") del self._outstanding[seq], event, messages if command.response: # Construct response and return it From d4e7d6d237f1549baeeeca2b1c5046ae57485ca7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Kundr=C3=A1t?= Date: Fri, 16 May 2025 23:41:12 +0200 Subject: [PATCH 6/9] tridonic: debug: simplify formatting & use proper escaping for frame content --- dali/driver/hid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dali/driver/hid.py b/dali/driver/hid.py index 82fea07..7038ac1 100644 --- a/dali/driver/hid.py +++ b/dali/driver/hid.py @@ -469,7 +469,7 @@ async def _send_raw(self, command): # The message mode is guaranteed to be _MODE_RESPONSE mode, rtype, frame, interval, seq = self._resptmpl.unpack( message) - self._log.debug(f"message mode={mode:02x} rtype={rtype:02x} frame={frame} interval={interval:04x} seq={seq:02x}") + self._log.debug(f"_send_raw got message: {mode=:02x} {rtype=:02x} frame={_hex(frame)} {interval=:04x} {seq=:02x}") if rtype in (self._RESPONSE_FRAME_DALI16, self._RESPONSE_FRAME_DALI24): # XXX check the frame contents? From e0ed4f253fd612f59f54ad0e62517a3b50968d7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Kundr=C3=A1t?= Date: Fri, 16 May 2025 23:48:45 +0200 Subject: [PATCH 7/9] tridonic: debug: trace lifetime of outstanding commands --- dali/driver/hid.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dali/driver/hid.py b/dali/driver/hid.py index 7038ac1..672e444 100644 --- a/dali/driver/hid.py +++ b/dali/driver/hid.py @@ -454,7 +454,7 @@ async def _send_raw(self, command): outstanding_transmissions = 2 if command.sendtwice else 1 response = None while outstanding_transmissions or response is None: - self._log.debug(f"waiting for {outstanding_transmissions=} " + self._log.debug(f"_send_raw waiting for {outstanding_transmissions=} " f"{response=!s}") if len(messages) == 0: await event.wait() @@ -474,6 +474,7 @@ async def _send_raw(self, command): self._RESPONSE_FRAME_DALI24): # XXX check the frame contents? outstanding_transmissions -= 1 + self._log.debug(f"_send_raw rtype DALI16 or DALI24, new {outstanding_transmissions=}") elif rtype == self._RESPONSE_FRAME_DALI8: response = dali.frame.BackwardFrame(frame) elif rtype == self._RESPONSE_INFO \ @@ -483,6 +484,7 @@ async def _send_raw(self, command): response = "no" else: self._log.debug(f"didn't understand {rtype=:02x}") + self._log.debug(f"_send_raw finished {seq=:02x}") del self._outstanding[seq], event, messages if command.response: # Construct response and return it @@ -664,10 +666,13 @@ def _handle_read(self, data): # ignore this - the sequence number will have been removed # from self._outstanding. if seq in self._outstanding: + self._log.debug(f"_handle_read: signaling for {seq=:02x}") event, messages = self._outstanding[seq] messages.append(data) event.set() del event, messages + else: + self._log.debug(f"_handle_read: nothing to do for {seq=:02x}") else: self._log.debug("Unknown response mode %x", data[0]) From c38176aef906a361e167a39020d336e7edae97c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Kundr=C3=A1t?= Date: Fri, 16 May 2025 23:51:01 +0200 Subject: [PATCH 8/9] tridonic: refactor: clarify when the device reuses sequence numbers Before this patch, the comment suggested (to me, anyway) that it applies to the first branch. That's not the case, the "sequence number is among the outstandings" is the common code path which is taken when we do not hit that bug. --- dali/driver/hid.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dali/driver/hid.py b/dali/driver/hid.py index 672e444..4474387 100644 --- a/dali/driver/hid.py +++ b/dali/driver/hid.py @@ -659,12 +659,6 @@ def _handle_read(self, data): self._bus_watch_data.append(data) self._bus_watch_data_available.set() seq = data[8] - # The Tridonic DALI USB has a firmware bug. When it - # observes a frame on the bus, not generated by itself, - # that matches the most recent frame it transmitted, it - # reports it as if it had transmitted it itself. We - # ignore this - the sequence number will have been removed - # from self._outstanding. if seq in self._outstanding: self._log.debug(f"_handle_read: signaling for {seq=:02x}") event, messages = self._outstanding[seq] @@ -672,6 +666,12 @@ def _handle_read(self, data): event.set() del event, messages else: + # The Tridonic DALI USB has a firmware bug. When it + # observes a frame on the bus, not generated by itself, + # that matches the most recent frame it transmitted, it + # reports it as if it had transmitted it itself. We + # ignore this - the sequence number will have been removed + # from self._outstanding. self._log.debug(f"_handle_read: nothing to do for {seq=:02x}") else: From 7bb3505fcf24ac875182e427fafb7d12af57504f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Kundr=C3=A1t?= Date: Sat, 17 May 2025 00:14:00 +0200 Subject: [PATCH 9/9] tridonic: recover when the device doesn't report back the response to our packet When we issue a command via the Lunatone DALI-to-USB dongle, the first USB packet which we get back has a copy of what we sent, like this: 20:13:24 D: tridonic: _raw_write: 12D6000300002A01000000 20:13:24 D: tridonic: _send_raw waiting for outstanding_transmissions=1 response=None 20:13:24 D: tridonic: _handle_read 127300002A0100F5D6 [...] 20:13:24 D: tridonic: _send_raw got message: mode=12 rtype=73 frame=00002a01 interval=00f5 seq=d6 20:13:24 D: tridonic: _send_raw waiting for outstanding_transmissions=0 response=None When stuff works as expected, this is also accompanied by a USB packet which tells us what the USB adapter read back from DALI in response to our command. It looks like this: 20:13:24 D: tridonic: _handle_read 127100000000008AD6 20:13:24 D: tridonic: _send_raw got message: mode=12 rtype=71 frame=00000000 interval=008a seq=d6 20:13:24 D: tridonic: _send_raw finished seq=d6 Sometimes this second USB packet, however, never arrives. When that happens, the USB dongle appears to continue working properly, but the code would get stuck waiting for a response which will never arrive (and holding a lock, preventing other commands form being sent). Solve this by treating that as an erroneous response, but continue working. In my experience, this is sufficient and quicker than a full reconnect. fixes #124 --- dali/driver/hid.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/dali/driver/hid.py b/dali/driver/hid.py index 4474387..f8e4876 100644 --- a/dali/driver/hid.py +++ b/dali/driver/hid.py @@ -457,8 +457,14 @@ async def _send_raw(self, command): self._log.debug(f"_send_raw waiting for {outstanding_transmissions=} " f"{response=!s}") if len(messages) == 0: - await event.wait() - event.clear() + try: + await asyncio.wait_for(event.wait(), timeout=0.2) + except TimeoutError: + self._log.error(f"_send_raw timed out {seq=:02x}, faking an error response") + response = dali.frame.BackwardFrameError(255) + break + finally: + event.clear() message = messages.pop(0) if message == "fail": # The device has gone away, possibly in the middle