Skip to content

Commit bdcd6b6

Browse files
authored
Save diagnostics to disk on connection error (#325)
Adds an opt-in option to automatically persist device diagnostics to disk when a BLE connection enters an error state, making it easier to troubleshoot intermittent connection / auth failures after the fact.

- New `with_diagnostics_on_exception(enabled)` builder on `DeviceBase` and matching `diagnostics_on_exception` toggle in the diagnostics options of the config flow. When enabled, packet collection is force-enabled and a connection-state listener saves the collected diagnostics to disk on any error state.
- Captures sent raw data and the initial session key in the diagnostics payload so post-mortem analysis has the full session context, not just received frames.
- Refactors `diagnostics.py` to build the dict via `DeviceDiagnosticsCollector.build_diagnostics_dict()`, consolidating the per-device fields in one place.
1 parent 2f9e268 commit bdcd6b6

8 files changed

Lines changed: 173 additions & 25 deletions

File tree

custom_components/ef_ble/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
CONF_BLUEZ_START_NOTIFY,
3030
CONF_COLLECT_PACKETS_AMOUNT,
3131
CONF_CONNECTION_TIMEOUT,
32+
CONF_DIAGNOSTICS_ON_EXCEPTION,
3233
CONF_DIAGNOSTICS_OPTIONS,
3334
CONF_EXTRA_BATTERY,
3435
CONF_PACKET_VERSION,
@@ -105,6 +106,7 @@ async def async_setup_entry(hass: HomeAssistant, entry: DeviceConfigEntry) -> bo
105106
packet_collection_enabled = diag_options.get(
106107
CONF_COLLECT_PACKETS, eflib.is_unsupported(device)
107108
)
109+
diagnostics_on_exception = diag_options.get(CONF_DIAGNOSTICS_ON_EXCEPTION, False)
108110

109111
advanced = merged_options.get(CONF_ADVANCED_CONNECTION_OPTIONS, {})
110112
timeout = advanced.get(CONF_CONNECTION_TIMEOUT, DEFAULT_CONNECTION_TIMEOUT)
@@ -121,6 +123,7 @@ async def async_setup_entry(hass: HomeAssistant, entry: DeviceConfigEntry) -> bo
121123
.with_disabled_reconnect()
122124
.with_packet_version(packet_version.to_num())
123125
.with_enabled_packet_diagnostics(packet_collection_enabled)
126+
.with_diagnostics_on_exception(diagnostics_on_exception)
124127
.with_connection_options(options)
125128
.connect(
126129
user_id=user_id,
@@ -285,6 +288,7 @@ async def _update_listener(hass: HomeAssistant, entry: DeviceConfigEntry):
285288
CONF_COLLECT_PACKETS, eflib.is_unsupported(device)
286289
)
287290
diagnostics_buffer_size = diag_options.get(CONF_COLLECT_PACKETS_AMOUNT, 100)
291+
diagnostics_on_exception = diag_options.get(CONF_DIAGNOSTICS_ON_EXCEPTION, False)
288292
advanced = merged_options.get(CONF_ADVANCED_CONNECTION_OPTIONS, {})
289293
options = Connection.Options(
290294
timeout=advanced.get(CONF_CONNECTION_TIMEOUT, DEFAULT_CONNECTION_TIMEOUT),
@@ -298,5 +302,6 @@ async def _update_listener(hass: HomeAssistant, entry: DeviceConfigEntry):
298302
enabled=packet_collection,
299303
buffer_size=diagnostics_buffer_size,
300304
)
305+
.with_diagnostics_on_exception(diagnostics_on_exception)
301306
.with_connection_options(options)
302307
)

custom_components/ef_ble/config_flow.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
CONF_COLLECT_PACKETS_AMOUNT,
4343
CONF_CONNECTION_TIMEOUT,
4444
CONF_DIAGNOSTICS_ENCRYPT,
45+
CONF_DIAGNOSTICS_ON_EXCEPTION,
4546
CONF_DIAGNOSTICS_OPTIONS,
4647
CONF_EXTRA_BATTERY,
4748
CONF_LOG_BLEAK,
@@ -433,10 +434,11 @@ async def _validate_user_id(
433434
if error := self._check_user_id(user_id):
434435
return error
435436

436-
device.with_logging_options(
437-
ConfLogOptions.from_config(user_input)
438-
).with_packet_version(packet_version.to_num()).with_connection_options(
439-
Connection.Options(timeout=timeout)
437+
(
438+
device.with_logging_options(ConfLogOptions.from_config(user_input))
439+
.with_packet_version(packet_version.to_num())
440+
.with_diagnostics_on_exception(True)
441+
.with_connection_options(Connection.Options(timeout=timeout))
440442
)
441443

442444
await device.connect(self._user_id)
@@ -445,7 +447,9 @@ async def _validate_user_id(
445447
conn_state, exc = await asyncio.wait_for(
446448
device.wait_until_authenticated_or_error(return_exc=True), timeout
447449
)
448-
except TimeoutError:
450+
except TimeoutError as e:
451+
exc = e
452+
device.set_connection_state(ConnectionState.ERROR_TIMEOUT, e)
449453
conn_state = device.connection_state
450454

451455
await device.disconnect()
@@ -707,6 +711,11 @@ def diagnostics_options(
707711
vol.Required(CONF_DIAGNOSTICS_OPTIONS): section(
708712
(
709713
schema_builder()
714+
.optional(
715+
CONF_DIAGNOSTICS_ON_EXCEPTION,
716+
bool,
717+
diag.get(CONF_DIAGNOSTICS_ON_EXCEPTION, False),
718+
)
710719
.optional(
711720
CONF_COLLECT_PACKETS,
712721
bool,

custom_components/ef_ble/const.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
CONF_DIAGNOSTICS_OPTIONS = "diagnostics_options"
1818
CONF_DIAGNOSTICS_ENCRYPT = "diagnostics_encrypt"
19+
CONF_DIAGNOSTICS_ON_EXCEPTION = "diagnostics_on_exception"
1920

2021
CONF_LOG_MASKED = "log_masked"
2122
CONF_LOG_PACKETS = "log_packets"

custom_components/ef_ble/diagnostics.py

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -14,29 +14,13 @@ async def async_get_config_entry_diagnostics(
1414

1515
session = Session() if encrypt else None
1616

17-
diagnostics = {
18-
"local_name": entry.data.get("local_name", None),
19-
"device": device.device,
20-
"name": device.name,
21-
"default_name": device._default_name,
22-
"sn_prefix": device._sn[:4],
23-
"connection_state": device.connection_state,
24-
"connection_state_history": list(device.connection_log.history),
25-
"manufacturer_data": (
26-
session.encrypt(device._manufacturer_data).hex()
27-
if session is not None
28-
else device._manufacturer_data.hex()
29-
),
30-
}
31-
32-
if session is not None:
33-
diagnostics["session"] = session.header.hex()
17+
diagnostics: dict = {"local_name": entry.data.get("local_name", None)}
18+
diagnostics |= device.diagnostics.build_diagnostics_dict(session)
3419

3520
if device.diagnostics.is_enabled:
3621
connection_setup = await hass.async_add_executor_job(
3722
device.connection_log.load_from_cache
3823
)
39-
diagnostics |= device.diagnostics.as_dict(session)
4024
diagnostics |= {"connection_setup": connection_setup}
4125

4226
return diagnostics

custom_components/ef_ble/eflib/connection.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@ def __init__(
229229
self._packet_version = packet_version
230230
self._encrypt_type = encrypt_type
231231
self._encryption: EncryptionStrategy | None = None
232+
self._initial_session_key: bytes = b""
232233
self._simple_assembler = SimplePacketAssembler()
233234
self._options = Connection.Options()
234235

@@ -631,6 +632,11 @@ def _set_state(
631632
if state.is_error:
632633
self._notify_disconnect(exc)
633634

635+
def set_state(
636+
self, state: ConnectionState, exc: Exception | type[Exception] | None = None
637+
) -> None:
638+
self._set_state(state, exc)
639+
634640
def _get_characteristics(self, char_type: Literal["write", "notify"]):
635641
assert self._client is not None
636642

@@ -944,6 +950,7 @@ async def getKeyInfoReqHandler(
944950

945951
# Parse the data that contains sRand (first 16 bytes) & seed (last 2 bytes)
946952
session_key = await self.genSessionKey(data[16:18], data[:16])
953+
self._initial_session_key = self._encryption.session_key
947954
self._encryption = Type7Encryption(session_key, self._encryption.iv)
948955

949956
await self.getAuthStatus()

custom_components/ef_ble/eflib/devicebase.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,15 @@ def auth_header_dst(self) -> int:
139139
def connection_state(self):
140140
return None if self._conn is None else self._conn._connection_state
141141

142+
def set_connection_state(
143+
self,
144+
state: ConnectionState,
145+
exc: Exception | type[Exception] | None = None,
146+
) -> None:
147+
if self._conn is None:
148+
return
149+
self._conn.set_state(state, exc)
150+
142151
@property
143152
def diagnostics(self):
144153
return self._diagnostics
@@ -220,6 +229,11 @@ def with_enabled_packet_diagnostics(
220229
self._diagnostics.with_buffer_size(buffer_size)
221230
return self
222231

232+
def with_diagnostics_on_exception(self, enabled: bool = True):
233+
"""Enable automatic diagnostics save to disk on connection errors"""
234+
self._diagnostics.with_save_on_exception(enabled)
235+
return self
236+
223237
def with_name(self, name: str):
224238
self._name = name
225239
return self

custom_components/ef_ble/eflib/logging_util.py

Lines changed: 128 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
1+
import asyncio
12
import dataclasses
23
import json
34
import logging
45
import re
56
import time
7+
import traceback
68
from collections import deque
79
from collections.abc import Callable, Mapping, Sequence
810
from dataclasses import dataclass
11+
from datetime import UTC, datetime
912
from enum import Flag, auto
1013
from functools import cached_property
1114
from pathlib import Path
@@ -268,8 +271,10 @@ class DeviceDiagnostics:
268271
disconnect_times: list[float]
269272
raw_data_connection: list[tuple[float, bytes]]
270273
raw_data_messages: list[tuple[float, bytes]]
274+
raw_data_send: list[tuple[float, bytes]]
271275
iv: bytes
272276
session_key: bytes
277+
initial_session_key: bytes
273278

274279
def _encode_bytes(self, value: bytes, session: Session | None) -> str:
275280
if session is not None:
@@ -289,8 +294,12 @@ def serialize(self, session: Session | None = None):
289294
raw_data_messages=[
290295
(k, self._encode_bytes(v, session)) for (k, v) in self.raw_data_messages
291296
],
297+
raw_data_send=[
298+
(k, self._encode_bytes(v, session)) for (k, v) in self.raw_data_send
299+
],
292300
iv=self._encode_bytes(self.iv, session),
293301
session_key=self._encode_bytes(self.session_key, session),
302+
initial_session_key=self._encode_bytes(self.initial_session_key, session),
294303
)
295304

296305
def as_dict(self):
@@ -304,36 +313,66 @@ class DeviceDiagnosticsCollector:
304313
def __init__(self, device: "DeviceBase", buffer_size: int = 100):
305314
self._device = device
306315
self._enabled = False
316+
self._save_on_exception = False
307317
self._buffer_size = buffer_size
308318

309319
self._last_packets: deque[tuple[float, bytes]] = deque(maxlen=buffer_size)
310320
self._last_errors: deque[tuple[float, str]] = deque(maxlen=buffer_size)
311321
self._connect_times: deque[float] = deque(maxlen=buffer_size)
312322
self._raw_data_connection: list[tuple[float, bytes]] = []
313323
self._raw_data_messages: deque[tuple[float, bytes]] = deque(maxlen=1000)
324+
self._raw_data_send: deque[tuple[float, bytes]] = deque(maxlen=1000)
314325

315326
self._disconnect_times: deque[float] = deque(maxlen=buffer_size)
316327
self._skip_first_messages: int = 8
317328
self._unlisten_callbacks: list[Callable[[], None]] = []
318329

319330
self._start_time = time.time()
320331

332+
self._logger = logging.getLogger(__name__)
333+
321334
def as_dict(self, session: Session | None = None):
322335
"""Get diagnostics data as dictionary"""
323336
return self.diagnostics.serialize(session).as_dict()
324337

338+
def build_diagnostics_dict(self, session: Session | None = None) -> dict:
339+
device = self._device
340+
result: dict = {
341+
"device": device.device,
342+
"name": device.name,
343+
"default_name": device._default_name,
344+
"sn_prefix": device._sn[:4],
345+
"connection_state": device.connection_state,
346+
"connection_state_history": list(device.connection_log.history),
347+
"manufacturer_data": (
348+
session.encrypt(device._manufacturer_data).hex()
349+
if session is not None
350+
else device._manufacturer_data.hex()
351+
),
352+
}
353+
if session is not None:
354+
result["session"] = session.header.hex()
355+
if self.is_enabled:
356+
result |= self.as_dict(session)
357+
return result
358+
325359
@property
def diagnostics(self):
    """
    Get diagnostics data

    Builds a `DeviceDiagnostics` snapshot from the collected buffers and
    the encryption material of the device's current connection. The
    deque-backed buffers are copied into lists; `raw_data_connection` is
    passed through as the same list object.

    When the device has no connection object, or the connection has no
    encryption strategy yet, `iv`, `session_key` and
    `initial_session_key` fall back to empty bytes instead of raising.
    """
    # The device's connection is optional (it is None until one exists),
    # so neither it nor its encryption strategy may be dereferenced blindly
    conn = self._device._conn
    encryption = conn._encryption if conn is not None else None

    return DeviceDiagnostics(
        last_packets=list(self._last_packets),
        last_errors=list(self._last_errors),
        connect_times=list(self._connect_times),
        disconnect_times=list(self._disconnect_times),
        raw_data_connection=self._raw_data_connection,
        raw_data_messages=list(self._raw_data_messages),
        raw_data_send=list(self._raw_data_send),
        iv=encryption.iv if encryption is not None else b"",
        session_key=encryption.session_key if encryption is not None else b"",
        initial_session_key=(
            conn._initial_session_key if conn is not None else b""
        ),
    )
)
338377

339378
@property
@@ -368,6 +407,7 @@ def enabled(self, enabled: bool = True):
368407
self._device.on_packet_received(self._on_packet_received),
369408
self._device.on_packet_parsed(self._on_packet_parsed),
370409
self._device.on_data_received(self._on_data_received),
410+
self._device.on_data_send(self._on_data_send),
371411
]
372412
)
373413
return self
@@ -448,11 +488,97 @@ def _on_data_received(self, data: bytes, state: "ConnectionState"):
448488

449489
buffer.append(self._with_time(data))
450490

491+
def _on_data_send(self, data: bytes):
492+
self._raw_data_send.append(self._with_time(data))
493+
494+
def with_save_on_exception(self, enabled: bool = True):
    """
    Enable or disable automatic diagnostics save on connection error

    When enabled, packet collection is force-enabled and a state change
    listener is registered. On any error state, collected diagnostics
    are saved to disk automatically.

    When disabled, the state change listener registered by an earlier call
    is removed again, so toggling the option never leaves more than one
    listener attached. Packet collection itself is left untouched on
    disable.

    Returns the collector itself so calls can be chained.
    """
    if enabled == self._save_on_exception:
        return self

    self._save_on_exception = enabled

    if enabled:
        self.enabled(True)

    # Handle of the listener registered by a previous call. It is created
    # lazily here, hence getattr. It only counts as live while it is still
    # tracked in _unlisten_callbacks - once it has left that list it must
    # not be invoked a second time.
    unlisten = getattr(self, "_unlisten_state_change", None)
    registered = unlisten is not None and unlisten in self._unlisten_callbacks

    if enabled and not registered:
        unlisten = self._device.on_connection_state_change(self._on_state_change)
        self._unlisten_callbacks.append(unlisten)
        self._unlisten_state_change = unlisten
    elif not enabled and registered:
        self._unlisten_callbacks.remove(unlisten)
        unlisten()
        self._unlisten_state_change = None

    return self
514+
515+
def _on_state_change(self, state: "ConnectionState") -> None:
516+
if not self._save_on_exception or not state.is_error:
517+
return
518+
519+
conn = self._device._conn
520+
exc = getattr(conn, "_last_exception", None) if conn is not None else None
521+
try:
522+
self._save_to_disk(state, exc)
523+
except Exception:
524+
self._device._logger.exception(
525+
"Failed to save diagnostics-on-exception snapshot"
526+
)
527+
528+
def _save_to_disk(
    self,
    state: "ConnectionState",
    exc: Exception | type[Exception] | None = None,
) -> None:
    """
    Write a diagnostics snapshot for an error state to a JSON file

    The exception message and traceback are encrypted with a fresh
    `Session` and stored hex-encoded; the same session is passed on to
    `build_diagnostics_dict`. The file is named
    `<sn_prefix>_exception_<timestamp>.json` and placed in the
    `.diagnostics` directory two levels above this module. The write runs
    in the default executor of the running event loop and its outcome is
    logged from a done-callback.

    :param state: Connection state that triggered the snapshot
    :param exc: Exception instance or exception class associated with the
        error state, if any
    :raises RuntimeError: If called without a running event loop
    """
    session = Session()

    exc_type = None
    exc_message = None
    exc_traceback = None
    if exc is not None:
        # exc may be an exception class rather than an instance; type(exc)
        # would then report "type" instead of the exception's own name
        exc_type = exc.__name__ if isinstance(exc, type) else type(exc).__name__
        exc_message = session.encrypt(str(exc).encode()).hex()
        if isinstance(exc, BaseException) and exc.__traceback__ is not None:
            tb = "".join(traceback.format_tb(exc.__traceback__))
            exc_traceback = session.encrypt(tb.encode()).hex()

    # Read the clock once so the payload timestamp and the file name agree
    now = datetime.now(UTC)

    data = {
        "timestamp": now.isoformat(),
        "exception": {
            "type": exc_type,
            "message": exc_message,
            "traceback": exc_traceback,
            "state_on_error": state.name,
        },
        "data": self.build_diagnostics_dict(session),
    }

    sn_prefix = self._device._sn[:4]
    ts = now.strftime("%Y%m%d_%H%M%S")
    cache_dir = Path(__file__).parent.parent / ".diagnostics"
    path = cache_dir / f"{sn_prefix}_exception_{ts}.json"
    # Serialize up front so only the file system work runs in the executor
    content = json.dumps(data, default=str, indent=2)

    def _write() -> None:
        cache_dir.mkdir(exist_ok=True)
        path.write_text(content, encoding="utf-8")

    task = asyncio.get_running_loop().run_in_executor(None, _write)

    def _log_result(future: asyncio.Future) -> None:
        # Future.exception() raises CancelledError on a cancelled future
        if future.cancelled():
            self._device._logger.warning(
                "Saving diagnostics to %s was cancelled", path
            )
            return

        if (err := future.exception()) is not None:
            self._device._logger.error(
                "Failed to save diagnostics to %s: %s", path, err
            )
        else:
            self._device._logger.info("Diagnostics saved to %s", path)

    task.add_done_callback(_log_result)
575+
451576
def _clear_buffers(self):
452577
self._last_packets.clear()
453578
self._last_errors.clear()
454579
self._connect_times.clear()
455580
self._disconnect_times.clear()
581+
self._raw_data_send.clear()
456582

457583

458584
class _LazyHex:

0 commit comments

Comments
 (0)