Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion custom_components/ef_ble/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,10 @@ async def async_unload_entry(hass: HomeAssistant, entry: DeviceConfigEntry) -> b
"""Unload a config entry."""
_cancel_reappear_callback(hass, entry)
device = entry.runtime_data
await device.disconnect()
try:
await device.disconnect()
except Exception:
_LOGGER.exception("Error disconnecting device during unload, continuing")
device.with_logging_options(LogOptions.no_options())
return await hass.config_entries.async_unload_platforms(entry, PLATFORMS)

Expand Down
97 changes: 71 additions & 26 deletions custom_components/ef_ble/eflib/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from dataclasses import dataclass
from enum import StrEnum, auto
from functools import cached_property
from typing import Literal, Self
from typing import Any, Literal, Self

import ecdsa
from bleak import BleakClient
Expand Down Expand Up @@ -43,13 +43,19 @@
SimplePacketAssembler,
)
from .listeners import ListenerGroup, ListenerRegistry
from .logging_util import ConnectionLogger, LogOptions
from .logging_util import ConnectionLogger, LogOptions, caller_chain
from .packet import Packet
from .props.utils import classproperty

MAX_RECONNECT_ATTEMPTS = 2
MAX_CONNECTION_ATTEMPTS = 10

# `BleakClient.disconnect()` can block until the connect timeout (default 20s) when a
# write-with-response is still pending on the transport after a mid-auth BLE drop
# (notably through an ESPHome proxy). Left unbounded it stalls `async_unload_entry`
# long enough for HA to mark the entry `FAILED_UNLOAD`, so cap every disconnect.
DISCONNECT_TIMEOUT = 5.0


_BT_PROTOCOL_UUIDS = {
"rfcomm": {
Expand Down Expand Up @@ -236,6 +242,7 @@ def __init__(

self._errors = 0
self._last_errors = deque(maxlen=10)
self._disconnect_log: deque[dict[str, Any]] = deque(maxlen=10)
self._client = None
self._connected = asyncio.Event()
self._disconnected = asyncio.Event()
Expand All @@ -257,6 +264,7 @@ def __init__(
self._reconnect = True

self._connection_state: ConnectionState = None # pyright: ignore[reportAttributeAccessIssue]
self._state_reason: str | None = None
self._set_state(ConnectionState.CREATED)

@property
Expand Down Expand Up @@ -387,8 +395,7 @@ async def connect(
self._set_state(ConnectionState.ERROR_BLEAK, e)

if error is not None:
if self._client is not None and self._client.is_connected:
await self._client.disconnect()
await self._disconnect_client()

self._logger.error("Failed to connect to the device: %s", error)
self._last_errors.append(f"Failed to connect to the device: {error}")
Expand All @@ -405,7 +412,10 @@ async def connect(
await self.initBleSessionKey()

def disconnected(self, *args, **kwargs) -> None:
self._logger.warning("Disconnected from device")
# Traces the trigger: an unsolicited bleak drop shows bleak/asyncio frames here,
# whereas a drop we requested shows our own `disconnect` chain.
trigger = caller_chain()
self._logger.warning("Disconnected from device (%s)", trigger)
self._client = None

# NOTE(gnox): don't trigger disconnect/reconnect logic while
Expand All @@ -422,7 +432,7 @@ def disconnected(self, *args, **kwargs) -> None:
self._disconnected.set()
if self._state is not ConnectionState.DISCONNECTING:
self._notify_disconnect()
self._set_state(ConnectionState.DISCONNECTED)
self._set_state(ConnectionState.DISCONNECTED, reason=trigger)
return

if self._reconnect_task is not None:
Expand Down Expand Up @@ -476,31 +486,62 @@ async def reconnect(self) -> None:
self._set_state(ConnectionState.RECONNECTING)
await self.connect()

async def disconnect(self) -> None:
self._logger.info(msg="Disconnecting from device")
async def _disconnect_client(self) -> None:
if self._client is None or not self._client.is_connected:
return
trigger = caller_chain()
self._logger.debug("Disconnecting BLE client (%s)", trigger)
outcome = "ok"
try:
async with asyncio.timeout(DISCONNECT_TIMEOUT):
await self._client.disconnect()
except (EOFError, BleakError) as e:
outcome = f"already_down: {e}"
self._logger.warning("Disconnect failed (already down): %s", e)
except TimeoutError:
outcome = "timeout"
self._logger.warning(
"BleakClient.disconnect() did not return within %ss (%s); continuing "
"with local cleanup (write-with-response likely still pending after a "
"mid-auth BLE drop)",
DISCONNECT_TIMEOUT,
trigger,
)
except (OSError, RuntimeError) as e:
outcome = f"transport_broken: {e}"
self._logger.warning(
"BleakClient.disconnect() raised %s (%s); the BLE transport is broken, "
"continuing with local cleanup",
type(e).__name__,
trigger,
)
self._disconnect_log.append(
{"time": time.time(), "trigger": trigger, "outcome": outcome}
)

@property
def disconnect_log(self) -> list[dict[str, Any]]:
"""Recent BLE client disconnect outcomes, for diagnostics"""
return list(self._disconnect_log)

async def disconnect(self, reason: str | None = None) -> None:
self._logger.info("Disconnecting from device (%s)", reason or "no reason given")
self._retry_on_disconnect = False

self._reconnect_attempt = 0
self._cancel_tasks()

if self._client is not None and self._client.is_connected:
self._set_state(ConnectionState.DISCONNECTING)
try:
await self._client.disconnect()
except (EOFError, BleakError) as e:
self._logger.debug("Disconnect failed (already down): %s", e)
self._set_state(ConnectionState.DISCONNECTING, reason=reason)
await self._disconnect_client()

self._client = None
if self._state == ConnectionState.DISCONNECTING:
self._set_state(ConnectionState.DISCONNECTED)
self._set_state(ConnectionState.DISCONNECTED, reason=reason)

async def _disconnect_error(self, state: ConnectionState, exc: Exception):
self._set_state(state, exc)
if self._client is not None and self._client.is_connected:
try:
await self._client.disconnect()
except (EOFError, BleakError) as e:
self._logger.debug("Disconnect failed (already down): %s", e)
await self._disconnect_client()
raise exc

@staticmethod
Expand Down Expand Up @@ -601,10 +642,7 @@ async def add_error(self, exception: Exception):
self._set_state(ConnectionState.ERROR_TOO_MANY_ERRORS, exception)
if self._client is not None and self._client.is_connected:
self._logger.warning("Client disconnected after encountering 5 errors")
try:
await self._client.disconnect()
except (EOFError, BleakError) as e:
self._logger.debug("Disconnect failed (already down): %s", e)
await self._disconnect_client()

def _reset_error_counter(self):
self._errors = 0
Expand All @@ -621,13 +659,21 @@ def _state(self, value: ConnectionState):
self._state_changed.clear()
self._listeners.on_connection_state_change(value)

@property
def state_reason(self) -> str | None:
return self._state_reason

def _set_state(
self, state: ConnectionState, exc: Exception | type[Exception] | None = None
self,
state: ConnectionState,
exc: Exception | type[Exception] | None = None,
reason: str | None = None,
):
self._state_exception = exc
if exc is not None:
self._last_exception = exc

self._state_reason = reason
self._state = state

if state.is_error:
Expand Down Expand Up @@ -1041,8 +1087,7 @@ async def _check_auth(self, packet: Packet):
self._logger.error("Authentication failed, packet: %s", packet, exc_info=exc)
self._set_state(ConnectionState.ERROR_AUTH_FAILED, exc)

if self._client is not None and self._client.is_connected:
await self._client.disconnect()
await self._disconnect_client()
raise exc

async def send_auth_status_packet(self):
Expand Down
9 changes: 7 additions & 2 deletions custom_components/ef_ble/eflib/devicebase.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
DeviceDiagnosticsCollector,
DeviceLogger,
LogOptions,
caller_chain,
)
from .packet import Packet
from .props.raw_data_props import Literal
Expand Down Expand Up @@ -283,7 +284,7 @@ async def connect(
self._conn.on_packet_data_received(self._listeners.on_packet_received)
self._conn.on_packet_parsed(self._listeners.on_packet_parsed)
self._conn.on_state_change(self._listeners.on_connection_state_change)
self._conn.on_state_change(self.connection_log.append)
self._conn.on_state_change(self._append_state_to_log)
self._conn.on_data_received(self._listeners.on_data_received)
self._conn.on_data_send(self._listeners.on_data_send)

Expand All @@ -292,12 +293,16 @@ async def connect(

await self._conn.connect(max_attempts=max_attempts)

def _append_state_to_log(self, state: ConnectionState) -> None:
reason = self._conn.state_reason if self._conn is not None else None
self.connection_log.append(state, reason)

async def disconnect(self):
if self._conn is None:
self._logger.error("Device has no connection")
return

await self._conn.disconnect()
await self._conn.disconnect(reason=caller_chain())
self._connection_event.clear()
self._conn = None

Expand Down
13 changes: 13 additions & 0 deletions custom_components/ef_ble/eflib/logging_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,8 @@ def build_diagnostics_dict(self, session: Session | None = None) -> dict:
}
if session is not None:
result["session"] = session.header.hex()
if (conn := device._conn) is not None and conn.disconnect_log:
result["disconnect_log"] = conn.disconnect_log
if self.is_enabled:
result |= self.as_dict(session)
return result
Expand Down Expand Up @@ -592,3 +594,14 @@ def __str__(self) -> str:

def __repr__(self) -> str:
return bytes(self._data).hex()


def caller_chain(depth: int = 4) -> str:
"""
Compact `->` chain of the innermost `depth` call frames, for tracing triggers

Drops this helper's own frame; awaited coroutine frames stay on the stack, so the
chain reaches across `await` boundaries to whatever ultimately triggered the call.
"""
frames = traceback.extract_stack()[:-1]
return " -> ".join(frame.name for frame in frames[-depth:])
Loading