Skip to content

Commit 3263608

Browse files
committed
Fix TNC sync_interface_ips empty IPs and repeated concurrent calls
This commit fixes an issue where sync_interface_ips could send empty IPs to the TNC account-service (causing 400 errors) and where concurrent netlink events would each independently hit the TNC API with the same payload. When the HTTP call failed due to empty IPs, the cache was never populated, so every subsequent netlink event retried the same failing call — creating an infinite retry storm. Additionally, a single DHCP renewal would fire 3-5 netlink events, each scheduling a call_later(5), all passing the cache check simultaneously, and all hitting the TNC API concurrently with identical payloads. An asyncio.Lock serializes concurrent sync_interface_ips calls so only the first performs the HTTP sync while subsequent calls hit the cache and return early. An empty IP guard skips the HTTP call when no IPs are available (static + dynamic combined) but still caches the result to prevent retry storms.
1 parent ca6f6d0 commit 3263608

File tree

2 files changed

+141
-38
lines changed

2 files changed

+141
-38
lines changed

src/middlewared/middlewared/plugins/truenas_connect/hostname.py

Lines changed: 49 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111

1212
logger = logging.getLogger('truenas_connect')
1313

14+
_sync_lock = asyncio.Lock()
15+
1416

1517
class TNCHostnameService(Service):
1618

@@ -52,46 +54,55 @@ async def register_system_config(self, websocket_port: int) -> dict:
5254
raise CallError(str(e))
5355

5456
async def sync_interface_ips(self, event_details=None):
55-
tnc_config = await self.middleware.call('tn_connect.config')
56-
57-
# Get interface IPs based on use_all_interfaces flag
58-
if tnc_config['use_all_interfaces']:
59-
interfaces_ips = await self.middleware.call('tn_connect.get_all_interface_ips')
60-
else:
61-
interfaces_ips = await self.middleware.call('tn_connect.get_interface_ips', tnc_config['interfaces'])
62-
63-
try:
64-
cached_ips = await self.middleware.call('cache.get', TNC_IPS_CACHE_KEY)
65-
except KeyError:
66-
skip_syncing = False
67-
else:
68-
skip_syncing = set(cached_ips) == set(interfaces_ips)
69-
70-
# If cached IPs are the same as current, skip syncing
71-
if skip_syncing:
72-
return
73-
74-
if event_details:
75-
logger.info(
76-
'Updating IPs for TrueNAS Connect due to %s change on interface %s',
77-
event_details['type'], event_details['iface'],
57+
async with _sync_lock:
58+
tnc_config = await self.middleware.call('tn_connect.config')
59+
60+
# Get interface IPs based on use_all_interfaces flag
61+
if tnc_config['use_all_interfaces']:
62+
interfaces_ips = await self.middleware.call('tn_connect.get_all_interface_ips')
63+
else:
64+
interfaces_ips = await self.middleware.call('tn_connect.get_interface_ips', tnc_config['interfaces'])
65+
66+
try:
67+
cached_ips = await self.middleware.call('cache.get', TNC_IPS_CACHE_KEY)
68+
except KeyError:
69+
skip_syncing = False
70+
else:
71+
skip_syncing = set(cached_ips) == set(interfaces_ips)
72+
73+
# If cached IPs are the same as current, skip syncing
74+
if skip_syncing:
75+
return
76+
77+
if event_details:
78+
logger.info(
79+
'Updating IPs for TrueNAS Connect due to %s change on interface %s',
80+
event_details['type'], event_details['iface'],
81+
)
82+
83+
logger.debug('Updating TrueNAS Connect database with interface IPs: %r', ', '.join(interfaces_ips))
84+
await self.middleware.call(
85+
'datastore.update', 'truenas_connect', tnc_config['id'], {
86+
'interfaces_ips': interfaces_ips,
87+
}
7888
)
7989

80-
logger.debug('Updating TrueNAS Connect database with interface IPs: %r', ', '.join(interfaces_ips))
81-
await self.middleware.call(
82-
'datastore.update', 'truenas_connect', tnc_config['id'], {
83-
'interfaces_ips': interfaces_ips,
84-
}
85-
)
86-
87-
logger.debug('Syncing interface IPs for TrueNAS Connect')
88-
try:
89-
await self.middleware.call('tn_connect.hostname.register_update_ips')
90-
except CallError:
91-
logger.error('Failed to update IPs with TrueNAS Connect', exc_info=True)
92-
else:
93-
await self.middleware.call('cache.put', TNC_IPS_CACHE_KEY, interfaces_ips, 60 * 60)
94-
await self.middleware.call_hook('tn_connect.hostname.updated', await self.config())
90+
# Skip HTTP call if no IPs available (static + dynamic combined)
91+
# to avoid sending an empty payload that would cause a 400 error.
92+
# Still cache the empty result to prevent retry storms from repeated netlink events.
93+
combined_ips = tnc_config['ips'] + interfaces_ips
94+
if not combined_ips:
95+
await self.middleware.call('cache.put', TNC_IPS_CACHE_KEY, interfaces_ips, 60 * 60)
96+
return
97+
98+
logger.debug('Syncing interface IPs for TrueNAS Connect')
99+
try:
100+
await self.middleware.call('tn_connect.hostname.register_update_ips')
101+
except CallError:
102+
logger.error('Failed to update IPs with TrueNAS Connect', exc_info=True)
103+
else:
104+
await self.middleware.call('cache.put', TNC_IPS_CACHE_KEY, interfaces_ips, 60 * 60)
105+
await self.middleware.call_hook('tn_connect.hostname.updated', await self.config())
95106

96107
async def handle_update_ips(self, event_type, args):
97108
"""

src/middlewared/middlewared/pytest/unit/plugins/test_truenas_connect.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from middlewared.service import ValidationErrors
55
from middlewared.plugins.truenas_connect.update import TrueNASConnectService
66
from middlewared.plugins.truenas_connect.hostname import TNCHostnameService
7+
from middlewared.plugins.truenas_connect.utils import TNC_IPS_CACHE_KEY
78
from truenas_connect_utils.status import Status
89

910

@@ -617,6 +618,7 @@ async def mock_middleware_call(method, *args, **kwargs):
617618
if method == 'tn_connect.config':
618619
return {
619620
'id': 1,
621+
'ips': [],
620622
'interfaces': ['ens3'],
621623
'use_all_interfaces': True
622624
}
@@ -663,6 +665,7 @@ async def mock_middleware_call(method, *args, **kwargs):
663665
if method == 'tn_connect.config':
664666
return {
665667
'id': 1,
668+
'ips': [],
666669
'interfaces': ['ens3', 'ens4'],
667670
'use_all_interfaces': False
668671
}
@@ -694,6 +697,95 @@ async def mock_middleware_call(method, *args, **kwargs):
694697
assert not get_all_called
695698
assert get_interfaces_called
696699

700+
@pytest.mark.asyncio
701+
async def test_sync_interface_ips_empty_ips_skips_http(self):
702+
"""Test that sync skips HTTP call when both static and interface IPs are empty."""
703+
service = TNCHostnameService(MagicMock())
704+
service.config = AsyncMock()
705+
706+
register_called = False
707+
cache_put_args = None
708+
709+
async def mock_middleware_call(method, *args, **kwargs):
710+
nonlocal register_called, cache_put_args
711+
712+
if method == 'tn_connect.config':
713+
return {
714+
'id': 1,
715+
'ips': [],
716+
'interfaces': [],
717+
'use_all_interfaces': True,
718+
}
719+
elif method == 'tn_connect.get_all_interface_ips':
720+
return []
721+
elif method == 'cache.get':
722+
raise KeyError('Key not found')
723+
elif method == 'datastore.update':
724+
assert args[2]['interfaces_ips'] == []
725+
return None
726+
elif method == 'tn_connect.hostname.register_update_ips':
727+
register_called = True
728+
return {'error': None}
729+
elif method == 'cache.put':
730+
cache_put_args = args
731+
return None
732+
else:
733+
return None
734+
735+
service.middleware.call = AsyncMock(side_effect=mock_middleware_call)
736+
service.middleware.call_hook = AsyncMock()
737+
738+
await service.sync_interface_ips()
739+
740+
# HTTP call should NOT have been made
741+
assert not register_called
742+
# Cache should still be populated to prevent retry storms
743+
assert cache_put_args is not None
744+
assert cache_put_args[0] == TNC_IPS_CACHE_KEY
745+
assert cache_put_args[1] == []
746+
# Hook should NOT have been called
747+
service.middleware.call_hook.assert_not_called()
748+
749+
@pytest.mark.asyncio
750+
async def test_sync_interface_ips_empty_interface_ips_with_static_ips_syncs(self):
751+
"""Test that sync proceeds when interface IPs are empty but static IPs exist."""
752+
service = TNCHostnameService(MagicMock())
753+
service.config = AsyncMock()
754+
755+
register_called = False
756+
757+
async def mock_middleware_call(method, *args, **kwargs):
758+
nonlocal register_called
759+
760+
if method == 'tn_connect.config':
761+
return {
762+
'id': 1,
763+
'ips': ['192.168.1.100'],
764+
'interfaces': [],
765+
'use_all_interfaces': True,
766+
}
767+
elif method == 'tn_connect.get_all_interface_ips':
768+
return []
769+
elif method == 'cache.get':
770+
raise KeyError('Key not found')
771+
elif method == 'datastore.update':
772+
return None
773+
elif method == 'tn_connect.hostname.register_update_ips':
774+
register_called = True
775+
return {'error': None}
776+
elif method == 'cache.put':
777+
return None
778+
else:
779+
return None
780+
781+
service.middleware.call = AsyncMock(side_effect=mock_middleware_call)
782+
service.middleware.call_hook = AsyncMock()
783+
784+
await service.sync_interface_ips()
785+
786+
# HTTP call SHOULD have been made because static IPs exist
787+
assert register_called
788+
697789
@pytest.mark.asyncio
698790
async def test_register_update_ips_uses_combined_ips(self):
699791
"""Test that register_update_ips uses combined IPs when no IPs provided."""

0 commit comments

Comments
 (0)