-
Notifications
You must be signed in to change notification settings - Fork 1.4k
BFDD : BFD session stays until the last static route over nexhop #19031
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -530,9 +530,10 @@ static void bfdd_dest_deregister(struct stream *msg, vrf_id_t vrf_id) | |
| return; | ||
| } | ||
|
|
||
| bs->refcount--; | ||
| /* Unregister client peer notification. */ | ||
| pcn = pcn_lookup(pc, bs); | ||
| if (pcn != NULL) { | ||
| if (pcn != NULL && bs->refcount == 0) { | ||
| pcn_free(pcn); | ||
| return; | ||
| } | ||
|
|
@@ -545,7 +546,7 @@ static void bfdd_dest_deregister(struct stream *msg, vrf_id_t vrf_id) | |
| * created this is no longer around. Lets try to delete it anyway | ||
| * and the worst case is the refcount will detain us. | ||
| */ | ||
| _ptm_bfd_session_del(bs, BD_NEIGHBOR_DOWN); | ||
| //_ptm_bfd_session_del(bs, BD_NEIGHBOR_DOWN); | ||
| } | ||
|
|
||
| /* | ||
|
|
@@ -936,6 +937,8 @@ static struct ptm_client_notification *pcn_new(struct ptm_client *pc, | |
| struct bfd_session *bs) | ||
| { | ||
| struct ptm_client_notification *pcn; | ||
| /* Lets increment the refcount in the beginning itself regardless of its a new pcn or old*/ | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is the difference here or how was it before?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Earlier flow : ===> if we add multiple static route over the same bfd peer nh, for the subsequent addition, pcn_lookup will give the existing pcn which result in NOT updating the refcount
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Now refcount incrementation has been added before the pcn_lookup which makes the refcount increment properly |
||
| bs->refcount++; | ||
|
|
||
| /* Try to find an existing pcn fist. */ | ||
| pcn = pcn_lookup(pc, bs); | ||
|
|
@@ -948,7 +951,6 @@ static struct ptm_client_notification *pcn_new(struct ptm_client *pc, | |
| TAILQ_INSERT_HEAD(&pc->pc_pcnqueue, pcn, pcn_entry); | ||
| pcn->pcn_pc = pc; | ||
| pcn->pcn_bs = bs; | ||
| bs->refcount++; | ||
|
|
||
| return pcn; | ||
| } | ||
|
|
@@ -976,7 +978,6 @@ static void pcn_free(struct ptm_client_notification *pcn) | |
| /* Handle session de-registration. */ | ||
| bs = pcn->pcn_bs; | ||
| pcn->pcn_bs = NULL; | ||
| bs->refcount--; | ||
|
|
||
| /* Log modification to users. */ | ||
| if (bglobal.debug_zebra) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| hostname rt1 | ||
| ! | ||
| ip route 192.170.1.2/32 192.168.2.2 bfd | ||
| ip route 192.170.1.3/32 192.168.2.2 bfd | ||
| ip route 192.170.1.4/32 192.168.2.2 bfd | ||
| ! | ||
| interface eth-rt1 | ||
| ip address 192.168.2.1/24 | ||
| exit | ||
| ! | ||
|
|
||
| interface lo | ||
| ip address 1.1.1.1/32 | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No Cloudflare, please, use private or documentation ranges. |
||
| exit | ||
| ! | ||
| bfd | ||
| exit | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,25 @@ | ||
| [ | ||
| { | ||
| "multihop":false, | ||
| "peer":"192.168.2.2", | ||
| "vrf":"default", | ||
| "passive-mode":false, | ||
| "log-session-changes":false, | ||
| "status":"up", | ||
| "diagnostic":"ok", | ||
| "remote-diagnostic":"ok", | ||
| "type":"dynamic", | ||
| "receive-interval":300, | ||
| "transmit-interval":300, | ||
| "echo-receive-interval":50, | ||
| "echo-transmit-interval":0, | ||
| "detect-multiplier":3, | ||
| "remote-receive-interval":300, | ||
| "remote-transmit-interval":300, | ||
| "remote-echo-receive-interval":50, | ||
| "remote-detect-multiplier":3, | ||
| "rtt-min":0, | ||
| "rtt-avg":0, | ||
| "rtt-max":0 | ||
| } | ||
| ] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| hostname rt2 | ||
| ! | ||
| ip route 192.169.1.1/32 192.168.2.1 bfd | ||
| ip route 192.169.1.2/32 192.168.2.1 bfd | ||
| ip route 192.169.1.3/32 192.168.2.1 bfd | ||
| ! | ||
| interface eth-rt2 | ||
| ip address 192.168.2.2/24 | ||
| exit | ||
| ! | ||
| interface lo | ||
| ip address 2.2.2.2/32 | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ditto.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removed
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please, use private or documentation ranges. |
||
| exit | ||
| ! | ||
| bfd | ||
| exit | ||
| ! | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,25 @@ | ||
| [ | ||
| { | ||
| "multihop":false, | ||
| "peer":"192.168.2.1", | ||
| "vrf":"default", | ||
| "passive-mode":false, | ||
| "log-session-changes":false, | ||
| "status":"up", | ||
| "diagnostic":"ok", | ||
| "remote-diagnostic":"ok", | ||
| "type":"dynamic", | ||
| "receive-interval":300, | ||
| "transmit-interval":300, | ||
| "echo-receive-interval":50, | ||
| "echo-transmit-interval":0, | ||
| "detect-multiplier":3, | ||
| "remote-receive-interval":300, | ||
| "remote-transmit-interval":300, | ||
| "remote-echo-receive-interval":50, | ||
| "remote-detect-multiplier":3, | ||
| "rtt-min":0, | ||
| "rtt-avg":0, | ||
| "rtt-max":0 | ||
| } | ||
| ] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,167 @@ | ||
| #!/usr/bin/env python | ||
| # SPDX-License-Identifier: ISC | ||
|
|
||
| # | ||
| # test_bfd_static_topo1.py | ||
| # | ||
| # Copyright (c) 2024 by Varun Hegde | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My calendar now shows it's 2025. |
||
|
|
||
| """ | ||
| test_bfd_static_topo1.py: Test the FRR multiple static routes over same gateway with BFD tracking. | ||
| """ | ||
|
|
||
| import os | ||
| import sys | ||
| import json | ||
| import platform | ||
| from functools import partial | ||
| import pytest | ||
|
|
||
| pytestmark = [pytest.mark.staticd, pytest.mark.bfdd] | ||
|
|
||
| # Save the Current Working Directory to find configuration files. | ||
| CWD = os.path.dirname(os.path.realpath(__file__)) | ||
| sys.path.append(os.path.join(CWD, "../")) | ||
|
|
||
| # pylint: disable=C0413 | ||
| # Import topogen and topotest helpers | ||
| from lib import topotest | ||
| from lib.topogen import Topogen, TopoRouter, get_topogen | ||
| from lib.topolog import logger | ||
|
|
||
|
|
||
| def setup_module(mod): | ||
| "Sets up the pytest environment" | ||
| topodef = { | ||
| "s1": ("rt1:eth-rt1", "rt2:eth-rt2"), | ||
| } | ||
| tgen = Topogen(topodef, mod.__name__) | ||
| tgen.start_topology() | ||
|
|
||
| router_list = tgen.routers() | ||
|
|
||
| for _, (rname, router) in enumerate(router_list.items(), 1): | ||
| router.load_frr_config( | ||
| os.path.join(CWD, "{}/frr.conf".format(rname)), | ||
| [ | ||
| (TopoRouter.RD_ZEBRA, None), | ||
| (TopoRouter.RD_MGMTD, None), | ||
| (TopoRouter.RD_BFD, None), | ||
| (TopoRouter.RD_STATIC, None), | ||
| ], | ||
| ) | ||
|
|
||
| # Initialize all routers. | ||
| tgen.start_router() | ||
|
|
||
|
|
||
| def teardown_module(_mod): | ||
| "Teardown the pytest environment" | ||
| tgen = get_topogen() | ||
| tgen.stop_topology() | ||
|
|
||
| def filter_dynamic_fields(json_data): | ||
| if isinstance(json_data, str): | ||
| json_data = json.loads(json_data) | ||
|
|
||
| # If it's a list of BFD sessions | ||
| for entry in json_data: | ||
| entry.pop("id", None) | ||
| entry.pop("remote-id", None) | ||
| entry.pop("uptime", None) | ||
| return json_data | ||
|
|
||
| def filtered_router_json_cmp(router, command, expected): | ||
| output = router.vtysh_cmd(command) | ||
| return topotest.json_cmp(filter_dynamic_fields(output), | ||
| filter_dynamic_fields(expected)) | ||
|
|
||
|
|
||
| def router_compare_json_output(rname, command, reference, count=5, wait=5, is_down=False): | ||
| "Compare router JSON output" | ||
|
|
||
| logger.info('Comparing router "%s" "%s" output', rname, command) | ||
|
|
||
| tgen = get_topogen() | ||
| filename = "{}/{}/{}".format(CWD, rname, reference) | ||
| if not is_down: | ||
| expected = json.loads(open(filename).read()) | ||
| else: | ||
| # If the BFD session is down, we expect the output to be empty | ||
| expected = [] | ||
|
|
||
| # print("Output filtered: {}\n".format(output_filtered)) | ||
| # print("Expected: {}\n".format(expected)) | ||
| test_func = partial(filtered_router_json_cmp, tgen.gears[rname], command, expected) | ||
|
|
||
| # Run test function until we get an result. Wait at most 25 seconds. | ||
| _, diff = topotest.run_and_expect(test_func, None, count=count, wait=wait) | ||
| assertmsg = '"{}" JSON output mismatches the expected result'.format(rname) | ||
| assert diff is None, assertmsg | ||
|
|
||
|
|
||
|
|
||
| #### Test cases for BFD static routes #### | ||
|
|
||
| def test_bfd_static_routes_step1(): | ||
| logger.info("Test (step 1): verify BFD peers for staic routes peer") | ||
| tgen = get_topogen() | ||
|
|
||
| # Skip if previous fatal error condition is raised | ||
| if tgen.routers_have_failure(): | ||
| pytest.skip(tgen.errors) | ||
|
|
||
| # BFD is just used on three routers | ||
| for rt in ["rt1", "rt2"]: | ||
| router_compare_json_output( | ||
| rt, "show bfd peers json", "show_bfd_peers.ref" | ||
| ) | ||
|
|
||
| def test_bfd_static_routes_step2(): | ||
| logger.info("Test (step 2): verify BFD static routes") | ||
| tgen = get_topogen() | ||
|
|
||
| # Skip if previous fatal error condition is raised | ||
| if tgen.routers_have_failure(): | ||
| pytest.skip(tgen.errors) | ||
|
|
||
|
|
||
| # Now we will remove the BFD static routes and check that the BFD session is still up | ||
| tgen.gears["rt1"].vtysh_cmd("no ip route 192.170.1.3/32 10.0.1.2 bfd") | ||
| tgen.gears["rt2"].vtysh_cmd("no ip route 192.169.1.3/32 10.0.1.1 bfd") | ||
| # Check that the BFD session is still up | ||
| for rt in ["rt1", "rt2"]: | ||
| router_compare_json_output( | ||
| rt, "show bfd peers json", "show_bfd_peers.ref" | ||
| ) | ||
|
|
||
| tgen.gears["rt1"].vtysh_cmd("no ip route 192.170.1.2/32 10.0.1.2 bfd") | ||
| tgen.gears["rt2"].vtysh_cmd("no ip route 192.169.1..2/32 10.0.1.1 bfd") | ||
|
|
||
| # Check that the BFD session is still up | ||
| for rt in ["rt1", "rt2"]: | ||
| router_compare_json_output( | ||
| rt, "show bfd peers json", "show_bfd_peers.ref" | ||
| ) | ||
|
|
||
| # Now remove the last static route which has BFD tracking and veryfy that the BFD session is down | ||
| tgen.gears["rt1"].vtysh_cmd("no ip route 192.170.1.4/32 10.0.1.2 bfd") | ||
| tgen.gears["rt2"].vtysh_cmd("no ip route 192.169.1.4/32 10.0.1.1 bfd") | ||
| for rt in ["rt1", "rt2"]: | ||
| router_compare_json_output( | ||
| rt, "show bfd peers json", "show_bfd_peers.ref", is_down=True | ||
| ) | ||
|
|
||
|
|
||
| def test_memory_leak(): | ||
| "Run the memory leak test and report results." | ||
| tgen = get_topogen() | ||
| if not tgen.is_memleak_enabled(): | ||
| pytest.skip("Memory leak test/report is disabled") | ||
|
|
||
| tgen.report_memory_leaks() | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| args = ["-s"] + sys.argv[1:] | ||
| sys.exit(pytest.main(args)) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Leftovers?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here,
This func call ( _ptm_bfd_session_del ) looks like it has been added for a specific scenario rather to address specific bug, not sure how to handle this here.
Basically this call should be deleted, why ?
As we remove the static routes one by one, refcount gets decremented one by one and once it reaches 0, we call the pcn_free where we clean up the pcn related data structures and call _ptm_bfd_session_del where it notifies the peer about the session down and frees up BFD session related data structures.
So according to my opinion, this call is not needed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Then let's drop it instead of commenting.