diff --git a/src/sonic-frr/patch/0030-isisd-staticd-need-to-link-directly-against-libyang.patch b/src/sonic-frr/patch/0030-isisd-staticd-need-to-link-directly-against-libyang.patch new file mode 100644 index 000000000000..328a3405699f --- /dev/null +++ b/src/sonic-frr/patch/0030-isisd-staticd-need-to-link-directly-against-libyang.patch @@ -0,0 +1,37 @@ +From df79586612fb0445fbdf6b191747294e5494ece2 Mon Sep 17 00:00:00 2001 +From: Christian Hopps +Date: Thu, 26 Oct 2023 22:51:08 -0400 +Subject: [PATCH 01/11] isisd: staticd: need to link directly against libyang + +Signed-off-by: Christian Hopps +(cherry picked from commit 81d1d399521bb18f3fdd5353c9d58c4b3988f225) + +diff --git a/isisd/subdir.am b/isisd/subdir.am +index dabf6a925e..10f9f5b964 100644 +--- a/isisd/subdir.am ++++ b/isisd/subdir.am +@@ -92,7 +92,7 @@ ISIS_SOURCES = \ + isisd/isis_pfpacket.c \ + # end + +-ISIS_LDADD_COMMON = lib/libfrr.la $(LIBCAP) ++ISIS_LDADD_COMMON = lib/libfrr.la $(LIBCAP) $(LIBYANG_LIBS) + + # Building isisd + +diff --git a/staticd/subdir.am b/staticd/subdir.am +index 022428281f..07ebe3c02c 100644 +--- a/staticd/subdir.am ++++ b/staticd/subdir.am +@@ -36,7 +36,7 @@ clippy_scan += \ + # end + + staticd_staticd_SOURCES = staticd/static_main.c +-staticd_staticd_LDADD = staticd/libstatic.a lib/libfrr.la $(LIBCAP) ++staticd_staticd_LDADD = staticd/libstatic.a lib/libfrr.la $(LIBCAP) $(LIBYANG_LIBS) + + nodist_staticd_staticd_SOURCES = \ + yang/frr-bfdd.yang.c \ +-- +2.17.1 + diff --git a/src/sonic-frr/patch/0031-zebra-backpressure-Zebra-push-back-on-Buffer-Stream-.patch b/src/sonic-frr/patch/0031-zebra-backpressure-Zebra-push-back-on-Buffer-Stream-.patch new file mode 100644 index 000000000000..0bebd1ea12ba --- /dev/null +++ b/src/sonic-frr/patch/0031-zebra-backpressure-Zebra-push-back-on-Buffer-Stream-.patch @@ -0,0 +1,174 @@ +From 7c711ff437985b23a4dd919a98b22b8ea14ef553 Mon Sep 17 00:00:00 2001 +From: Rajasekar Raja +Date: Mon, 12 Feb 2024 10:44:18 -0800 +Subject: [PATCH 02/11] zebra: backpressure - Zebra push back on Buffer/Stream + creation + +Currently, the way zebra works is it creates pthread per client (BGP is +of interest in this case) and this thread loops itself in zserv_read() +to check for any incoming data. If there is one, then it reads, +validates and adds it in the ibuf_fifo signalling the main thread to +process the message. The main thread when it gets a change, processes +the message, and invokes the function pointer registered in the header +command. (Ex: zserv_handlers). + +Finally, if all of this was successful, this task reschedules itself and +loops in zserv_read() again + +However, if there are already items on ibuf FIFO, that means zebra is +slow in processing. And with the current mechanism if Zebra main is +busy, the ibuf FIFO keeps growing holding up the memory. + +Show memory zebra:(Example: 15k streams hoarding ~160 MB of data) +--- qmem libfrr --- +Stream : 44 variable 3432352 15042 161243800 + +Fix: +Client IO Thread: (zserv_read) + - Stop doing the read events when we know there are X number of items + on the FIFO already.(X - zebra zapi-packets <1-10000> (Default-1000) + + - Determine the number of items on the zserv->ibuf_fifo. Subtract this + from the work items and only pull the number of items off that would + take us to X items on the ibuf_fifo again. + + - If the number of items in the ibuf_fifo has reached to the maximum + * Either initially when zserv_read() is called (or) + * when processing the remainders of the incoming buffer + the client IO thread is woken by the the zebra main. + +Main thread: (zserv_process_message) +If the client ibuf always schedules a wakeup to the client IO to read +more items from the socked buffer. This way we ensure + - Client IO thread always tries to read the socket buffer and add more + items to the ibuf_fifo (until max limit) + - hidden config change (zebra zapi-packets <>) is taken into account + +Ticket: #3390099 + +Signed-off-by: Donald Sharp +Signed-off-by: Rajasekar Raja + +diff --git a/zebra/zserv.c b/zebra/zserv.c +index 2024f34534..de6e404fc4 100644 +--- a/zebra/zserv.c ++++ b/zebra/zserv.c +@@ -318,6 +318,14 @@ zwrite_fail: + * this task reschedules itself. + * + * Any failure in any of these actions is handled by terminating the client. ++ * ++ * The client's input buffer ibuf_fifo can have a maximum items as configured ++ * in the packets_to_process. This way we are not filling up the FIFO more ++ * than the maximum when the zebra main is busy. If the fifo has space, we ++ * reschedule ourselves to read more. ++ * ++ * The main thread processes the items in ibuf_fifo and always signals the ++ * client IO thread. + */ + static void zserv_read(struct thread *thread) + { +@@ -325,15 +333,25 @@ static void zserv_read(struct thread *thread) + int sock; + size_t already; + struct stream_fifo *cache; +- uint32_t p2p_orig; +- +- uint32_t p2p; ++ uint32_t p2p; /* Temp p2p used to process */ ++ uint32_t p2p_orig; /* Configured p2p (Default-1000) */ ++ int p2p_avail; /* How much space is available for p2p */ + struct zmsghdr hdr; ++ size_t client_ibuf_fifo_cnt = stream_fifo_count_safe(client->ibuf_fifo); + + p2p_orig = atomic_load_explicit(&zrouter.packets_to_process, + memory_order_relaxed); ++ p2p_avail = p2p_orig - client_ibuf_fifo_cnt; ++ ++ /* ++ * Do nothing if ibuf_fifo count has reached its max limit. Otherwise ++ * proceed and reschedule ourselves if there is space in the ibuf_fifo. ++ */ ++ if (p2p_avail <= 0) ++ return; ++ ++ p2p = p2p_avail; + cache = stream_fifo_new(); +- p2p = p2p_orig; + sock = THREAD_FD(thread); + + while (p2p) { +@@ -433,7 +451,7 @@ static void zserv_read(struct thread *thread) + p2p--; + } + +- if (p2p < p2p_orig) { ++ if (p2p < (uint32_t)p2p_avail) { + uint64_t time_now = monotime(NULL); + + /* update session statistics */ +@@ -447,19 +465,23 @@ static void zserv_read(struct thread *thread) + while (cache->head) + stream_fifo_push(client->ibuf_fifo, + stream_fifo_pop(cache)); ++ /* Need to update count as main thread could have processed few */ ++ client_ibuf_fifo_cnt = ++ stream_fifo_count_safe(client->ibuf_fifo); + } + + /* Schedule job to process those packets */ + zserv_event(client, ZSERV_PROCESS_MESSAGES); +- + } + + if (IS_ZEBRA_DEBUG_PACKET) +- zlog_debug("Read %d packets from client: %s", p2p_orig - p2p, +- zebra_route_string(client->proto)); ++ zlog_debug("Read %d packets from client: %s. Current ibuf fifo count: %zu. Conf P2p %d", ++ p2p_avail - p2p, zebra_route_string(client->proto), ++ client_ibuf_fifo_cnt, p2p_orig); + +- /* Reschedule ourselves */ +- zserv_client_event(client, ZSERV_CLIENT_READ); ++ /* Reschedule ourselves since we have space in ibuf_fifo */ ++ if (client_ibuf_fifo_cnt < p2p_orig) ++ zserv_client_event(client, ZSERV_CLIENT_READ); + + stream_fifo_free(cache); + +@@ -495,14 +517,20 @@ static void zserv_client_event(struct zserv *client, + * as the task argument. + * + * Each message is popped off the client's input queue and the action associated +- * with the message is executed. This proceeds until there are no more messages, +- * an error occurs, or the processing limit is reached. ++ * with the message is executed. This proceeds until an error occurs, or the ++ * processing limit is reached. + * + * The client's I/O thread can push at most zrouter.packets_to_process messages + * onto the input buffer before notifying us there are packets to read. As long + * as we always process zrouter.packets_to_process messages here, then we can + * rely on the read thread to handle queuing this task enough times to process + * everything on the input queue. ++ * ++ * If the client ibuf always schedules a wakeup to the client IO to read more ++ * items from the socked buffer. This way we ensure ++ * - Client IO thread always tries to read the socket buffer and add more ++ * items to the ibuf_fifo (until max limit) ++ * - the hidden config change (zebra zapi-packets <>) is taken into account. + */ + static void zserv_process_messages(struct thread *thread) + { +@@ -538,6 +566,9 @@ static void zserv_process_messages(struct thread *thread) + /* Reschedule ourselves if necessary */ + if (need_resched) + zserv_event(client, ZSERV_PROCESS_MESSAGES); ++ ++ /* Ensure to include the read socket in the select/poll/etc.. */ ++ zserv_client_event(client, ZSERV_CLIENT_READ); + } + + int zserv_send_message(struct zserv *client, struct stream *msg) +-- +2.17.1 + diff --git a/src/sonic-frr/patch/0032-bgpd-backpressure-Add-a-typesafe-list-for-Zebra-Anno.patch b/src/sonic-frr/patch/0032-bgpd-backpressure-Add-a-typesafe-list-for-Zebra-Anno.patch new file mode 100644 index 000000000000..21a2b90f44f4 --- /dev/null +++ b/src/sonic-frr/patch/0032-bgpd-backpressure-Add-a-typesafe-list-for-Zebra-Anno.patch @@ -0,0 +1,90 @@ +From 7796ce2bb6eb1650ae1bec41ab2f270807b33c62 Mon Sep 17 00:00:00 2001 +From: Donald Sharp +Date: Thu, 25 Jan 2024 12:53:24 -0500 +Subject: [PATCH 03/11] bgpd: backpressure - Add a typesafe list for Zebra + Announcement + +Modify the bgp master to hold a type safe list for bgp_dests that need +to be passed to zebra. + +Future commits will use this. + +Ticket: #3390099 + +Signed-off-by: Donald Sharp +Signed-off-by: Rajasekar Raja + +diff --git a/bgpd/bgp_main.c b/bgpd/bgp_main.c +index 90ae580bab..e28dde5d16 100644 +--- a/bgpd/bgp_main.c ++++ b/bgpd/bgp_main.c +@@ -214,6 +214,8 @@ static __attribute__((__noreturn__)) void bgp_exit(int status) + bgp_evpn_mh_finish(); + bgp_l3nhg_finish(); + ++ zebra_announce_fini(&bm->zebra_announce_head); ++ + /* reverse bgp_dump_init */ + bgp_dump_finish(); + +diff --git a/bgpd/bgp_table.h b/bgpd/bgp_table.h +index 121afc481f..d43bf86eb9 100644 +--- a/bgpd/bgp_table.h ++++ b/bgpd/bgp_table.h +@@ -101,6 +101,8 @@ struct bgp_node { + + STAILQ_ENTRY(bgp_dest) pq; + ++ struct zebra_announce_item zai; ++ + uint64_t version; + + mpls_label_t local_label; +@@ -121,6 +123,8 @@ struct bgp_node { + enum bgp_path_selection_reason reason; + }; + ++DECLARE_LIST(zebra_announce, struct bgp_dest, zai); ++ + extern void bgp_delete_listnode(struct bgp_dest *dest); + /* + * bgp_table_iter_t +diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c +index 023047050b..392423e028 100644 +--- a/bgpd/bgpd.c ++++ b/bgpd/bgpd.c +@@ -8017,6 +8017,8 @@ void bgp_master_init(struct thread_master *master, const int buffer_size, + memset(&bgp_master, 0, sizeof(bgp_master)); + + bm = &bgp_master; ++ ++ zebra_announce_init(&bm->zebra_announce_head); + bm->bgp = list_new(); + bm->listen_sockets = list_new(); + bm->port = BGP_PORT_DEFAULT; +diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h +index 72b5b50fb4..55f53bf9d3 100644 +--- a/bgpd/bgpd.h ++++ b/bgpd/bgpd.h +@@ -32,6 +32,8 @@ + #include "srv6.h" + #include "iana_afi.h" + ++PREDECL_LIST(zebra_announce); ++ + /* For union sockunion. */ + #include "queue.h" + #include "sockunion.h" +@@ -180,6 +182,9 @@ struct bgp_master { + uint32_t inq_limit; + uint32_t outq_limit; + ++ /* To preserve ordering of installations into zebra across all Vrfs */ ++ struct zebra_announce_head zebra_announce_head; ++ + QOBJ_FIELDS; + }; + DECLARE_QOBJ_TYPE(bgp_master); +-- +2.17.1 + diff --git a/src/sonic-frr/patch/0033-bgpd-fix-flushing-ipv6-flowspec-entries-when-peering.patch b/src/sonic-frr/patch/0033-bgpd-fix-flushing-ipv6-flowspec-entries-when-peering.patch new file mode 100644 index 000000000000..bfefccceff10 --- /dev/null +++ b/src/sonic-frr/patch/0033-bgpd-fix-flushing-ipv6-flowspec-entries-when-peering.patch @@ -0,0 +1,158 @@ +From 69e38aa82f325129ebad4535be5d834c599b5c0b Mon Sep 17 00:00:00 2001 +From: Philippe Guibert +Date: Wed, 7 Feb 2024 22:34:34 +0100 +Subject: [PATCH 04/11] bgpd: fix flushing ipv6 flowspec entries when peering + stops + +When a BGP flowspec peering stops, the BGP RIB entries for IPv6 +flowspec entries are removed, but not the ZEBRA RIB IPv6 entries. + +Actually, when calling bgp_zebra_withdraw() function call, only +the AFI_IP parameter is passed to the bgp_pbr_update_entry() function +in charge of the Flowspec add/delete in zebra. Fix this by passing +the AFI parameter to the bgp_zebra_withdraw() function. + +Note that using topotest does not show up the problem as the +flowspec driver code is not present and was refused. Without that, +routes are not installed, and can not be uninstalled. + +Fixes: 529efa234655 ("bgpd: allow flowspec entries to be announced to zebra") +Link: https://github.com/FRRouting/frr/pull/2025 + +Signed-off-by: Philippe Guibert + +diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c +index fbff57634a..455cd6cdbb 100644 +--- a/bgpd/bgp_route.c ++++ b/bgpd/bgp_route.c +@@ -3312,7 +3312,8 @@ static void bgp_process_main_one(struct bgp *bgp, struct bgp_dest *dest, + */ + if (old_select && + is_route_parent_evpn(old_select)) +- bgp_zebra_withdraw(p, old_select, bgp, safi); ++ bgp_zebra_withdraw(p, old_select, bgp, afi, ++ safi); + + bgp_zebra_announce(dest, p, new_select, bgp, afi, safi); + } else { +@@ -3322,7 +3323,8 @@ static void bgp_process_main_one(struct bgp *bgp, struct bgp_dest *dest, + || old_select->sub_type == BGP_ROUTE_AGGREGATE + || old_select->sub_type == BGP_ROUTE_IMPORTED)) + +- bgp_zebra_withdraw(p, old_select, bgp, safi); ++ bgp_zebra_withdraw(p, old_select, bgp, afi, ++ safi); + } + } + +@@ -4201,7 +4203,7 @@ void bgp_update(struct peer *peer, const struct prefix *p, uint32_t addpath_id, + if (pi && pi->attr->rmap_table_id != new_attr.rmap_table_id) { + if (CHECK_FLAG(pi->flags, BGP_PATH_SELECTED)) + /* remove from RIB previous entry */ +- bgp_zebra_withdraw(p, pi, bgp, safi); ++ bgp_zebra_withdraw(p, pi, bgp, afi, safi); + } + + if (peer->sort == BGP_PEER_EBGP) { +@@ -5841,7 +5843,7 @@ bool bgp_inbound_policy_exists(struct peer *peer, struct bgp_filter *filter) + } + + static void bgp_cleanup_table(struct bgp *bgp, struct bgp_table *table, +- safi_t safi) ++ afi_t afi, safi_t safi) + { + struct bgp_dest *dest; + struct bgp_path_info *pi; +@@ -5865,7 +5867,8 @@ static void bgp_cleanup_table(struct bgp *bgp, struct bgp_table *table, + || pi->sub_type == BGP_ROUTE_IMPORTED)) { + + if (bgp_fibupd_safi(safi)) +- bgp_zebra_withdraw(p, pi, bgp, safi); ++ bgp_zebra_withdraw(p, pi, bgp, afi, ++ safi); + } + + bgp_path_info_reap(dest, pi); +@@ -5882,7 +5885,7 @@ void bgp_cleanup_routes(struct bgp *bgp) + for (afi = AFI_IP; afi < AFI_MAX; ++afi) { + if (afi == AFI_L2VPN) + continue; +- bgp_cleanup_table(bgp, bgp->rib[afi][SAFI_UNICAST], ++ bgp_cleanup_table(bgp, bgp->rib[afi][SAFI_UNICAST], afi, + SAFI_UNICAST); + /* + * VPN and ENCAP and EVPN tables are two-level (RD is top level) +@@ -5894,7 +5897,7 @@ void bgp_cleanup_routes(struct bgp *bgp) + dest = bgp_route_next(dest)) { + table = bgp_dest_get_bgp_table_info(dest); + if (table != NULL) { +- bgp_cleanup_table(bgp, table, safi); ++ bgp_cleanup_table(bgp, table, afi, safi); + bgp_table_finish(&table); + bgp_dest_set_bgp_table_info(dest, NULL); + bgp_dest_unlock_node(dest); +@@ -5905,7 +5908,7 @@ void bgp_cleanup_routes(struct bgp *bgp) + dest = bgp_route_next(dest)) { + table = bgp_dest_get_bgp_table_info(dest); + if (table != NULL) { +- bgp_cleanup_table(bgp, table, safi); ++ bgp_cleanup_table(bgp, table, afi, safi); + bgp_table_finish(&table); + bgp_dest_set_bgp_table_info(dest, NULL); + bgp_dest_unlock_node(dest); +@@ -5917,7 +5920,7 @@ void bgp_cleanup_routes(struct bgp *bgp) + dest = bgp_route_next(dest)) { + table = bgp_dest_get_bgp_table_info(dest); + if (table != NULL) { +- bgp_cleanup_table(bgp, table, SAFI_EVPN); ++ bgp_cleanup_table(bgp, table, afi, SAFI_EVPN); + bgp_table_finish(&table); + bgp_dest_set_bgp_table_info(dest, NULL); + bgp_dest_unlock_node(dest); +diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c +index ff79746b4c..69240a3b83 100644 +--- a/bgpd/bgp_zebra.c ++++ b/bgpd/bgp_zebra.c +@@ -1761,7 +1761,7 @@ void bgp_zebra_announce_table_all_subtypes(struct bgp *bgp, afi_t afi, + } + + void bgp_zebra_withdraw(const struct prefix *p, struct bgp_path_info *info, +- struct bgp *bgp, safi_t safi) ++ struct bgp *bgp, afi_t afi, safi_t safi) + { + struct zapi_route api; + struct peer *peer; +@@ -1780,7 +1780,7 @@ void bgp_zebra_withdraw(const struct prefix *p, struct bgp_path_info *info, + + if (safi == SAFI_FLOWSPEC) { + peer = info->peer; +- bgp_pbr_update_entry(peer->bgp, p, info, AFI_IP, safi, false); ++ bgp_pbr_update_entry(peer->bgp, p, info, afi, safi, false); + return; + } + +@@ -1821,7 +1821,7 @@ void bgp_zebra_withdraw_table_all_subtypes(struct bgp *bgp, afi_t afi, safi_t sa + if (CHECK_FLAG(pi->flags, BGP_PATH_SELECTED) + && (pi->type == ZEBRA_ROUTE_BGP)) + bgp_zebra_withdraw(bgp_dest_get_prefix(dest), +- pi, bgp, safi); ++ pi, bgp, afi, safi); + } + } + } +diff --git a/bgpd/bgp_zebra.h b/bgpd/bgp_zebra.h +index 0a41069411..a5fe8d7ace 100644 +--- a/bgpd/bgp_zebra.h ++++ b/bgpd/bgp_zebra.h +@@ -49,7 +49,7 @@ extern void bgp_zebra_announce(struct bgp_dest *dest, const struct prefix *p, + extern void bgp_zebra_announce_table(struct bgp *bgp, afi_t afi, safi_t safi); + extern void bgp_zebra_withdraw(const struct prefix *p, + struct bgp_path_info *path, struct bgp *bgp, +- safi_t safi); ++ afi_t afi, safi_t safi); + + /* Announce routes of any bgp subtype of a table to zebra */ + extern void bgp_zebra_announce_table_all_subtypes(struct bgp *bgp, afi_t afi, +-- +2.17.1 + diff --git a/src/sonic-frr/patch/0034-bgpd-backpressure-cleanup-bgp_zebra_XX-func-args.patch b/src/sonic-frr/patch/0034-bgpd-backpressure-cleanup-bgp_zebra_XX-func-args.patch new file mode 100644 index 000000000000..16383dc95caa --- /dev/null +++ b/src/sonic-frr/patch/0034-bgpd-backpressure-cleanup-bgp_zebra_XX-func-args.patch @@ -0,0 +1,234 @@ +From 679ad9ee5f3c15570d697506183d37aa29f6ebf2 Mon Sep 17 00:00:00 2001 +From: Donald Sharp +Date: Thu, 25 Jan 2024 13:07:37 -0500 +Subject: [PATCH 05/11] bgpd: backpressure - cleanup bgp_zebra_XX func args + +Since installing/withdrawing routes into zebra is going to be changed +around to be dest based in a list, + - Retrieve the afi/safi to use based upon the dest's afi/safi + instead of passing it in. + - Prefix is known by the dest. Remove this arg as well + +Ticket: #3390099 + +Signed-off-by: Donald Sharp +Signed-off-by: Rajasekar Raja + +diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c +index 455cd6cdbb..d19f27110e 100644 +--- a/bgpd/bgp_route.c ++++ b/bgpd/bgp_route.c +@@ -3214,8 +3214,8 @@ static void bgp_process_main_one(struct bgp *bgp, struct bgp_dest *dest, + || new_select->sub_type + == BGP_ROUTE_IMPORTED)) + +- bgp_zebra_announce(dest, p, old_select, +- bgp, afi, safi); ++ bgp_zebra_announce(dest, old_select, ++ bgp); + } + } + +@@ -3312,10 +3312,9 @@ static void bgp_process_main_one(struct bgp *bgp, struct bgp_dest *dest, + */ + if (old_select && + is_route_parent_evpn(old_select)) +- bgp_zebra_withdraw(p, old_select, bgp, afi, +- safi); ++ bgp_zebra_withdraw(dest, old_select, bgp); + +- bgp_zebra_announce(dest, p, new_select, bgp, afi, safi); ++ bgp_zebra_announce(dest, new_select, bgp); + } else { + /* Withdraw the route from the kernel. */ + if (old_select && old_select->type == ZEBRA_ROUTE_BGP +@@ -3323,8 +3322,7 @@ static void bgp_process_main_one(struct bgp *bgp, struct bgp_dest *dest, + || old_select->sub_type == BGP_ROUTE_AGGREGATE + || old_select->sub_type == BGP_ROUTE_IMPORTED)) + +- bgp_zebra_withdraw(p, old_select, bgp, afi, +- safi); ++ bgp_zebra_withdraw(dest, old_select, bgp); + } + } + +@@ -4203,7 +4201,7 @@ void bgp_update(struct peer *peer, const struct prefix *p, uint32_t addpath_id, + if (pi && pi->attr->rmap_table_id != new_attr.rmap_table_id) { + if (CHECK_FLAG(pi->flags, BGP_PATH_SELECTED)) + /* remove from RIB previous entry */ +- bgp_zebra_withdraw(p, pi, bgp, afi, safi); ++ bgp_zebra_withdraw(dest, pi, bgp); + } + + if (peer->sort == BGP_PEER_EBGP) { +@@ -5867,8 +5865,7 @@ static void bgp_cleanup_table(struct bgp *bgp, struct bgp_table *table, + || pi->sub_type == BGP_ROUTE_IMPORTED)) { + + if (bgp_fibupd_safi(safi)) +- bgp_zebra_withdraw(p, pi, bgp, afi, +- safi); ++ bgp_zebra_withdraw(dest, pi, bgp); + } + + bgp_path_info_reap(dest, pi); +diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c +index 69240a3b83..920df835a4 100644 +--- a/bgpd/bgp_zebra.c ++++ b/bgpd/bgp_zebra.c +@@ -1292,9 +1292,8 @@ static bool bgp_zebra_use_nhop_weighted(struct bgp *bgp, struct attr *attr, + return true; + } + +-void bgp_zebra_announce(struct bgp_dest *dest, const struct prefix *p, +- struct bgp_path_info *info, struct bgp *bgp, afi_t afi, +- safi_t safi) ++void bgp_zebra_announce(struct bgp_dest *dest, struct bgp_path_info *info, ++ struct bgp *bgp) + { + struct zapi_route api = { 0 }; + struct zapi_nexthop *api_nh; +@@ -1321,6 +1320,8 @@ void bgp_zebra_announce(struct bgp_dest *dest, const struct prefix *p, + uint32_t ttl = 0; + uint32_t bos = 0; + uint32_t exp = 0; ++ struct bgp_table *table = bgp_dest_table(dest); ++ const struct prefix *p = bgp_dest_get_prefix(dest); + + /* + * BGP is installing this route and bgp has been configured +@@ -1339,9 +1340,9 @@ void bgp_zebra_announce(struct bgp_dest *dest, const struct prefix *p, + if (bgp->main_zebra_update_hold) + return; + +- if (safi == SAFI_FLOWSPEC) { +- bgp_pbr_update_entry(bgp, bgp_dest_get_prefix(dest), info, afi, +- safi, true); ++ if (table->safi == SAFI_FLOWSPEC) { ++ bgp_pbr_update_entry(bgp, p, info, table->afi, table->safi, ++ true); + return; + } + +@@ -1354,7 +1355,7 @@ void bgp_zebra_announce(struct bgp_dest *dest, const struct prefix *p, + /* Make Zebra API structure. */ + api.vrf_id = bgp->vrf_id; + api.type = ZEBRA_ROUTE_BGP; +- api.safi = safi; ++ api.safi = table->safi; + api.prefix = *p; + SET_FLAG(api.message, ZAPI_MESSAGE_NEXTHOP); + +@@ -1458,12 +1459,13 @@ void bgp_zebra_announce(struct bgp_dest *dest, const struct prefix *p, + } + } + +- if (bgp->table_map[afi][safi].name) { ++ if (bgp->table_map[table->afi][table->safi].name) { + /* Copy info and attributes, so the route-map + apply doesn't modify the BGP route info. */ + local_attr = *mpinfo->attr; + mpinfo_cp->attr = &local_attr; +- if (!bgp_table_map_apply(bgp->table_map[afi][safi].map, ++ if (!bgp_table_map_apply(bgp->table_map[table->afi] ++ [table->safi].map, + p, mpinfo_cp)) + continue; + +@@ -1619,7 +1621,7 @@ void bgp_zebra_announce(struct bgp_dest *dest, const struct prefix *p, + api.tag = tag; + } + +- distance = bgp_distance_apply(p, info, afi, safi, bgp); ++ distance = bgp_distance_apply(p, info, table->afi, table->safi, bgp); + if (distance) { + SET_FLAG(api.message, ZAPI_MESSAGE_DISTANCE); + api.distance = distance; +@@ -1731,9 +1733,7 @@ void bgp_zebra_announce_table(struct bgp *bgp, afi_t afi, safi_t safi) + && (pi->sub_type == BGP_ROUTE_NORMAL + || pi->sub_type == BGP_ROUTE_IMPORTED))) + +- bgp_zebra_announce(dest, +- bgp_dest_get_prefix(dest), +- pi, bgp, afi, safi); ++ bgp_zebra_announce(dest, pi, bgp); + } + + /* Announce routes of any bgp subtype of a table to zebra */ +@@ -1755,16 +1755,16 @@ void bgp_zebra_announce_table_all_subtypes(struct bgp *bgp, afi_t afi, + for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) + if (CHECK_FLAG(pi->flags, BGP_PATH_SELECTED) && + pi->type == ZEBRA_ROUTE_BGP) +- bgp_zebra_announce(dest, +- bgp_dest_get_prefix(dest), +- pi, bgp, afi, safi); ++ bgp_zebra_announce(dest, pi, bgp); + } + +-void bgp_zebra_withdraw(const struct prefix *p, struct bgp_path_info *info, +- struct bgp *bgp, afi_t afi, safi_t safi) ++void bgp_zebra_withdraw(struct bgp_dest *dest, struct bgp_path_info *info, ++ struct bgp *bgp) + { + struct zapi_route api; + struct peer *peer; ++ struct bgp_table *table = bgp_dest_table(dest); ++ const struct prefix *p = bgp_dest_get_prefix(dest); + + /* + * If we are withdrawing the route, we don't need to have this +@@ -1778,16 +1778,17 @@ void bgp_zebra_withdraw(const struct prefix *p, struct bgp_path_info *info, + if (!bgp_install_info_to_zebra(bgp)) + return; + +- if (safi == SAFI_FLOWSPEC) { ++ if (table->safi == SAFI_FLOWSPEC) { + peer = info->peer; +- bgp_pbr_update_entry(peer->bgp, p, info, afi, safi, false); ++ bgp_pbr_update_entry(peer->bgp, p, info, table->afi, ++ table->safi, false); + return; + } + + memset(&api, 0, sizeof(api)); + api.vrf_id = bgp->vrf_id; + api.type = ZEBRA_ROUTE_BGP; +- api.safi = safi; ++ api.safi = table->safi; + api.prefix = *p; + + if (info->attr->rmap_table_id) { +@@ -1820,8 +1821,7 @@ void bgp_zebra_withdraw_table_all_subtypes(struct bgp *bgp, afi_t afi, safi_t sa + for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) { + if (CHECK_FLAG(pi->flags, BGP_PATH_SELECTED) + && (pi->type == ZEBRA_ROUTE_BGP)) +- bgp_zebra_withdraw(bgp_dest_get_prefix(dest), +- pi, bgp, afi, safi); ++ bgp_zebra_withdraw(dest, pi, bgp); + } + } + } +diff --git a/bgpd/bgp_zebra.h b/bgpd/bgp_zebra.h +index a5fe8d7ace..b77e423f8f 100644 +--- a/bgpd/bgp_zebra.h ++++ b/bgpd/bgp_zebra.h +@@ -43,13 +43,11 @@ extern void bgp_zebra_destroy(void); + extern int bgp_zebra_get_table_range(uint32_t chunk_size, + uint32_t *start, uint32_t *end); + extern int bgp_if_update_all(void); +-extern void bgp_zebra_announce(struct bgp_dest *dest, const struct prefix *p, +- struct bgp_path_info *path, struct bgp *bgp, +- afi_t afi, safi_t safi); ++extern void bgp_zebra_announce(struct bgp_dest *dest, ++ struct bgp_path_info *path, struct bgp *bgp); + extern void bgp_zebra_announce_table(struct bgp *bgp, afi_t afi, safi_t safi); +-extern void bgp_zebra_withdraw(const struct prefix *p, +- struct bgp_path_info *path, struct bgp *bgp, +- afi_t afi, safi_t safi); ++extern void bgp_zebra_withdraw(struct bgp_dest *dest, ++ struct bgp_path_info *path, struct bgp *bgp); + + /* Announce routes of any bgp subtype of a table to zebra */ + extern void bgp_zebra_announce_table_all_subtypes(struct bgp *bgp, afi_t afi, +-- +2.17.1 + diff --git a/src/sonic-frr/patch/0035-gpd-backpressure-Handle-BGP-Zebra-Install-evt-Creat.patch b/src/sonic-frr/patch/0035-gpd-backpressure-Handle-BGP-Zebra-Install-evt-Creat.patch new file mode 100644 index 000000000000..724cf1297a59 --- /dev/null +++ b/src/sonic-frr/patch/0035-gpd-backpressure-Handle-BGP-Zebra-Install-evt-Creat.patch @@ -0,0 +1,512 @@ +From 6d5604a9315801e9380c11357d663ad6537ed8ab Mon Sep 17 00:00:00 2001 +From: Donald Sharp +Date: Fri, 26 Jan 2024 14:48:53 -0500 +Subject: [PATCH 06/11] bgpd : backpressure - Handle BGP-Zebra Install evt + Creation + +BGP is now keeping a list of dests with the dest having a pointer +to the bgp_path_info that it will be working on. + +1) When bgp receives a prefix, process it, add the bgp_dest of the +prefix into the new Fifo list if not present, update the flags (Ex: +earlier if the prefix was advertised and now it is a withdrawn), +increment the ref_count and DO NOT advertise the install/withdraw +to zebra yet. + +2) Schedule an event to wake up to invoke the new function which will +walk the list one by one and installs/withdraws the routes into zebra. + a) if BUFFER_EMPTY, process the next item on the list + b) if BUFFER_PENDING, bail out and the callback in + zclient_flush_data() will invoke the same function when BUFFER_EMPTY + +Changes + - rename old bgp_zebra_announce to bgp_zebra_announce_actual + - rename old bgp_zebra_withdrw to bgp_zebra_withdraw_actual + - Handle new fifo list cleanup in bgp_exit() + - New funcs: bgp_handle_route_announcements_to_zebra() and + bgp_zebra_route_install() + - Define a callback function to invoke + bgp_handle_route_announcements_to_zebra() when BUFFER_EMPTY in + zclient_flush_data() + +The current change deals with bgp installing routes via +bgp_process_main_one() + +Ticket: #3390099 + +Signed-off-by: Donald Sharp +Signed-off-by: Rajasekar Raja + +diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c +index d19f27110e..c29442d96c 100644 +--- a/bgpd/bgp_route.c ++++ b/bgpd/bgp_route.c +@@ -3214,8 +3214,8 @@ static void bgp_process_main_one(struct bgp *bgp, struct bgp_dest *dest, + || new_select->sub_type + == BGP_ROUTE_IMPORTED)) + +- bgp_zebra_announce(dest, old_select, +- bgp); ++ bgp_zebra_route_install( ++ dest, old_select, bgp, true); + } + } + +@@ -3312,9 +3312,10 @@ static void bgp_process_main_one(struct bgp *bgp, struct bgp_dest *dest, + */ + if (old_select && + is_route_parent_evpn(old_select)) +- bgp_zebra_withdraw(dest, old_select, bgp); ++ bgp_zebra_route_install(dest, old_select, bgp, ++ false); + +- bgp_zebra_announce(dest, new_select, bgp); ++ bgp_zebra_route_install(dest, new_select, bgp, true); + } else { + /* Withdraw the route from the kernel. */ + if (old_select && old_select->type == ZEBRA_ROUTE_BGP +@@ -3322,7 +3323,8 @@ static void bgp_process_main_one(struct bgp *bgp, struct bgp_dest *dest, + || old_select->sub_type == BGP_ROUTE_AGGREGATE + || old_select->sub_type == BGP_ROUTE_IMPORTED)) + +- bgp_zebra_withdraw(dest, old_select, bgp); ++ bgp_zebra_route_install(dest, old_select, bgp, ++ false); + } + } + +@@ -4201,7 +4203,7 @@ void bgp_update(struct peer *peer, const struct prefix *p, uint32_t addpath_id, + if (pi && pi->attr->rmap_table_id != new_attr.rmap_table_id) { + if (CHECK_FLAG(pi->flags, BGP_PATH_SELECTED)) + /* remove from RIB previous entry */ +- bgp_zebra_withdraw(dest, pi, bgp); ++ bgp_zebra_route_install(dest, pi, bgp, false); + } + + if (peer->sort == BGP_PEER_EBGP) { +@@ -5865,7 +5867,8 @@ static void bgp_cleanup_table(struct bgp *bgp, struct bgp_table *table, + || pi->sub_type == BGP_ROUTE_IMPORTED)) { + + if (bgp_fibupd_safi(safi)) +- bgp_zebra_withdraw(dest, pi, bgp); ++ bgp_zebra_withdraw_actual(dest, pi, ++ bgp); + } + + bgp_path_info_reap(dest, pi); +diff --git a/bgpd/bgp_table.h b/bgpd/bgp_table.h +index d43bf86eb9..45d61f8dfd 100644 +--- a/bgpd/bgp_table.h ++++ b/bgpd/bgp_table.h +@@ -102,6 +102,7 @@ struct bgp_node { + STAILQ_ENTRY(bgp_dest) pq; + + struct zebra_announce_item zai; ++ struct bgp_path_info *za_bgp_pi; + + uint64_t version; + +@@ -117,6 +118,8 @@ struct bgp_node { + #define BGP_NODE_FIB_INSTALLED (1 << 6) + #define BGP_NODE_LABEL_REQUESTED (1 << 7) + #define BGP_NODE_SOFT_RECONFIG (1 << 8) ++#define BGP_NODE_SCHEDULE_FOR_INSTALL (1 << 10) ++#define BGP_NODE_SCHEDULE_FOR_DELETE (1 << 11) + + struct bgp_addpath_node_data tx_addpath; + +diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c +index 920df835a4..1162941ef1 100644 +--- a/bgpd/bgp_zebra.c ++++ b/bgpd/bgp_zebra.c +@@ -1292,8 +1292,9 @@ static bool bgp_zebra_use_nhop_weighted(struct bgp *bgp, struct attr *attr, + return true; + } + +-void bgp_zebra_announce(struct bgp_dest *dest, struct bgp_path_info *info, +- struct bgp *bgp) ++static enum zclient_send_status ++bgp_zebra_announce_actual(struct bgp_dest *dest, struct bgp_path_info *info, ++ struct bgp *bgp) + { + struct zapi_route api = { 0 }; + struct zapi_nexthop *api_nh; +@@ -1323,27 +1324,10 @@ void bgp_zebra_announce(struct bgp_dest *dest, struct bgp_path_info *info, + struct bgp_table *table = bgp_dest_table(dest); + const struct prefix *p = bgp_dest_get_prefix(dest); + +- /* +- * BGP is installing this route and bgp has been configured +- * to suppress announcements until the route has been installed +- * let's set the fact that we expect this route to be installed +- */ +- if (BGP_SUPPRESS_FIB_ENABLED(bgp)) +- SET_FLAG(dest->flags, BGP_NODE_FIB_INSTALL_PENDING); +- +- /* Don't try to install if we're not connected to Zebra or Zebra doesn't +- * know of this instance. +- */ +- if (!bgp_install_info_to_zebra(bgp)) +- return; +- +- if (bgp->main_zebra_update_hold) +- return; +- + if (table->safi == SAFI_FLOWSPEC) { + bgp_pbr_update_entry(bgp, p, info, table->afi, table->safi, + true); +- return; ++ return ZCLIENT_SEND_SUCCESS; + } + + /* +@@ -1704,10 +1688,11 @@ void bgp_zebra_announce(struct bgp_dest *dest, struct bgp_path_info *info, + zlog_debug("%s: %pFX: announcing to zebra (recursion %sset)", + __func__, p, (recursion_flag ? "" : "NOT ")); + } +- zclient_route_send(is_add ? ZEBRA_ROUTE_ADD : ZEBRA_ROUTE_DELETE, +- zclient, &api); ++ return zclient_route_send(is_add ? ZEBRA_ROUTE_ADD : ZEBRA_ROUTE_DELETE, ++ zclient, &api); + } + ++ + /* Announce all routes of a table to zebra */ + void bgp_zebra_announce_table(struct bgp *bgp, afi_t afi, safi_t safi) + { +@@ -1733,7 +1718,7 @@ void bgp_zebra_announce_table(struct bgp *bgp, afi_t afi, safi_t safi) + && (pi->sub_type == BGP_ROUTE_NORMAL + || pi->sub_type == BGP_ROUTE_IMPORTED))) + +- bgp_zebra_announce(dest, pi, bgp); ++ bgp_zebra_route_install(dest, pi, bgp, true); + } + + /* Announce routes of any bgp subtype of a table to zebra */ +@@ -1755,34 +1740,23 @@ void bgp_zebra_announce_table_all_subtypes(struct bgp *bgp, afi_t afi, + for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) + if (CHECK_FLAG(pi->flags, BGP_PATH_SELECTED) && + pi->type == ZEBRA_ROUTE_BGP) +- bgp_zebra_announce(dest, pi, bgp); ++ bgp_zebra_route_install(dest, pi, bgp, true); + } + +-void bgp_zebra_withdraw(struct bgp_dest *dest, struct bgp_path_info *info, +- struct bgp *bgp) ++enum zclient_send_status bgp_zebra_withdraw_actual(struct bgp_dest *dest, ++ struct bgp_path_info *info, ++ struct bgp *bgp) + { + struct zapi_route api; + struct peer *peer; + struct bgp_table *table = bgp_dest_table(dest); + const struct prefix *p = bgp_dest_get_prefix(dest); + +- /* +- * If we are withdrawing the route, we don't need to have this +- * flag set. So unset it. +- */ +- UNSET_FLAG(info->net->flags, BGP_NODE_FIB_INSTALL_PENDING); +- +- /* Don't try to install if we're not connected to Zebra or Zebra doesn't +- * know of this instance. +- */ +- if (!bgp_install_info_to_zebra(bgp)) +- return; +- + if (table->safi == SAFI_FLOWSPEC) { + peer = info->peer; + bgp_pbr_update_entry(peer->bgp, p, info, table->afi, + table->safi, false); +- return; ++ return ZCLIENT_SEND_SUCCESS; + } + + memset(&api, 0, sizeof(api)); +@@ -1800,7 +1774,172 @@ void bgp_zebra_withdraw(struct bgp_dest *dest, struct bgp_path_info *info, + zlog_debug("Tx route delete VRF %u %pFX", bgp->vrf_id, + &api.prefix); + +- zclient_route_send(ZEBRA_ROUTE_DELETE, zclient, &api); ++ return zclient_route_send(ZEBRA_ROUTE_DELETE, zclient, &api); ++} ++ ++/* ++ * Walk the new Fifo list one by one and invoke bgp_zebra_announce/withdraw ++ * to install/withdraw the routes to zebra. ++ * ++ * If status = ZCLIENT_SEND_SUCCESS (Buffer empt)y i.e. Zebra is free to ++ * receive more incoming data, then pick the next item on the list and ++ * continue processing. ++ * ++ * If status = ZCLIENT_SEND_BUFFERED (Buffer pending) i.e. Zebra is busy, ++ * break and bail out of the function because once at some point when zebra ++ * is free, a callback is triggered which inturn call this same function and ++ * continue processing items on list. ++ */ ++#define ZEBRA_ANNOUNCEMENTS_LIMIT 1000 ++static void bgp_handle_route_announcements_to_zebra(struct thread *e) ++{ ++ uint32_t count = 0; ++ struct bgp_dest *dest = NULL; ++ struct bgp_table *table = NULL; ++ enum zclient_send_status status = ZCLIENT_SEND_SUCCESS; ++ bool install; ++ ++ while (count < ZEBRA_ANNOUNCEMENTS_LIMIT) { ++ dest = zebra_announce_pop(&bm->zebra_announce_head); ++ ++ if (!dest) ++ break; ++ ++ table = bgp_dest_table(dest); ++ install = ++ CHECK_FLAG(dest->flags, BGP_NODE_SCHEDULE_FOR_INSTALL); ++ ++ if (BGP_DEBUG(zebra, ZEBRA)) ++ zlog_debug( ++ "BGP %s route %pBD(%s) with dest %p and flags 0x%x to zebra", ++ install ? "announcing" : "withdrawing", dest, ++ table->bgp->name_pretty, dest, dest->flags); ++ ++ if (install) { ++ status = bgp_zebra_announce_actual( ++ dest, dest->za_bgp_pi, table->bgp); ++ UNSET_FLAG(dest->flags, BGP_NODE_SCHEDULE_FOR_INSTALL); ++ } else { ++ status = bgp_zebra_withdraw_actual( ++ dest, dest->za_bgp_pi, table->bgp); ++ UNSET_FLAG(dest->flags, BGP_NODE_SCHEDULE_FOR_DELETE); ++ } ++ ++ bgp_path_info_unlock(dest->za_bgp_pi); ++ dest->za_bgp_pi = NULL; ++ bgp_dest_unlock_node(dest); ++ ++ if (status == ZCLIENT_SEND_BUFFERED) ++ break; ++ ++ count++; ++ } ++ ++ if (status != ZCLIENT_SEND_BUFFERED && ++ zebra_announce_count(&bm->zebra_announce_head)) ++ thread_add_event(bm->master, ++ bgp_handle_route_announcements_to_zebra, NULL, ++ 0, &bm->t_bgp_zebra_route); ++} ++ ++/* ++ * Callback function invoked when zclient_flush_data() receives a BUFFER_EMPTY ++ * i.e. zebra is free to receive more incoming data. ++ */ ++static void bgp_zebra_buffer_write_ready(void) ++{ ++ bgp_handle_route_announcements_to_zebra(NULL); ++} ++ ++/* ++ * BGP is now keeping a list of dests with the dest having a pointer ++ * to the bgp_path_info that it will be working on. ++ * Here is the sequence of events that should happen: ++ * ++ * Current State New State Action ++ * ------------- --------- ------ ++ * ---- Install Place dest on list, save pi, mark ++ * as going to be installed ++ * ---- Withdrawal Place dest on list, save pi, mark ++ * as going to be deleted ++ * ++ * Install Install Leave dest on list, release old pi, ++ * save new pi, mark as going to be ++ * Installed ++ * Install Withdrawal Leave dest on list, release old pi, ++ * save new pi, mark as going to be ++ * withdrawan, remove install flag ++ * ++ * Withdrawal Install Special case, send withdrawal immediately ++ * Leave dest on list, release old pi, ++ * save new pi, mark as going to be ++ * installed. ++ * Withdrawal Withdrawal Leave dest on list, release old pi, ++ * save new pi, mark as going to be ++ * withdrawn. ++ */ ++void bgp_zebra_route_install(struct bgp_dest *dest, struct bgp_path_info *info, ++ struct bgp *bgp, bool install) ++{ ++ /* ++ * BGP is installing this route and bgp has been configured ++ * to suppress announcements until the route has been installed ++ * let's set the fact that we expect this route to be installed ++ */ ++ if (install) { ++ if (BGP_SUPPRESS_FIB_ENABLED(bgp)) ++ SET_FLAG(dest->flags, BGP_NODE_FIB_INSTALL_PENDING); ++ ++ if (bgp->main_zebra_update_hold) ++ return; ++ } else { ++ UNSET_FLAG(dest->flags, BGP_NODE_FIB_INSTALL_PENDING); ++ } ++ ++ /* ++ * Don't try to install if we're not connected to Zebra or Zebra doesn't ++ * know of this instance. ++ */ ++ if (!bgp_install_info_to_zebra(bgp)) ++ return; ++ ++ if (!CHECK_FLAG(dest->flags, BGP_NODE_SCHEDULE_FOR_INSTALL) && ++ !CHECK_FLAG(dest->flags, BGP_NODE_SCHEDULE_FOR_DELETE)) { ++ zebra_announce_add_tail(&bm->zebra_announce_head, dest); ++ /* ++ * If neither flag is set and za_bgp_pi is not set then it is a ++ * bug ++ */ ++ assert(!dest->za_bgp_pi); ++ bgp_path_info_lock(info); ++ bgp_dest_lock_node(dest); ++ dest->za_bgp_pi = info; ++ } else if (CHECK_FLAG(dest->flags, BGP_NODE_SCHEDULE_FOR_INSTALL)) { ++ assert(dest->za_bgp_pi); ++ bgp_path_info_unlock(dest->za_bgp_pi); ++ bgp_path_info_lock(info); ++ dest->za_bgp_pi = info; ++ } else if (CHECK_FLAG(dest->flags, BGP_NODE_SCHEDULE_FOR_DELETE)) { ++ assert(dest->za_bgp_pi); ++ if (install) ++ bgp_zebra_withdraw_actual(dest, dest->za_bgp_pi, bgp); ++ ++ bgp_path_info_unlock(dest->za_bgp_pi); ++ bgp_path_info_lock(info); ++ dest->za_bgp_pi = info; ++ } ++ ++ if (install) { ++ UNSET_FLAG(dest->flags, BGP_NODE_SCHEDULE_FOR_DELETE); ++ SET_FLAG(dest->flags, BGP_NODE_SCHEDULE_FOR_INSTALL); ++ } else { ++ UNSET_FLAG(dest->flags, BGP_NODE_SCHEDULE_FOR_INSTALL); ++ SET_FLAG(dest->flags, BGP_NODE_SCHEDULE_FOR_DELETE); ++ } ++ ++ thread_add_event(bm->master, bgp_handle_route_announcements_to_zebra, ++ NULL, 0, &bm->t_bgp_zebra_route); + } + + /* Withdraw all entries in a BGP instances RIB table from Zebra */ +@@ -1821,7 +1960,7 @@ void bgp_zebra_withdraw_table_all_subtypes(struct bgp *bgp, afi_t afi, safi_t sa + for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) { + if (CHECK_FLAG(pi->flags, BGP_PATH_SELECTED) + && (pi->type == ZEBRA_ROUTE_BGP)) +- bgp_zebra_withdraw(dest, pi, bgp); ++ bgp_zebra_route_install(dest, pi, bgp, false); + } + } + } +@@ -3470,6 +3609,7 @@ void bgp_zebra_init(struct thread_master *master, unsigned short instance) + zclient = zclient_new(master, &zclient_options_default, bgp_handlers, + array_size(bgp_handlers)); + zclient_init(zclient, ZEBRA_ROUTE_BGP, 0, &bgpd_privs); ++ zclient->zebra_buffer_write_ready = bgp_zebra_buffer_write_ready; + zclient->zebra_connected = bgp_zebra_connected; + zclient->instance = instance; + } +diff --git a/bgpd/bgp_zebra.h b/bgpd/bgp_zebra.h +index b77e423f8f..45fcf7f514 100644 +--- a/bgpd/bgp_zebra.h ++++ b/bgpd/bgp_zebra.h +@@ -43,11 +43,10 @@ extern void bgp_zebra_destroy(void); + extern int bgp_zebra_get_table_range(uint32_t chunk_size, + uint32_t *start, uint32_t *end); + extern int bgp_if_update_all(void); +-extern void bgp_zebra_announce(struct bgp_dest *dest, +- struct bgp_path_info *path, struct bgp *bgp); ++extern void bgp_zebra_route_install(struct bgp_dest *dest, ++ struct bgp_path_info *path, struct bgp *bgp, ++ bool install); + extern void bgp_zebra_announce_table(struct bgp *bgp, afi_t afi, safi_t safi); +-extern void bgp_zebra_withdraw(struct bgp_dest *dest, +- struct bgp_path_info *path, struct bgp *bgp); + + /* Announce routes of any bgp subtype of a table to zebra */ + extern void bgp_zebra_announce_table_all_subtypes(struct bgp *bgp, afi_t afi, +@@ -131,4 +130,7 @@ extern int bgp_zebra_update(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type); + extern int bgp_zebra_stale_timer_update(struct bgp *bgp); + extern int bgp_zebra_srv6_manager_get_locator_chunk(const char *name); + extern int bgp_zebra_srv6_manager_release_locator_chunk(const char *name); ++extern enum zclient_send_status ++bgp_zebra_withdraw_actual(struct bgp_dest *dest, struct bgp_path_info *info, ++ struct bgp *bgp); + #endif /* _QUAGGA_BGP_ZEBRA_H */ +diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c +index 392423e028..da133d71c1 100644 +--- a/bgpd/bgpd.c ++++ b/bgpd/bgpd.c +@@ -3688,10 +3688,20 @@ int bgp_delete(struct bgp *bgp) + afi_t afi; + safi_t safi; + int i; ++ struct bgp_dest *dest = NULL; + struct graceful_restart_info *gr_info; + + assert(bgp); + ++ while (zebra_announce_count(&bm->zebra_announce_head)) { ++ dest = zebra_announce_pop(&bm->zebra_announce_head); ++ if (dest->za_bgp_pi->peer->bgp == bgp) { ++ bgp_path_info_unlock(dest->za_bgp_pi); ++ bgp_dest_unlock_node(dest); ++ } else ++ zebra_announce_add_tail(&bm->zebra_announce_head, dest); ++ } ++ + bgp_soft_reconfig_table_task_cancel(bgp, NULL, NULL); + + /* make sure we withdraw any exported routes */ +@@ -8035,6 +8045,7 @@ void bgp_master_init(struct thread_master *master, const int buffer_size, + bm->tcp_dscp = IPTOS_PREC_INTERNETCONTROL; + bm->inq_limit = BM_DEFAULT_Q_LIMIT; + bm->outq_limit = BM_DEFAULT_Q_LIMIT; ++ bm->t_bgp_zebra_route = NULL; + + bgp_mac_init(); + /* init the rd id space. +@@ -8278,6 +8289,7 @@ void bgp_terminate(void) + list_delete(&bm->listen_sockets); + + THREAD_OFF(bm->t_rmap_update); ++ THREAD_OFF(bm->t_bgp_zebra_route); + + bgp_mac_finish(); + } +diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h +index 55f53bf9d3..bdf31f5161 100644 +--- a/bgpd/bgpd.h ++++ b/bgpd/bgpd.h +@@ -182,6 +182,8 @@ struct bgp_master { + uint32_t inq_limit; + uint32_t outq_limit; + ++ struct thread *t_bgp_zebra_route; ++ + /* To preserve ordering of installations into zebra across all Vrfs */ + struct zebra_announce_head zebra_announce_head; + +diff --git a/lib/zclient.c b/lib/zclient.c +index 0082b21485..c48c1c6ee4 100644 +--- a/lib/zclient.c ++++ b/lib/zclient.c +@@ -285,6 +285,7 @@ static void zclient_flush_data(struct thread *thread) + zclient->sock, &zclient->t_write); + break; + case BUFFER_EMPTY: ++ /* Currently only Sharpd and Bgpd has callbacks defined */ + if (zclient->zebra_buffer_write_ready) + (*zclient->zebra_buffer_write_ready)(); + break; +-- +2.17.1 + diff --git a/src/sonic-frr/patch/0036-bgpd-backpressure-Handle-BGP-Zebra-EPVN-Install-evt-.patch b/src/sonic-frr/patch/0036-bgpd-backpressure-Handle-BGP-Zebra-EPVN-Install-evt-.patch new file mode 100644 index 000000000000..e83526b6e8ef --- /dev/null +++ b/src/sonic-frr/patch/0036-bgpd-backpressure-Handle-BGP-Zebra-EPVN-Install-evt-.patch @@ -0,0 +1,995 @@ +From 84f7778808b7fee771f26c3cae46292ef85f06c0 Mon Sep 17 00:00:00 2001 +From: Rajasekar Raja +Date: Thu, 15 Feb 2024 11:23:51 -0800 +Subject: [PATCH 07/11] bgpd : backpressure - Handle BGP-Zebra(EPVN) Install + evt Creation + +Current changes deals with EVPN routes installation to zebra. + +In evpn_route_select_install() we invoke evpn_zebra_install/uninstall +which sends zclient_send_message(). + +This is a continuation of code changes (similar to +ccfe452763d16c432fa81fd20e805bec819b345e) but to handle evpn part +of the code. + +Ticket: #3390099 + +Signed-off-by: Rajasekar Raja + +diff --git a/bgpd/bgp_evpn.c b/bgpd/bgp_evpn.c +index 2b2cfa0f4c..622fd6afd2 100644 +--- a/bgpd/bgp_evpn.c ++++ b/bgpd/bgp_evpn.c +@@ -863,11 +863,10 @@ struct bgp_dest *bgp_evpn_vni_node_lookup(const struct bgpevpn *vpn, + /* + * Add (update) or delete MACIP from zebra. + */ +-static int bgp_zebra_send_remote_macip(struct bgp *bgp, struct bgpevpn *vpn, +- const struct prefix_evpn *p, +- const struct ethaddr *mac, +- struct in_addr remote_vtep_ip, int add, +- uint8_t flags, uint32_t seq, esi_t *esi) ++static enum zclient_send_status bgp_zebra_send_remote_macip( ++ struct bgp *bgp, struct bgpevpn *vpn, const struct prefix_evpn *p, ++ const struct ethaddr *mac, struct in_addr remote_vtep_ip, int add, ++ uint8_t flags, uint32_t seq, esi_t *esi) + { + struct stream *s; + uint16_t ipa_len; +@@ -875,8 +874,12 @@ static int bgp_zebra_send_remote_macip(struct bgp *bgp, struct bgpevpn *vpn, + bool esi_valid; + + /* Check socket. */ +- if (!zclient || zclient->sock < 0) +- return 0; ++ if (!zclient || zclient->sock < 0) { ++ if (BGP_DEBUG(zebra, ZEBRA)) ++ zlog_debug("%s: No zclient or zclient->sock exists", ++ __func__); ++ return ZCLIENT_SEND_SUCCESS; ++ } + + /* Don't try to register if Zebra doesn't know of this instance. */ + if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bgp)) { +@@ -884,7 +887,7 @@ static int bgp_zebra_send_remote_macip(struct bgp *bgp, struct bgpevpn *vpn, + zlog_debug( + "%s: No zebra instance to talk to, not installing remote macip", + __func__); +- return 0; ++ return ZCLIENT_SEND_SUCCESS; + } + + if (!esi) +@@ -956,15 +959,20 @@ static int bgp_zebra_send_remote_macip(struct bgp *bgp, struct bgpevpn *vpn, + /* + * Add (update) or delete remote VTEP from zebra. + */ +-static int bgp_zebra_send_remote_vtep(struct bgp *bgp, struct bgpevpn *vpn, +- const struct prefix_evpn *p, +- int flood_control, int add) ++static enum zclient_send_status ++bgp_zebra_send_remote_vtep(struct bgp *bgp, struct bgpevpn *vpn, ++ const struct prefix_evpn *p, int flood_control, ++ int add) + { + struct stream *s; + + /* Check socket. */ +- if (!zclient || zclient->sock < 0) +- return 0; ++ if (!zclient || zclient->sock < 0) { ++ if (BGP_DEBUG(zebra, ZEBRA)) ++ zlog_debug("%s: No zclient or zclient->sock exists", ++ __func__); ++ return ZCLIENT_SEND_SUCCESS; ++ } + + /* Don't try to register if Zebra doesn't know of this instance. */ + if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bgp)) { +@@ -972,7 +980,7 @@ static int bgp_zebra_send_remote_vtep(struct bgp *bgp, struct bgpevpn *vpn, + zlog_debug( + "%s: No zebra instance to talk to, not installing remote vtep", + __func__); +- return 0; ++ return ZCLIENT_SEND_SUCCESS; + } + + s = zclient->obuf; +@@ -989,7 +997,7 @@ static int bgp_zebra_send_remote_vtep(struct bgp *bgp, struct bgpevpn *vpn, + EC_BGP_VTEP_INVALID, + "Bad remote IP when trying to %s remote VTEP for VNI %u", + add ? "ADD" : "DEL", vpn->vni); +- return -1; ++ return ZCLIENT_SEND_FAILURE; + } + stream_putl(s, flood_control); + +@@ -1222,14 +1230,15 @@ static void add_mac_mobility_to_attr(uint32_t seq_num, struct attr *attr) + } + + /* Install EVPN route into zebra. */ +-static int evpn_zebra_install(struct bgp *bgp, struct bgpevpn *vpn, +- const struct prefix_evpn *p, +- struct bgp_path_info *pi) ++enum zclient_send_status evpn_zebra_install(struct bgp *bgp, ++ struct bgpevpn *vpn, ++ const struct prefix_evpn *p, ++ struct bgp_path_info *pi) + { +- int ret; + uint8_t flags; + int flood_control; + uint32_t seq; ++ enum zclient_send_status ret = ZCLIENT_SEND_SUCCESS; + + if (p->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE) { + flags = 0; +@@ -1302,6 +1311,7 @@ static int evpn_zebra_install(struct bgp *bgp, struct bgpevpn *vpn, + flood_control = VXLAN_FLOOD_DISABLED; + break; + } ++ + ret = bgp_zebra_send_remote_vtep(bgp, vpn, p, flood_control, 1); + } + +@@ -1309,11 +1319,13 @@ static int evpn_zebra_install(struct bgp *bgp, struct bgpevpn *vpn, + } + + /* Uninstall EVPN route from zebra. */ +-static int evpn_zebra_uninstall(struct bgp *bgp, struct bgpevpn *vpn, +- const struct prefix_evpn *p, +- struct bgp_path_info *pi, bool is_sync) ++enum zclient_send_status evpn_zebra_uninstall(struct bgp *bgp, ++ struct bgpevpn *vpn, ++ const struct prefix_evpn *p, ++ struct bgp_path_info *pi, ++ bool is_sync) + { +- int ret; ++ enum zclient_send_status ret = ZCLIENT_SEND_SUCCESS; + + if (p->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE) + ret = bgp_zebra_send_remote_macip( +@@ -1328,7 +1340,7 @@ static int evpn_zebra_uninstall(struct bgp *bgp, struct bgpevpn *vpn, + ret = bgp_evpn_remote_es_evi_del(bgp, vpn, p); + else + ret = bgp_zebra_send_remote_vtep(bgp, vpn, p, +- VXLAN_FLOOD_DISABLED, 0); ++ VXLAN_FLOOD_DISABLED, 0); + + return ret; + } +@@ -1419,12 +1431,18 @@ int evpn_route_select_install(struct bgp *bgp, struct bgpevpn *vpn, + && !CHECK_FLAG(dest->flags, BGP_NODE_USER_CLEAR) + && !CHECK_FLAG(old_select->flags, BGP_PATH_ATTR_CHANGED) + && !bgp_addpath_is_addpath_used(&bgp->tx_addpath, afi, safi)) { +- if (bgp_zebra_has_route_changed(old_select)) +- ret = evpn_zebra_install( +- bgp, vpn, +- (const struct prefix_evpn *)bgp_dest_get_prefix( +- dest), +- old_select); ++ if (bgp_zebra_has_route_changed(old_select)) { ++ if (CHECK_FLAG(bgp->flags, BGP_FLAG_DELETE_IN_PROGRESS)) ++ evpn_zebra_install( ++ bgp, vpn, ++ (const struct prefix_evpn *) ++ bgp_dest_get_prefix(dest), ++ old_select); ++ else ++ bgp_zebra_route_install(dest, old_select, bgp, ++ true, vpn, false); ++ } ++ + UNSET_FLAG(old_select->flags, BGP_PATH_MULTIPATH_CHG); + UNSET_FLAG(old_select->flags, BGP_PATH_LINK_BW_CHG); + bgp_zebra_clear_route_change_flags(dest); +@@ -1456,10 +1474,14 @@ int evpn_route_select_install(struct bgp *bgp, struct bgpevpn *vpn, + if (new_select && new_select->type == ZEBRA_ROUTE_BGP + && (new_select->sub_type == BGP_ROUTE_IMPORTED || + bgp_evpn_attr_is_sync(new_select->attr))) { +- ret = evpn_zebra_install( +- bgp, vpn, +- (struct prefix_evpn *)bgp_dest_get_prefix(dest), +- new_select); ++ if (CHECK_FLAG(bgp->flags, BGP_FLAG_DELETE_IN_PROGRESS)) ++ evpn_zebra_install(bgp, vpn, ++ (const struct prefix_evpn *) ++ bgp_dest_get_prefix(dest), ++ new_select); ++ else ++ bgp_zebra_route_install(dest, new_select, bgp, true, ++ vpn, false); + + /* If an old best existed and it was a "local" route, the only + * reason +@@ -1476,13 +1498,20 @@ int evpn_route_select_install(struct bgp *bgp, struct bgpevpn *vpn, + evpn_delete_old_local_route(bgp, vpn, dest, + old_select, new_select); + } else { +- if (old_select && old_select->type == ZEBRA_ROUTE_BGP +- && old_select->sub_type == BGP_ROUTE_IMPORTED) +- ret = evpn_zebra_uninstall( +- bgp, vpn, +- (const struct prefix_evpn *)bgp_dest_get_prefix( +- dest), +- old_select, false); ++ if (old_select && old_select->type == ZEBRA_ROUTE_BGP && ++ old_select->sub_type == BGP_ROUTE_IMPORTED) { ++ if (CHECK_FLAG(bgp->flags, ++ BGP_FLAG_DELETE_IN_PROGRESS) || ++ CHECK_FLAG(bgp->flags, BGP_FLAG_VNI_DOWN)) ++ evpn_zebra_uninstall( ++ bgp, vpn, ++ (const struct prefix_evpn *) ++ bgp_dest_get_prefix(dest), ++ old_select, false); ++ else ++ bgp_zebra_route_install(dest, old_select, bgp, ++ false, vpn, false); ++ } + } + + /* Clear any route change flags. */ +@@ -2012,9 +2041,19 @@ static void evpn_zebra_reinstall_best_route(struct bgp *bgp, + if (curr_select && curr_select->type == ZEBRA_ROUTE_BGP + && (curr_select->sub_type == BGP_ROUTE_IMPORTED || + bgp_evpn_attr_is_sync(curr_select->attr))) +- evpn_zebra_install(bgp, vpn, +- (const struct prefix_evpn *)bgp_dest_get_prefix(dest), +- curr_select); ++ if (curr_select && curr_select->type == ZEBRA_ROUTE_BGP && ++ (curr_select->sub_type == BGP_ROUTE_IMPORTED || ++ bgp_evpn_attr_is_sync(curr_select->attr))) { ++ if (CHECK_FLAG(bgp->flags, BGP_FLAG_DELETE_IN_PROGRESS)) ++ evpn_zebra_install( ++ bgp, vpn, ++ (const struct prefix_evpn *) ++ bgp_dest_get_prefix(dest), ++ curr_select); ++ else ++ bgp_zebra_route_install(dest, curr_select, bgp, ++ true, vpn, false); ++ } + } + + /* +@@ -2189,8 +2228,16 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn, + * has been removed. + */ + new_is_sync = bgp_evpn_attr_is_sync(pi->attr); +- if (!new_is_sync && old_is_sync) +- evpn_zebra_uninstall(bgp, vpn, p, pi, true); ++ if (!new_is_sync && old_is_sync) { ++ if (CHECK_FLAG(bgp->flags, ++ BGP_FLAG_DELETE_IN_PROGRESS)) ++ evpn_zebra_uninstall(bgp, vpn, p, pi, ++ true); ++ else ++ bgp_zebra_route_install(dest, pi, bgp, ++ false, vpn, ++ true); ++ } + } + } + bgp_path_info_unlock(pi); +@@ -2444,8 +2491,16 @@ void bgp_evpn_update_type2_route_entry(struct bgp *bgp, struct bgpevpn *vpn, + * has been removed. + */ + new_is_sync = bgp_evpn_attr_is_sync(pi->attr); +- if (!new_is_sync && old_is_sync) +- evpn_zebra_uninstall(bgp, vpn, &evp, pi, true); ++ if (!new_is_sync && old_is_sync) { ++ if (CHECK_FLAG(bgp->flags, ++ BGP_FLAG_DELETE_IN_PROGRESS)) ++ (void)evpn_zebra_uninstall( ++ bgp, vpn, &evp, pi, true); ++ else ++ bgp_zebra_route_install(dest, pi, bgp, ++ false, vpn, ++ true); ++ } + } + } + +@@ -2701,7 +2756,22 @@ static int delete_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn) + delete_all_type2_routes(bgp, vpn); + + build_evpn_type3_prefix(&p, vpn->originator_ip); ++ ++ /* ++ * To handle the following scenario: ++ * - Say, the new zebra announce fifo list has few vni Evpn prefixes ++ * yet to be sent to zebra. ++ * - At this point if we have triggers like "no advertise-all-vni" or ++ * "networking restart", where a vni is going down. ++ * ++ * Perform the below ++ * 1) send withdraw routes to zebra immediately in case it is ++ * installed. 2) before we blow up the vni table, we need to walk the ++ * list and pop all the dest whose za_vpn points to this vni. ++ */ ++ SET_FLAG(bgp->flags, BGP_FLAG_VNI_DOWN); + ret = delete_evpn_route(bgp, vpn, &p); ++ UNSET_FLAG(bgp->flags, BGP_FLAG_VNI_DOWN); + if (ret) + return ret; + +@@ -6028,6 +6098,17 @@ struct bgpevpn *bgp_evpn_new(struct bgp *bgp, vni_t vni, + */ + void bgp_evpn_free(struct bgp *bgp, struct bgpevpn *vpn) + { ++ struct bgp_dest *dest = NULL; ++ ++ while (zebra_announce_count(&bm->zebra_announce_head)) { ++ dest = zebra_announce_pop(&bm->zebra_announce_head); ++ if (dest->za_vpn == vpn) { ++ bgp_path_info_unlock(dest->za_bgp_pi); ++ bgp_dest_unlock_node(dest); ++ } else ++ zebra_announce_add_tail(&bm->zebra_announce_head, dest); ++ } ++ + bgp_evpn_remote_ip_hash_destroy(vpn); + bgp_evpn_vni_es_cleanup(vpn); + bgpevpn_unlink_from_l3vni(vpn); +diff --git a/bgpd/bgp_evpn.h b/bgpd/bgp_evpn.h +index 3cbc5af5af..bf1943a2db 100644 +--- a/bgpd/bgp_evpn.h ++++ b/bgpd/bgp_evpn.h +@@ -230,4 +230,12 @@ extern void + bgp_evpn_handle_resolve_overlay_index_unset(struct hash_bucket *bucket, + void *arg); + ++extern enum zclient_send_status evpn_zebra_install(struct bgp *bgp, ++ struct bgpevpn *vpn, ++ const struct prefix_evpn *p, ++ struct bgp_path_info *pi); ++extern enum zclient_send_status ++evpn_zebra_uninstall(struct bgp *bgp, struct bgpevpn *vpn, ++ const struct prefix_evpn *p, struct bgp_path_info *pi, ++ bool is_sync); + #endif /* _QUAGGA_BGP_EVPN_H */ +diff --git a/bgpd/bgp_evpn_mh.c b/bgpd/bgp_evpn_mh.c +index 552365959d..40687c558d 100644 +--- a/bgpd/bgp_evpn_mh.c ++++ b/bgpd/bgp_evpn_mh.c +@@ -56,13 +56,14 @@ static void bgp_evpn_local_es_down(struct bgp *bgp, + struct bgp_evpn_es *es); + static void bgp_evpn_local_type1_evi_route_del(struct bgp *bgp, + struct bgp_evpn_es *es); +-static struct bgp_evpn_es_vtep *bgp_evpn_es_vtep_add(struct bgp *bgp, ++static struct bgp_evpn_es_vtep * ++bgp_evpn_es_vtep_add(struct bgp *bgp, struct bgp_evpn_es *es, ++ struct in_addr vtep_ip, bool esr, uint8_t df_alg, ++ uint16_t df_pref, int *zret); ++static enum zclient_send_status bgp_evpn_es_vtep_del(struct bgp *bgp, + struct bgp_evpn_es *es, + struct in_addr vtep_ip, +- bool esr, uint8_t df_alg, +- uint16_t df_pref); +-static void bgp_evpn_es_vtep_del(struct bgp *bgp, +- struct bgp_evpn_es *es, struct in_addr vtep_ip, bool esr); ++ bool esr); + static void bgp_evpn_es_cons_checks_pend_add(struct bgp_evpn_es *es); + static void bgp_evpn_es_cons_checks_pend_del(struct bgp_evpn_es *es); + static struct bgp_evpn_es_evi * +@@ -105,6 +106,7 @@ static int bgp_evpn_es_route_select_install(struct bgp *bgp, + struct bgp_dest *dest) + { + int ret = 0; ++ int zret = 0; + afi_t afi = AFI_L2VPN; + safi_t safi = SAFI_EVPN; + struct bgp_path_info *old_select; /* old best */ +@@ -131,7 +133,7 @@ static int bgp_evpn_es_route_select_install(struct bgp *bgp, + bgp_evpn_es_vtep_add(bgp, es, old_select->attr->nexthop, + true /*esr*/, + old_select->attr->df_alg, +- old_select->attr->df_pref); ++ old_select->attr->df_pref, &zret); + } + UNSET_FLAG(old_select->flags, BGP_PATH_MULTIPATH_CHG); + bgp_zebra_clear_route_change_flags(dest); +@@ -160,7 +162,7 @@ static int bgp_evpn_es_route_select_install(struct bgp *bgp, + && new_select->sub_type == BGP_ROUTE_IMPORTED) { + bgp_evpn_es_vtep_add(bgp, es, new_select->attr->nexthop, + true /*esr */, new_select->attr->df_alg, +- new_select->attr->df_pref); ++ new_select->attr->df_pref, &zret); + } else { + if (old_select && old_select->type == ZEBRA_ROUTE_BGP + && old_select->sub_type == BGP_ROUTE_IMPORTED) +@@ -447,7 +449,7 @@ int bgp_evpn_mh_route_update(struct bgp *bgp, struct bgp_evpn_es *es, + &attr->mp_nexthop_global_in); + } + +- /* Return back the route entry. */ ++ /* Return back th*e route entry. */ + *ri = tmp_pi; + return 0; + } +@@ -1366,23 +1368,28 @@ static struct bgp_evpn_es_vtep *bgp_evpn_es_vtep_find(struct bgp_evpn_es *es, + } + + /* Send the remote ES to zebra for NHG programming */ +-static int bgp_zebra_send_remote_es_vtep(struct bgp *bgp, +- struct bgp_evpn_es_vtep *es_vtep, bool add) ++static enum zclient_send_status ++bgp_zebra_send_remote_es_vtep(struct bgp *bgp, struct bgp_evpn_es_vtep *es_vtep, ++ bool add) + { + struct bgp_evpn_es *es = es_vtep->es; + struct stream *s; + uint32_t flags = 0; + + /* Check socket. */ +- if (!zclient || zclient->sock < 0) +- return 0; ++ if (!zclient || zclient->sock < 0) { ++ if (BGP_DEBUG(zebra, ZEBRA)) ++ zlog_debug("%s: No zclient or zclient->sock exists", ++ __func__); ++ return ZCLIENT_SEND_SUCCESS; ++ } + + /* Don't try to register if Zebra doesn't know of this instance. */ + if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bgp)) { + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("No zebra instance, not installing remote es %s", + es->esi_str); +- return 0; ++ return ZCLIENT_SEND_SUCCESS; + } + + if (es_vtep->flags & BGP_EVPNES_VTEP_ESR) +@@ -1413,12 +1420,12 @@ static int bgp_zebra_send_remote_es_vtep(struct bgp *bgp, + return zclient_send_message(zclient); + } + +-static void bgp_evpn_es_vtep_re_eval_active(struct bgp *bgp, +- struct bgp_evpn_es_vtep *es_vtep, +- bool param_change) ++static enum zclient_send_status bgp_evpn_es_vtep_re_eval_active( ++ struct bgp *bgp, struct bgp_evpn_es_vtep *es_vtep, bool param_change) + { + bool old_active; + bool new_active; ++ enum zclient_send_status ret = ZCLIENT_SEND_SUCCESS; + + old_active = CHECK_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ACTIVE); + /* currently we need an active EVI reference to use the VTEP as +@@ -1440,7 +1447,7 @@ static void bgp_evpn_es_vtep_re_eval_active(struct bgp *bgp, + es_vtep->df_alg, es_vtep->df_pref); + + /* send remote ES to zebra */ +- bgp_zebra_send_remote_es_vtep(bgp, es_vtep, new_active); ++ ret = bgp_zebra_send_remote_es_vtep(bgp, es_vtep, new_active); + + /* The NHG is updated first for efficient failover handling. + * Note the NHG can be de-activated while there are bgp +@@ -1452,13 +1459,14 @@ static void bgp_evpn_es_vtep_re_eval_active(struct bgp *bgp, + /* queue up the es for background consistency checks */ + bgp_evpn_es_cons_checks_pend_add(es_vtep->es); + } ++ ++ return ret; + } + +-static struct bgp_evpn_es_vtep *bgp_evpn_es_vtep_add(struct bgp *bgp, +- struct bgp_evpn_es *es, +- struct in_addr vtep_ip, +- bool esr, uint8_t df_alg, +- uint16_t df_pref) ++static struct bgp_evpn_es_vtep * ++bgp_evpn_es_vtep_add(struct bgp *bgp, struct bgp_evpn_es *es, ++ struct in_addr vtep_ip, bool esr, uint8_t df_alg, ++ uint16_t df_pref, int *zret) + { + struct bgp_evpn_es_vtep *es_vtep; + bool param_change = false; +@@ -1485,15 +1493,17 @@ static struct bgp_evpn_es_vtep *bgp_evpn_es_vtep_add(struct bgp *bgp, + ++es_vtep->evi_cnt; + } + +- bgp_evpn_es_vtep_re_eval_active(bgp, es_vtep, param_change); ++ *zret = bgp_evpn_es_vtep_re_eval_active(bgp, es_vtep, param_change); + + return es_vtep; + } + +-static void bgp_evpn_es_vtep_do_del(struct bgp *bgp, +- struct bgp_evpn_es_vtep *es_vtep, bool esr) ++static enum zclient_send_status ++bgp_evpn_es_vtep_do_del(struct bgp *bgp, struct bgp_evpn_es_vtep *es_vtep, ++ bool esr) + { + bool param_change = false; ++ enum zclient_send_status ret = ZCLIENT_SEND_SUCCESS; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("es %s vtep %pI4 del %s", es_vtep->es->esi_str, +@@ -1510,18 +1520,25 @@ static void bgp_evpn_es_vtep_do_del(struct bgp *bgp, + --es_vtep->evi_cnt; + } + +- bgp_evpn_es_vtep_re_eval_active(bgp, es_vtep, param_change); ++ ret = bgp_evpn_es_vtep_re_eval_active(bgp, es_vtep, param_change); + bgp_evpn_es_vtep_free(es_vtep); ++ ++ return ret; + } + +-static void bgp_evpn_es_vtep_del(struct bgp *bgp, +- struct bgp_evpn_es *es, struct in_addr vtep_ip, bool esr) ++static enum zclient_send_status bgp_evpn_es_vtep_del(struct bgp *bgp, ++ struct bgp_evpn_es *es, ++ struct in_addr vtep_ip, ++ bool esr) + { + struct bgp_evpn_es_vtep *es_vtep; ++ enum zclient_send_status ret = ZCLIENT_SEND_SUCCESS; + + es_vtep = bgp_evpn_es_vtep_find(es, vtep_ip); + if (es_vtep) +- bgp_evpn_es_vtep_do_del(bgp, es_vtep, esr); ++ ret = bgp_evpn_es_vtep_do_del(bgp, es_vtep, esr); ++ ++ return ret; + } + + /********************** ES MAC-IP paths ************************************* +@@ -3382,12 +3399,14 @@ static struct bgp_evpn_es_evi_vtep *bgp_evpn_es_evi_vtep_find( + /* A VTEP can be added as "active" attach to an ES if EAD-per-ES and + * EAD-per-EVI routes are rxed from it. + */ +-static void bgp_evpn_es_evi_vtep_re_eval_active(struct bgp *bgp, +- struct bgp_evpn_es_evi_vtep *evi_vtep) ++static enum zclient_send_status ++bgp_evpn_es_evi_vtep_re_eval_active(struct bgp *bgp, ++ struct bgp_evpn_es_evi_vtep *evi_vtep) + { + bool old_active; + bool new_active; + uint32_t ead_activity_flags; ++ enum zclient_send_status ret = ZCLIENT_SEND_SUCCESS; + + old_active = CHECK_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_ACTIVE); + +@@ -3408,7 +3427,7 @@ static void bgp_evpn_es_evi_vtep_re_eval_active(struct bgp *bgp, + new_active = CHECK_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_ACTIVE); + + if (old_active == new_active) +- return; ++ return ret; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("es %s evi %u vtep %pI4 %s", +@@ -3417,24 +3436,26 @@ static void bgp_evpn_es_evi_vtep_re_eval_active(struct bgp *bgp, + new_active ? "active" : "inactive"); + + /* add VTEP to parent es */ +- if (new_active) ++ if (new_active) { + evi_vtep->es_vtep = bgp_evpn_es_vtep_add( + bgp, evi_vtep->es_evi->es, evi_vtep->vtep_ip, +- false /*esr*/, 0, 0); +- else { ++ false /*esr*/, 0, 0, &ret); ++ } else { + if (evi_vtep->es_vtep) { +- bgp_evpn_es_vtep_do_del(bgp, evi_vtep->es_vtep, +- false /*esr*/); ++ ret = bgp_evpn_es_vtep_do_del(bgp, evi_vtep->es_vtep, ++ false /*esr*/); + evi_vtep->es_vtep = NULL; + } + } + /* queue up the parent es for background consistency checks */ + bgp_evpn_es_cons_checks_pend_add(evi_vtep->es_evi->es); ++ ++ return ret; + } + +-static void bgp_evpn_es_evi_vtep_add(struct bgp *bgp, +- struct bgp_evpn_es_evi *es_evi, struct in_addr vtep_ip, +- bool ead_es) ++static enum zclient_send_status ++bgp_evpn_es_evi_vtep_add(struct bgp *bgp, struct bgp_evpn_es_evi *es_evi, ++ struct in_addr vtep_ip, bool ead_es) + { + struct bgp_evpn_es_evi_vtep *evi_vtep; + +@@ -3454,18 +3475,19 @@ static void bgp_evpn_es_evi_vtep_add(struct bgp *bgp, + else + SET_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_EAD_PER_EVI); + +- bgp_evpn_es_evi_vtep_re_eval_active(bgp, evi_vtep); ++ return bgp_evpn_es_evi_vtep_re_eval_active(bgp, evi_vtep); + } + +-static void bgp_evpn_es_evi_vtep_del(struct bgp *bgp, +- struct bgp_evpn_es_evi *es_evi, struct in_addr vtep_ip, +- bool ead_es) ++static enum zclient_send_status ++bgp_evpn_es_evi_vtep_del(struct bgp *bgp, struct bgp_evpn_es_evi *es_evi, ++ struct in_addr vtep_ip, bool ead_es) + { + struct bgp_evpn_es_evi_vtep *evi_vtep; ++ enum zclient_send_status ret = ZCLIENT_SEND_SUCCESS; + + evi_vtep = bgp_evpn_es_evi_vtep_find(es_evi, vtep_ip); + if (!evi_vtep) +- return; ++ return ret; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("del es %s evi %u vtep %pI4 %s", +@@ -3478,8 +3500,10 @@ static void bgp_evpn_es_evi_vtep_del(struct bgp *bgp, + else + UNSET_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_EAD_PER_EVI); + +- bgp_evpn_es_evi_vtep_re_eval_active(bgp, evi_vtep); ++ ret = bgp_evpn_es_evi_vtep_re_eval_active(bgp, evi_vtep); + bgp_evpn_es_evi_vtep_free(evi_vtep); ++ ++ return ret; + } + + /* compare ES-IDs for the ES-EVI RB tree maintained per-VNI */ +@@ -3755,18 +3779,20 @@ int bgp_evpn_local_es_evi_add(struct bgp *bgp, esi_t *esi, vni_t vni) + /* Add remote ES-EVI entry. This is actually the remote VTEP add and the + * ES-EVI is implicity created on first VTEP's reference. + */ +-int bgp_evpn_remote_es_evi_add(struct bgp *bgp, struct bgpevpn *vpn, +- const struct prefix_evpn *p) ++enum zclient_send_status bgp_evpn_remote_es_evi_add(struct bgp *bgp, ++ struct bgpevpn *vpn, ++ const struct prefix_evpn *p) + { + char buf[ESI_STR_LEN]; + struct bgp_evpn_es *es; + struct bgp_evpn_es_evi *es_evi; + bool ead_es; + const esi_t *esi = &p->prefix.ead_addr.esi; ++ enum zclient_send_status ret = ZCLIENT_SEND_SUCCESS; + + if (!vpn) + /* local EAD-ES need not be sent back to zebra */ +- return 0; ++ return ret; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("add remote %s es %s evi %u vtep %pI4", +@@ -3783,27 +3809,29 @@ int bgp_evpn_remote_es_evi_add(struct bgp *bgp, struct bgpevpn *vpn, + es_evi = bgp_evpn_es_evi_new(es, vpn); + + ead_es = !!p->prefix.ead_addr.eth_tag; +- bgp_evpn_es_evi_vtep_add(bgp, es_evi, p->prefix.ead_addr.ip.ipaddr_v4, +- ead_es); ++ ret = bgp_evpn_es_evi_vtep_add(bgp, es_evi, ++ p->prefix.ead_addr.ip.ipaddr_v4, ead_es); + + bgp_evpn_es_evi_remote_info_re_eval(es_evi); +- return 0; ++ return ret; + } + + /* A remote VTEP has withdrawn. The es-evi-vtep will be deleted and the + * parent es-evi freed up implicitly in last VTEP's deref. + */ +-int bgp_evpn_remote_es_evi_del(struct bgp *bgp, struct bgpevpn *vpn, +- const struct prefix_evpn *p) ++enum zclient_send_status bgp_evpn_remote_es_evi_del(struct bgp *bgp, ++ struct bgpevpn *vpn, ++ const struct prefix_evpn *p) + { + char buf[ESI_STR_LEN]; + struct bgp_evpn_es *es; + struct bgp_evpn_es_evi *es_evi; + bool ead_es; ++ enum zclient_send_status ret = ZCLIENT_SEND_SUCCESS; + + if (!vpn) + /* local EAD-ES need not be sent back to zebra */ +- return 0; ++ return ret; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug( +@@ -3822,7 +3850,7 @@ int bgp_evpn_remote_es_evi_del(struct bgp *bgp, struct bgpevpn *vpn, + esi_to_str(&p->prefix.ead_addr.esi, buf, + sizeof(buf)), + vpn->vni, &p->prefix.ead_addr.ip.ipaddr_v4); +- return 0; ++ return ret; + } + es_evi = bgp_evpn_es_evi_find(es, vpn); + if (!es_evi) { +@@ -3835,14 +3863,15 @@ int bgp_evpn_remote_es_evi_del(struct bgp *bgp, struct bgpevpn *vpn, + sizeof(buf)), + vpn->vni, + &p->prefix.ead_addr.ip.ipaddr_v4); +- return 0; ++ return ret; + } + + ead_es = !!p->prefix.ead_addr.eth_tag; +- bgp_evpn_es_evi_vtep_del(bgp, es_evi, p->prefix.ead_addr.ip.ipaddr_v4, +- ead_es); ++ ret = bgp_evpn_es_evi_vtep_del(bgp, es_evi, ++ p->prefix.ead_addr.ip.ipaddr_v4, ead_es); + bgp_evpn_es_evi_remote_info_re_eval(es_evi); +- return 0; ++ ++ return ret; + } + + /* If a VNI is being deleted we need to force del all remote VTEPs */ +diff --git a/bgpd/bgp_evpn_mh.h b/bgpd/bgp_evpn_mh.h +index 11030e323f..d6e77e982f 100644 +--- a/bgpd/bgp_evpn_mh.h ++++ b/bgpd/bgp_evpn_mh.h +@@ -434,10 +434,12 @@ extern int bgp_evpn_local_es_add(struct bgp *bgp, esi_t *esi, + extern int bgp_evpn_local_es_del(struct bgp *bgp, esi_t *esi); + extern int bgp_evpn_local_es_evi_add(struct bgp *bgp, esi_t *esi, vni_t vni); + extern int bgp_evpn_local_es_evi_del(struct bgp *bgp, esi_t *esi, vni_t vni); +-extern int bgp_evpn_remote_es_evi_add(struct bgp *bgp, struct bgpevpn *vpn, +- const struct prefix_evpn *p); +-extern int bgp_evpn_remote_es_evi_del(struct bgp *bgp, struct bgpevpn *vpn, +- const struct prefix_evpn *p); ++extern enum zclient_send_status ++bgp_evpn_remote_es_evi_add(struct bgp *bgp, struct bgpevpn *vpn, ++ const struct prefix_evpn *p); ++extern enum zclient_send_status ++bgp_evpn_remote_es_evi_del(struct bgp *bgp, struct bgpevpn *vpn, ++ const struct prefix_evpn *p); + extern void bgp_evpn_mh_init(void); + extern void bgp_evpn_mh_finish(void); + void bgp_evpn_vni_es_init(struct bgpevpn *vpn); +diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c +index c29442d96c..679abba463 100644 +--- a/bgpd/bgp_route.c ++++ b/bgpd/bgp_route.c +@@ -3213,9 +3213,9 @@ static void bgp_process_main_one(struct bgp *bgp, struct bgp_dest *dest, + && (new_select->sub_type == BGP_ROUTE_NORMAL + || new_select->sub_type + == BGP_ROUTE_IMPORTED)) +- + bgp_zebra_route_install( +- dest, old_select, bgp, true); ++ dest, old_select, bgp, true, ++ NULL, false); + } + } + +@@ -3313,9 +3313,10 @@ static void bgp_process_main_one(struct bgp *bgp, struct bgp_dest *dest, + if (old_select && + is_route_parent_evpn(old_select)) + bgp_zebra_route_install(dest, old_select, bgp, +- false); ++ false, NULL, false); + +- bgp_zebra_route_install(dest, new_select, bgp, true); ++ bgp_zebra_route_install(dest, new_select, bgp, true, ++ NULL, false); + } else { + /* Withdraw the route from the kernel. */ + if (old_select && old_select->type == ZEBRA_ROUTE_BGP +@@ -3324,7 +3325,7 @@ static void bgp_process_main_one(struct bgp *bgp, struct bgp_dest *dest, + || old_select->sub_type == BGP_ROUTE_IMPORTED)) + + bgp_zebra_route_install(dest, old_select, bgp, +- false); ++ false, NULL, false); + } + } + +@@ -4203,7 +4204,8 @@ void bgp_update(struct peer *peer, const struct prefix *p, uint32_t addpath_id, + if (pi && pi->attr->rmap_table_id != new_attr.rmap_table_id) { + if (CHECK_FLAG(pi->flags, BGP_PATH_SELECTED)) + /* remove from RIB previous entry */ +- bgp_zebra_route_install(dest, pi, bgp, false); ++ bgp_zebra_route_install(dest, pi, bgp, false, NULL, ++ false); + } + + if (peer->sort == BGP_PEER_EBGP) { +diff --git a/bgpd/bgp_table.h b/bgpd/bgp_table.h +index 45d61f8dfd..9eb681ea3f 100644 +--- a/bgpd/bgp_table.h ++++ b/bgpd/bgp_table.h +@@ -103,6 +103,8 @@ struct bgp_node { + + struct zebra_announce_item zai; + struct bgp_path_info *za_bgp_pi; ++ struct bgpevpn *za_vpn; ++ bool za_is_sync; + + uint64_t version; + +diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c +index 1162941ef1..b81acaf8ec 100644 +--- a/bgpd/bgp_zebra.c ++++ b/bgpd/bgp_zebra.c +@@ -1713,12 +1713,11 @@ void bgp_zebra_announce_table(struct bgp *bgp, afi_t afi, safi_t safi) + for (dest = bgp_table_top(table); dest; dest = bgp_route_next(dest)) + for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) + if (CHECK_FLAG(pi->flags, BGP_PATH_SELECTED) && +- +- (pi->type == ZEBRA_ROUTE_BGP +- && (pi->sub_type == BGP_ROUTE_NORMAL +- || pi->sub_type == BGP_ROUTE_IMPORTED))) +- +- bgp_zebra_route_install(dest, pi, bgp, true); ++ (pi->type == ZEBRA_ROUTE_BGP && ++ (pi->sub_type == BGP_ROUTE_NORMAL || ++ pi->sub_type == BGP_ROUTE_IMPORTED))) ++ bgp_zebra_route_install(dest, pi, bgp, true, ++ NULL, false); + } + + /* Announce routes of any bgp subtype of a table to zebra */ +@@ -1740,7 +1739,8 @@ void bgp_zebra_announce_table_all_subtypes(struct bgp *bgp, afi_t afi, + for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) + if (CHECK_FLAG(pi->flags, BGP_PATH_SELECTED) && + pi->type == ZEBRA_ROUTE_BGP) +- bgp_zebra_route_install(dest, pi, bgp, true); ++ bgp_zebra_route_install(dest, pi, bgp, true, ++ NULL, false); + } + + enum zclient_send_status bgp_zebra_withdraw_actual(struct bgp_dest *dest, +@@ -1793,6 +1793,7 @@ enum zclient_send_status bgp_zebra_withdraw_actual(struct bgp_dest *dest, + #define ZEBRA_ANNOUNCEMENTS_LIMIT 1000 + static void bgp_handle_route_announcements_to_zebra(struct thread *e) + { ++ bool is_evpn = false; + uint32_t count = 0; + struct bgp_dest *dest = NULL; + struct bgp_table *table = NULL; +@@ -1808,6 +1809,9 @@ static void bgp_handle_route_announcements_to_zebra(struct thread *e) + table = bgp_dest_table(dest); + install = + CHECK_FLAG(dest->flags, BGP_NODE_SCHEDULE_FOR_INSTALL); ++ if (table && table->afi == AFI_L2VPN && ++ table->safi == SAFI_EVPN) ++ is_evpn = true; + + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug( +@@ -1816,17 +1820,33 @@ static void bgp_handle_route_announcements_to_zebra(struct thread *e) + table->bgp->name_pretty, dest, dest->flags); + + if (install) { +- status = bgp_zebra_announce_actual( +- dest, dest->za_bgp_pi, table->bgp); ++ if (is_evpn) ++ status = evpn_zebra_install( ++ table->bgp, dest->za_vpn, ++ (const struct prefix_evpn *) ++ bgp_dest_get_prefix(dest), ++ dest->za_bgp_pi); ++ else ++ status = bgp_zebra_announce_actual( ++ dest, dest->za_bgp_pi, table->bgp); + UNSET_FLAG(dest->flags, BGP_NODE_SCHEDULE_FOR_INSTALL); + } else { +- status = bgp_zebra_withdraw_actual( +- dest, dest->za_bgp_pi, table->bgp); ++ if (is_evpn) ++ status = evpn_zebra_uninstall( ++ table->bgp, dest->za_vpn, ++ (const struct prefix_evpn *) ++ bgp_dest_get_prefix(dest), ++ dest->za_bgp_pi, false); ++ else ++ status = bgp_zebra_withdraw_actual( ++ dest, dest->za_bgp_pi, table->bgp); ++ + UNSET_FLAG(dest->flags, BGP_NODE_SCHEDULE_FOR_DELETE); + } + + bgp_path_info_unlock(dest->za_bgp_pi); + dest->za_bgp_pi = NULL; ++ dest->za_vpn = NULL; + bgp_dest_unlock_node(dest); + + if (status == ZCLIENT_SEND_BUFFERED) +@@ -1880,8 +1900,16 @@ static void bgp_zebra_buffer_write_ready(void) + * withdrawn. + */ + void bgp_zebra_route_install(struct bgp_dest *dest, struct bgp_path_info *info, +- struct bgp *bgp, bool install) ++ struct bgp *bgp, bool install, struct bgpevpn *vpn, ++ bool is_sync) + { ++ bool is_evpn = false; ++ struct bgp_table *table = NULL; ++ ++ table = bgp_dest_table(dest); ++ if (table && table->afi == AFI_L2VPN && table->safi == SAFI_EVPN) ++ is_evpn = true; ++ + /* + * BGP is installing this route and bgp has been configured + * to suppress announcements until the route has been installed +@@ -1891,7 +1919,7 @@ void bgp_zebra_route_install(struct bgp_dest *dest, struct bgp_path_info *info, + if (BGP_SUPPRESS_FIB_ENABLED(bgp)) + SET_FLAG(dest->flags, BGP_NODE_FIB_INSTALL_PENDING); + +- if (bgp->main_zebra_update_hold) ++ if (bgp->main_zebra_update_hold && !is_evpn) + return; + } else { + UNSET_FLAG(dest->flags, BGP_NODE_FIB_INSTALL_PENDING); +@@ -1901,7 +1929,7 @@ void bgp_zebra_route_install(struct bgp_dest *dest, struct bgp_path_info *info, + * Don't try to install if we're not connected to Zebra or Zebra doesn't + * know of this instance. + */ +- if (!bgp_install_info_to_zebra(bgp)) ++ if (!bgp_install_info_to_zebra(bgp) && !is_evpn) + return; + + if (!CHECK_FLAG(dest->flags, BGP_NODE_SCHEDULE_FOR_INSTALL) && +@@ -1922,7 +1950,7 @@ void bgp_zebra_route_install(struct bgp_dest *dest, struct bgp_path_info *info, + dest->za_bgp_pi = info; + } else if (CHECK_FLAG(dest->flags, BGP_NODE_SCHEDULE_FOR_DELETE)) { + assert(dest->za_bgp_pi); +- if (install) ++ if (install & !is_evpn) + bgp_zebra_withdraw_actual(dest, dest->za_bgp_pi, bgp); + + bgp_path_info_unlock(dest->za_bgp_pi); +@@ -1930,6 +1958,11 @@ void bgp_zebra_route_install(struct bgp_dest *dest, struct bgp_path_info *info, + dest->za_bgp_pi = info; + } + ++ if (is_evpn) { ++ dest->za_vpn = vpn; ++ dest->za_is_sync = is_sync; ++ } ++ + if (install) { + UNSET_FLAG(dest->flags, BGP_NODE_SCHEDULE_FOR_DELETE); + SET_FLAG(dest->flags, BGP_NODE_SCHEDULE_FOR_INSTALL); +@@ -1960,7 +1993,8 @@ void bgp_zebra_withdraw_table_all_subtypes(struct bgp *bgp, afi_t afi, safi_t sa + for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) { + if (CHECK_FLAG(pi->flags, BGP_PATH_SELECTED) + && (pi->type == ZEBRA_ROUTE_BGP)) +- bgp_zebra_route_install(dest, pi, bgp, false); ++ bgp_zebra_route_install(dest, pi, bgp, false, ++ NULL, false); + } + } + } +diff --git a/bgpd/bgp_zebra.h b/bgpd/bgp_zebra.h +index 45fcf7f514..5186b7454e 100644 +--- a/bgpd/bgp_zebra.h ++++ b/bgpd/bgp_zebra.h +@@ -45,7 +45,8 @@ extern int bgp_zebra_get_table_range(uint32_t chunk_size, + extern int bgp_if_update_all(void); + extern void bgp_zebra_route_install(struct bgp_dest *dest, + struct bgp_path_info *path, struct bgp *bgp, +- bool install); ++ bool install, struct bgpevpn *vpn, ++ bool is_sync); + extern void bgp_zebra_announce_table(struct bgp *bgp, afi_t afi, safi_t safi); + + /* Announce routes of any bgp subtype of a table to zebra */ +diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h +index bdf31f5161..dc660fb8f0 100644 +--- a/bgpd/bgpd.h ++++ b/bgpd/bgpd.h +@@ -512,6 +512,7 @@ struct bgp { + #define BGP_FLAG_HARD_ADMIN_RESET (1ULL << 31) + /* Evaluate the AIGP attribute during the best path selection process */ + #define BGP_FLAG_COMPARE_AIGP (1ULL << 32) ++#define BGP_FLAG_VNI_DOWN (1ULL << 38) + + /* BGP default address-families. + * New peers inherit enabled afi/safis from bgp instance. +-- +2.17.1 + diff --git a/src/sonic-frr/patch/0037-zebra-backpressure-Fix-Null-ptr-access-Coverity-Issu.patch b/src/sonic-frr/patch/0037-zebra-backpressure-Fix-Null-ptr-access-Coverity-Issu.patch new file mode 100644 index 000000000000..a3b1fa5fe0da --- /dev/null +++ b/src/sonic-frr/patch/0037-zebra-backpressure-Fix-Null-ptr-access-Coverity-Issu.patch @@ -0,0 +1,29 @@ +From b6caf88bd3bce9673d49435453991c49712287aa Mon Sep 17 00:00:00 2001 +From: Rajasekar Raja +Date: Thu, 11 Apr 2024 22:27:37 -0700 +Subject: [PATCH 08/11] zebra: backpressure - Fix Null ptr access (Coverity + Issue) + +Fix dereferencing NULL ptr making coverity happy. + +Ticket :#3390099 + +Signed-off-by: Rajasekar Raja + +diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c +index b81acaf8ec..524551b1e0 100644 +--- a/bgpd/bgp_zebra.c ++++ b/bgpd/bgp_zebra.c +@@ -1809,8 +1809,7 @@ static void bgp_handle_route_announcements_to_zebra(struct thread *e) + table = bgp_dest_table(dest); + install = + CHECK_FLAG(dest->flags, BGP_NODE_SCHEDULE_FOR_INSTALL); +- if (table && table->afi == AFI_L2VPN && +- table->safi == SAFI_EVPN) ++ if (table->afi == AFI_L2VPN && table->safi == SAFI_EVPN) + is_evpn = true; + + if (BGP_DEBUG(zebra, ZEBRA)) +-- +2.17.1 + diff --git a/src/sonic-frr/patch/0038-bgpd-Increase-install-uninstall-speed-of-evpn-vpn-vn.patch b/src/sonic-frr/patch/0038-bgpd-Increase-install-uninstall-speed-of-evpn-vpn-vn.patch new file mode 100644 index 000000000000..a360a3101599 --- /dev/null +++ b/src/sonic-frr/patch/0038-bgpd-Increase-install-uninstall-speed-of-evpn-vpn-vn.patch @@ -0,0 +1,116 @@ +From 7166c2222cb82885510c3e8c7906c1d7de950f9b Mon Sep 17 00:00:00 2001 +From: Donald Sharp +Date: Thu, 11 Apr 2024 13:28:30 -0400 +Subject: [PATCH 09/11] bgpd: Increase install/uninstall speed of evpn vpn + vni's + +BGP receives notification from zebra about an vpn that +needs to be installed into the evpn tables. Unfortunately +this function was walking the entirety of evpn tables +3 times. Modify the code to walk the tree 1 time and +to just look for the needed route types as you go. + +This reduces, in a scaled environment, processing +time of the zclient_read function from 130 seconds +to 95 seconds. For a up / down / up interface +scenario. + +Signed-off-by: Rajasekar Raja +Signed-off-by: Donald Sharp + +diff --git a/bgpd/bgp_evpn.c b/bgpd/bgp_evpn.c +index 622fd6afd2..eb5aa9f077 100644 +--- a/bgpd/bgp_evpn.c ++++ b/bgpd/bgp_evpn.c +@@ -3752,9 +3752,7 @@ static int install_uninstall_routes_for_vrf(struct bgp *bgp_vrf, int install) + * particular VNI. + */ + static int install_uninstall_routes_for_vni(struct bgp *bgp, +- struct bgpevpn *vpn, +- bgp_evpn_route_type rtype, +- int install) ++ struct bgpevpn *vpn, int install) + { + afi_t afi; + safi_t safi; +@@ -3785,7 +3783,9 @@ static int install_uninstall_routes_for_vni(struct bgp *bgp, + (const struct prefix_evpn *)bgp_dest_get_prefix( + dest); + +- if (evp->prefix.route_type != rtype) ++ if (evp->prefix.route_type != BGP_EVPN_IMET_ROUTE && ++ evp->prefix.route_type != BGP_EVPN_AD_ROUTE && ++ evp->prefix.route_type != BGP_EVPN_MAC_IP_ROUTE) + continue; + + for (pi = bgp_dest_get_bgp_path_info(dest); pi; +@@ -3812,7 +3812,8 @@ static int install_uninstall_routes_for_vni(struct bgp *bgp, + bgp->vrf_id, + install ? "install" + : "uninstall", +- rtype == BGP_EVPN_MAC_IP_ROUTE ++ evp->prefix.route_type == ++ BGP_EVPN_MAC_IP_ROUTE + ? "MACIP" + : "IMET", + vpn->vni); +@@ -3845,23 +3846,11 @@ static int install_routes_for_vrf(struct bgp *bgp_vrf) + */ + static int install_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn) + { +- int ret; +- +- /* Install type-3 routes followed by type-2 routes - the ones applicable ++ /* ++ * Install type-3 routes followed by type-2 routes - the ones applicable + * for this VNI. + */ +- ret = install_uninstall_routes_for_vni(bgp, vpn, BGP_EVPN_IMET_ROUTE, +- 1); +- if (ret) +- return ret; +- +- ret = install_uninstall_routes_for_vni(bgp, vpn, BGP_EVPN_AD_ROUTE, +- 1); +- if (ret) +- return ret; +- +- return install_uninstall_routes_for_vni(bgp, vpn, BGP_EVPN_MAC_IP_ROUTE, +- 1); ++ return install_uninstall_routes_for_vni(bgp, vpn, 1); + } + + /* uninstall routes from l3vni vrf. */ +@@ -3877,25 +3866,11 @@ static int uninstall_routes_for_vrf(struct bgp *bgp_vrf) + */ + static int uninstall_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn) + { +- int ret; +- +- /* Uninstall type-2 routes followed by type-3 routes - the ones +- * applicable +- * for this VNI. ++ /* ++ * Uninstall type-2 routes followed by type-3 routes - the ones ++ * applicable for this VNI. + */ +- ret = install_uninstall_routes_for_vni(bgp, vpn, BGP_EVPN_MAC_IP_ROUTE, +- 0); +- if (ret) +- return ret; +- +- ret = install_uninstall_routes_for_vni(bgp, vpn, BGP_EVPN_AD_ROUTE, +- 0); +- if (ret) +- return ret; +- +- +- return install_uninstall_routes_for_vni(bgp, vpn, BGP_EVPN_IMET_ROUTE, +- 0); ++ return install_uninstall_routes_for_vni(bgp, vpn, 0); + } + + /* +-- +2.17.1 + diff --git a/src/sonic-frr/patch/0039-zebra-Actually-display-I-O-buffer-sizes.patch b/src/sonic-frr/patch/0039-zebra-Actually-display-I-O-buffer-sizes.patch new file mode 100644 index 000000000000..904cb169b840 --- /dev/null +++ b/src/sonic-frr/patch/0039-zebra-Actually-display-I-O-buffer-sizes.patch @@ -0,0 +1,49 @@ +From 72781109e5dadafe55f10d72ae2b3505bf0ccb93 Mon Sep 17 00:00:00 2001 +From: Donald Sharp +Date: Mon, 8 Apr 2024 16:24:05 -0400 +Subject: [PATCH 10/11] zebra: Actually display I/O buffer sizes + +An operator found a situation where zebra was +backing up in a significant way towards BGP +with EVPN changes taking up some serious amounts +of memory. The key lines that would have clued +us in on it were behind a dev build. Let's change +this. + +Signed-off-by: Donald Sharp + +diff --git a/lib/stream.h b/lib/stream.h +index a3c148c9c9..7acbbd2dc7 100644 +--- a/lib/stream.h ++++ b/lib/stream.h +@@ -120,9 +120,7 @@ struct stream_fifo { + + /* number of streams in this fifo */ + atomic_size_t count; +-#if defined DEV_BUILD + atomic_size_t max_count; +-#endif + + struct stream *head; + struct stream *tail; +diff --git a/zebra/zserv.c b/zebra/zserv.c +index de6e404fc4..daccfa59d5 100644 +--- a/zebra/zserv.c ++++ b/zebra/zserv.c +@@ -1130,12 +1130,10 @@ static void zebra_show_client_detail(struct vty *vty, struct zserv *client) + vty_out(vty, "ES-EVI %-12u%-12u%-12u\n", + client->local_es_evi_add_cnt, 0, client->local_es_evi_del_cnt); + vty_out(vty, "Errors: %u\n", client->error_cnt); +- +-#if defined DEV_BUILD + vty_out(vty, "Input Fifo: %zu:%zu Output Fifo: %zu:%zu\n", + client->ibuf_fifo->count, client->ibuf_fifo->max_count, + client->obuf_fifo->count, client->obuf_fifo->max_count); +-#endif ++ + vty_out(vty, "\n"); + } + +-- +2.17.1 + diff --git a/src/sonic-frr/patch/0040-zebra-Actually-display-I-O-buffer-sizes-part-2.patch b/src/sonic-frr/patch/0040-zebra-Actually-display-I-O-buffer-sizes-part-2.patch new file mode 100644 index 000000000000..9088bfebbdad --- /dev/null +++ b/src/sonic-frr/patch/0040-zebra-Actually-display-I-O-buffer-sizes-part-2.patch @@ -0,0 +1,49 @@ +From 981bfca72414bfa7a97b6526e4736ea4f6834620 Mon Sep 17 00:00:00 2001 +From: Rajasekar Raja +Date: Mon, 22 Apr 2024 13:50:47 -0400 +Subject: [PATCH] From a88498262d8d88fb3846d1a5c1a373751fe6f381 Mon Sep 17 + 00:00:00 2001 Subject: [PATCH 11/11] zebra: Actually display I/O buffer sizes + (part-2) + +An extension of commit-8d8f12ba8e5cd11c189b8475b05539fa8415ccb9 + +Removing ifdef DEV_BUILD in stream_fifo_push as well to make the 'sh +zebra client' display the current I/O fifo along with max fifo items. + +TICKET :#3390099 + +Signed-off-by: Rajasekar Raja + +diff --git a/lib/stream.c b/lib/stream.c +index 2de3abdf45..3aef70794e 100644 +--- a/lib/stream.c ++++ b/lib/stream.c +@@ -1256,9 +1256,7 @@ void stream_fifo_init(struct stream_fifo *fifo) + /* Add new stream to fifo. */ + void stream_fifo_push(struct stream_fifo *fifo, struct stream *s) + { +-#if defined DEV_BUILD + size_t max, curmax; +-#endif + + if (fifo->tail) + fifo->tail->next = s; +@@ -1267,15 +1265,11 @@ void stream_fifo_push(struct stream_fifo *fifo, struct stream *s) + + fifo->tail = s; + fifo->tail->next = NULL; +-#if !defined DEV_BUILD +- atomic_fetch_add_explicit(&fifo->count, 1, memory_order_release); +-#else + max = atomic_fetch_add_explicit(&fifo->count, 1, memory_order_release); + curmax = atomic_load_explicit(&fifo->max_count, memory_order_relaxed); + if (max > curmax) + atomic_store_explicit(&fifo->max_count, max, + memory_order_relaxed); +-#endif + } + + void stream_fifo_push_safe(struct stream_fifo *fifo, struct stream *s) +-- +2.43.2 + diff --git a/src/sonic-frr/patch/series b/src/sonic-frr/patch/series index 445bc9dd9653..626a6888e541 100644 --- a/src/sonic-frr/patch/series +++ b/src/sonic-frr/patch/series @@ -27,3 +27,14 @@ 0027-lib-Do-not-convert-EVPN-prefixes-into-IPv4-IPv6-if-n.patch 0028-zebra-fix-parse-attr-problems-for-encap.patch 0029-zebra-nhg-fix-on-intf-up.patch +0030-isisd-staticd-need-to-link-directly-against-libyang.patch +0031-zebra-backpressure-Zebra-push-back-on-Buffer-Stream-.patch +0032-bgpd-backpressure-Add-a-typesafe-list-for-Zebra-Anno.patch +0033-bgpd-fix-flushing-ipv6-flowspec-entries-when-peering.patch +0034-bgpd-backpressure-cleanup-bgp_zebra_XX-func-args.patch +0035-gpd-backpressure-Handle-BGP-Zebra-Install-evt-Creat.patch +0036-bgpd-backpressure-Handle-BGP-Zebra-EPVN-Install-evt-.patch +0037-zebra-backpressure-Fix-Null-ptr-access-Coverity-Issu.patch +0038-bgpd-Increase-install-uninstall-speed-of-evpn-vpn-vn.patch +0039-zebra-Actually-display-I-O-buffer-sizes.patch +0040-zebra-Actually-display-I-O-buffer-sizes-part-2.patch