diff --git a/src/sonic-frr/patch/0041-bgpd-backpressure-Fix-to-withdraw-evpn-type-5-routes.patch b/src/sonic-frr/patch/0041-bgpd-backpressure-Fix-to-withdraw-evpn-type-5-routes.patch new file mode 100644 index 000000000000..b74d86881ae5 --- /dev/null +++ b/src/sonic-frr/patch/0041-bgpd-backpressure-Fix-to-withdraw-evpn-type-5-routes.patch @@ -0,0 +1,63 @@ +From f3ec35b461a06f1278383d2347dfbc611b6a91a6 Mon Sep 17 00:00:00 2001 +From: Rajasekar Raja +Date: Fri, 17 May 2024 12:36:31 -0700 +Subject: [PATCH] bgpd: backpressure - Fix to withdraw evpn type-5 routes + immediately + +As part of backpressure changes, there is a bug where immediate withdraw +is to be sent for evpn imported type-5 prefix to clear the nh neigh and +RMAC entry. + +Fixing this by sending withdraw immediately to keep it inline with the +code today + +Ticket: #3905571 + +Signed-off-by: Donald Sharp +Signed-off-by: Rajasekar Raja + +diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c +index 679abba463..e28069767f 100644 +--- a/bgpd/bgp_route.c ++++ b/bgpd/bgp_route.c +@@ -3312,8 +3312,7 @@ static void bgp_process_main_one(struct bgp *bgp, struct bgp_dest *dest, + */ + if (old_select && + is_route_parent_evpn(old_select)) +- bgp_zebra_route_install(dest, old_select, bgp, +- false, NULL, false); ++ bgp_zebra_withdraw_actual(dest, old_select, bgp); + + bgp_zebra_route_install(dest, new_select, bgp, true, + NULL, false); +diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c +index 524551b1e0..5d5525156b 100644 +--- a/bgpd/bgp_zebra.c ++++ b/bgpd/bgp_zebra.c +@@ -1889,11 +1889,9 @@ static void bgp_zebra_buffer_write_ready(void) + * save new pi, mark as going to be + * withdrawan, remove install flag + * +- * Withdrawal Install Special case, send withdrawal immediately +- * Leave dest on list, release old pi, ++ * Withdrawal Install Leave dest on list, release old pi, + * save new pi, mark as going to be +- * installed. ++ * installed. + * Withdrawal Withdrawal Leave dest on list, release old pi, + * save new pi, mark as going to be + * withdrawn. +@@ -1949,9 +1947,6 @@ void bgp_zebra_route_install(struct bgp_dest *dest, struct bgp_path_info *info, + dest->za_bgp_pi = info; + } else if (CHECK_FLAG(dest->flags, BGP_NODE_SCHEDULE_FOR_DELETE)) { + assert(dest->za_bgp_pi); +- if (install & !is_evpn) +- bgp_zebra_withdraw_actual(dest, dest->za_bgp_pi, bgp); +- + bgp_path_info_unlock(dest->za_bgp_pi); + bgp_path_info_lock(info); + dest->za_bgp_pi = info; +-- +2.43.2 + diff --git a/src/sonic-frr/patch/0042-bgpd-backpressure-Fix-to-avoid-CPU-hog.patch b/src/sonic-frr/patch/0042-bgpd-backpressure-Fix-to-avoid-CPU-hog.patch new file mode 100644 index 000000000000..860d928d3247 --- /dev/null +++ b/src/sonic-frr/patch/0042-bgpd-backpressure-Fix-to-avoid-CPU-hog.patch @@ -0,0 +1,53 @@ +From 4775b2b2bf39caa4bc5aed10ef44c6dcf9c7fc80 Mon Sep 17 00:00:00 2001 +From: Rajasekar Raja +Date: Fri, 17 May 2024 15:43:59 -0700 +Subject: [PATCH] bgpd: backpressure - Fix to avoid CPU hog + +In case when bgp_evpn_free or bgp_delete is called and the announce_list +has few items where vpn/bgp does not match, we add the item back to the +list. Because of this the list count is always > 0 thereby hogging CPU or +infinite loop. 
+ +Ticket: #3905624 + +Signed-off-by: Rajasekar Raja + +diff --git a/bgpd/bgp_evpn.c b/bgpd/bgp_evpn.c +index eb5aa9f077..2243ffdc77 100644 +--- a/bgpd/bgp_evpn.c ++++ b/bgpd/bgp_evpn.c +@@ -6074,9 +6074,11 @@ struct bgpevpn *bgp_evpn_new(struct bgp *bgp, vni_t vni, + void bgp_evpn_free(struct bgp *bgp, struct bgpevpn *vpn) + { + struct bgp_dest *dest = NULL; ++ uint32_t ann_count = zebra_announce_count(&bm->zebra_announce_head); + +- while (zebra_announce_count(&bm->zebra_announce_head)) { ++ while (ann_count) { + dest = zebra_announce_pop(&bm->zebra_announce_head); ++ ann_count--; + if (dest->za_vpn == vpn) { + bgp_path_info_unlock(dest->za_bgp_pi); + bgp_dest_unlock_node(dest); +diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c +index da133d71c1..492566f8c8 100644 +--- a/bgpd/bgpd.c ++++ b/bgpd/bgpd.c +@@ -3690,11 +3690,13 @@ int bgp_delete(struct bgp *bgp) + int i; + struct bgp_dest *dest = NULL; + struct graceful_restart_info *gr_info; ++ uint32_t ann_count = zebra_announce_count(&bm->zebra_announce_head); + + assert(bgp); + +- while (zebra_announce_count(&bm->zebra_announce_head)) { ++ while (ann_count) { + dest = zebra_announce_pop(&bm->zebra_announce_head); ++ ann_count--; + if (dest->za_bgp_pi->peer->bgp == bgp) { + bgp_path_info_unlock(dest->za_bgp_pi); + bgp_dest_unlock_node(dest); +-- +2.43.2 + diff --git a/src/sonic-frr/patch/0043-zebra-Use-built-in-data-structure-counter.patch b/src/sonic-frr/patch/0043-zebra-Use-built-in-data-structure-counter.patch new file mode 100644 index 000000000000..3401dae27158 --- /dev/null +++ b/src/sonic-frr/patch/0043-zebra-Use-built-in-data-structure-counter.patch @@ -0,0 +1,156 @@ +From 529cdfa09065c6c77aff4a96a52f0d3bcb385b6f Mon Sep 17 00:00:00 2001 +From: Donald Sharp +Date: Thu, 13 Jun 2024 15:30:00 -0400 +Subject: [PATCH 1/5] zebra: Use built in data structure counter + +Instead of keeping a counter that is independent +of the queue's data structure. Just use the queue's +built-in counter. Ensure that it's pthread safe by +keeping it wrapped inside the mutex for adding/deleting +to the queue. + +Signed-off-by: Donald Sharp + +diff --git a/zebra/dplane_fpm_nl.c b/zebra/dplane_fpm_nl.c +index caa2f988e2..bc9815bb10 100644 +--- a/zebra/dplane_fpm_nl.c ++++ b/zebra/dplane_fpm_nl.c +@@ -135,8 +135,6 @@ struct fpm_nl_ctx { + + /* Amount of data plane context processed. */ + _Atomic uint32_t dplane_contexts; +- /* Amount of data plane contexts enqueued. */ +- _Atomic uint32_t ctxqueue_len; + /* Peak amount of data plane contexts enqueued. 
*/ + _Atomic uint32_t ctxqueue_len_peak; + +@@ -311,6 +309,12 @@ DEFUN(fpm_show_counters, fpm_show_counters_cmd, + FPM_STR + "FPM statistic counters\n") + { ++ uint32_t curr_queue_len; ++ ++ frr_with_mutex (&gfnc->ctxqueue_mutex) { ++ curr_queue_len = dplane_ctx_queue_count(&gfnc->ctxqueue); ++ } ++ + vty_out(vty, "%30s\n%30s\n", "FPM counters", "============"); + + #define SHOW_COUNTER(label, counter) \ +@@ -324,8 +328,7 @@ DEFUN(fpm_show_counters, fpm_show_counters_cmd, + SHOW_COUNTER("Connection errors", gfnc->counters.connection_errors); + SHOW_COUNTER("Data plane items processed", + gfnc->counters.dplane_contexts); +- SHOW_COUNTER("Data plane items enqueued", +- gfnc->counters.ctxqueue_len); ++ SHOW_COUNTER("Data plane items enqueued", curr_queue_len); + SHOW_COUNTER("Data plane items queue peak", + gfnc->counters.ctxqueue_len_peak); + SHOW_COUNTER("Buffer full hits", gfnc->counters.buffer_full); +@@ -344,6 +347,12 @@ DEFUN(fpm_show_counters_json, fpm_show_counters_json_cmd, + "FPM statistic counters\n" + JSON_STR) + { ++ uint32_t curr_queue_len; ++ ++ frr_with_mutex (&gfnc->ctxqueue_mutex) { ++ curr_queue_len = dplane_ctx_queue_count(&gfnc->ctxqueue); ++ } ++ + struct json_object *jo; + + jo = json_object_new_object(); +@@ -357,8 +366,7 @@ DEFUN(fpm_show_counters_json, fpm_show_counters_json_cmd, + gfnc->counters.connection_errors); + json_object_int_add(jo, "data-plane-contexts", + gfnc->counters.dplane_contexts); +- json_object_int_add(jo, "data-plane-contexts-queue", +- gfnc->counters.ctxqueue_len); ++ json_object_int_add(jo, "data-plane-contexts-queue", curr_queue_len); + json_object_int_add(jo, "data-plane-contexts-queue-peak", + gfnc->counters.ctxqueue_len_peak); + json_object_int_add(jo, "buffer-full-hits", gfnc->counters.buffer_full); +@@ -1380,8 +1388,6 @@ static void fpm_process_queue(struct thread *t) + + /* Account the processed entries. */ + processed_contexts++; +- atomic_fetch_sub_explicit(&fnc->counters.ctxqueue_len, 1, +- memory_order_relaxed); + + dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS); + dplane_provider_enqueue_out_ctx(fnc->prov, ctx); +@@ -1550,7 +1556,7 @@ static int fpm_nl_process(struct zebra_dplane_provider *prov) + struct zebra_dplane_ctx *ctx; + struct fpm_nl_ctx *fnc; + int counter, limit; +- uint64_t cur_queue, peak_queue = 0, stored_peak_queue; ++ uint64_t cur_queue = 0, peak_queue = 0, stored_peak_queue; + + fnc = dplane_provider_get_data(prov); + limit = dplane_provider_get_work_limit(prov); +@@ -1564,20 +1570,12 @@ static int fpm_nl_process(struct zebra_dplane_provider *prov) + * anyway. + */ + if (fnc->socket != -1 && fnc->connecting == false) { +- /* +- * Update the number of queued contexts *before* +- * enqueueing, to ensure counter consistency. 
+- */ +- atomic_fetch_add_explicit(&fnc->counters.ctxqueue_len, +- 1, memory_order_relaxed); +- + frr_with_mutex (&fnc->ctxqueue_mutex) { + dplane_ctx_enqueue_tail(&fnc->ctxqueue, ctx); ++ cur_queue = ++ dplane_ctx_queue_count(&fnc->ctxqueue); + } + +- cur_queue = atomic_load_explicit( +- &fnc->counters.ctxqueue_len, +- memory_order_relaxed); + if (peak_queue < cur_queue) + peak_queue = cur_queue; + continue; +@@ -1594,9 +1592,7 @@ static int fpm_nl_process(struct zebra_dplane_provider *prov) + atomic_store_explicit(&fnc->counters.ctxqueue_len_peak, + peak_queue, memory_order_relaxed); + +- if (atomic_load_explicit(&fnc->counters.ctxqueue_len, +- memory_order_relaxed) +- > 0) ++ if (cur_queue > 0) + thread_add_timer(fnc->fthread->master, fpm_process_queue, + fnc, 0, &fnc->t_dequeue); + +diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c +index 59b8d6cc63..c252a370b8 100644 +--- a/zebra/zebra_dplane.c ++++ b/zebra/zebra_dplane.c +@@ -969,6 +969,11 @@ struct zebra_dplane_ctx *dplane_ctx_dequeue(struct dplane_ctx_list_head *q) + return ctx; + } + ++uint32_t dplane_ctx_queue_count(struct dplane_ctx_list_head *q) ++{ ++ return dplane_ctx_list_count(q); ++} ++ + /* + * Accessors for information from the context object + */ +diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h +index 9f9496c8f4..c29e05bbc9 100644 +--- a/zebra/zebra_dplane.h ++++ b/zebra/zebra_dplane.h +@@ -324,6 +324,8 @@ struct zebra_dplane_ctx *dplane_ctx_get_head(struct dplane_ctx_list_head *q); + /* Init a list of contexts */ + void dplane_ctx_q_init(struct dplane_ctx_list_head *q); + ++uint32_t dplane_ctx_queue_count(struct dplane_ctx_list_head *q); ++ + /* + * Accessors for information from the context object + */ +-- +2.43.2 + diff --git a/src/sonic-frr/patch/0044-zebra-Use-the-ctx-queue-counters.patch b/src/sonic-frr/patch/0044-zebra-Use-the-ctx-queue-counters.patch new file mode 100644 index 000000000000..e57684aee9a3 --- /dev/null +++ b/src/sonic-frr/patch/0044-zebra-Use-the-ctx-queue-counters.patch @@ -0,0 +1,108 @@ +From 59e2b19d2d08349ef0197b0adcb13d0bd7de2b79 Mon Sep 17 00:00:00 2001 +From: Donald Sharp +Date: Mon, 17 Jun 2024 11:05:28 -0400 +Subject: [PATCH 2/5] zebra: Use the ctx queue counters + +The ctx queue data structures already have a counter +associated with them. Let's just use them instead. 
+ +Signed-off-by: Donald Sharp + +diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c +index c252a370b8..c52e032660 100644 +--- a/zebra/zebra_dplane.c ++++ b/zebra/zebra_dplane.c +@@ -492,10 +492,8 @@ struct zebra_dplane_provider { + int (*dp_fini)(struct zebra_dplane_provider *prov, bool early_p); + + _Atomic uint32_t dp_in_counter; +- _Atomic uint32_t dp_in_queued; + _Atomic uint32_t dp_in_max; + _Atomic uint32_t dp_out_counter; +- _Atomic uint32_t dp_out_queued; + _Atomic uint32_t dp_out_max; + _Atomic uint32_t dp_error_counter; + +@@ -6008,17 +6006,19 @@ int dplane_show_provs_helper(struct vty *vty, bool detailed) + + /* Show counters, useful info from each registered provider */ + while (prov) { ++ dplane_provider_lock(prov); ++ in_q = dplane_ctx_queue_count(&prov->dp_ctx_in_list); ++ out_q = dplane_ctx_queue_count(&prov->dp_ctx_out_list); ++ dplane_provider_unlock(prov); + + in = atomic_load_explicit(&prov->dp_in_counter, + memory_order_relaxed); +- in_q = atomic_load_explicit(&prov->dp_in_queued, +- memory_order_relaxed); ++ + in_max = atomic_load_explicit(&prov->dp_in_max, + memory_order_relaxed); + out = atomic_load_explicit(&prov->dp_out_counter, + memory_order_relaxed); +- out_q = atomic_load_explicit(&prov->dp_out_queued, +- memory_order_relaxed); ++ + out_max = atomic_load_explicit(&prov->dp_out_max, + memory_order_relaxed); + +@@ -6169,10 +6169,6 @@ struct zebra_dplane_ctx *dplane_provider_dequeue_in_ctx( + dplane_provider_lock(prov); + + ctx = dplane_ctx_list_pop(&(prov->dp_ctx_in_list)); +- if (ctx) { +- atomic_fetch_sub_explicit(&prov->dp_in_queued, 1, +- memory_order_relaxed); +- } + + dplane_provider_unlock(prov); + +@@ -6200,10 +6196,6 @@ int dplane_provider_dequeue_in_list(struct zebra_dplane_provider *prov, + break; + } + +- if (ret > 0) +- atomic_fetch_sub_explicit(&prov->dp_in_queued, ret, +- memory_order_relaxed); +- + dplane_provider_unlock(prov); + + return ret; +@@ -6228,10 +6220,7 @@ void dplane_provider_enqueue_out_ctx(struct zebra_dplane_provider *prov, + dplane_ctx_list_add_tail(&(prov->dp_ctx_out_list), ctx); + + /* Maintain out-queue counters */ +- atomic_fetch_add_explicit(&(prov->dp_out_queued), 1, +- memory_order_relaxed); +- curr = atomic_load_explicit(&prov->dp_out_queued, +- memory_order_relaxed); ++ curr = dplane_ctx_queue_count(&prov->dp_ctx_out_list); + high = atomic_load_explicit(&prov->dp_out_max, + memory_order_relaxed); + if (curr > high) +@@ -6253,9 +6242,6 @@ dplane_provider_dequeue_out_ctx(struct zebra_dplane_provider *prov) + if (!ctx) + return NULL; + +- atomic_fetch_sub_explicit(&(prov->dp_out_queued), 1, +- memory_order_relaxed); +- + return ctx; + } + +@@ -7260,10 +7246,7 @@ static void dplane_thread_loop(struct thread *event) + + atomic_fetch_add_explicit(&prov->dp_in_counter, counter, + memory_order_relaxed); +- atomic_fetch_add_explicit(&prov->dp_in_queued, counter, +- memory_order_relaxed); +- curr = atomic_load_explicit(&prov->dp_in_queued, +- memory_order_relaxed); ++ curr = dplane_ctx_queue_count(&prov->dp_ctx_in_list); + high = atomic_load_explicit(&prov->dp_in_max, + memory_order_relaxed); + if (curr > high) +-- +2.43.2 + diff --git a/src/sonic-frr/patch/0045-zebra-Modify-dplane-loop-to-allow-backpressure-to-fi.patch b/src/sonic-frr/patch/0045-zebra-Modify-dplane-loop-to-allow-backpressure-to-fi.patch new file mode 100644 index 000000000000..67b76d03424a --- /dev/null +++ b/src/sonic-frr/patch/0045-zebra-Modify-dplane-loop-to-allow-backpressure-to-fi.patch @@ -0,0 +1,199 @@ +From 4671ddf4920553b663fda129f7c4366839347645 
Mon Sep 17 00:00:00 2001 +From: Donald Sharp +Date: Wed, 12 Jun 2024 14:14:48 -0400 +Subject: [PATCH 3/5] zebra: Modify dplane loop to allow backpressure to filter + up + +Currently when the dplane_thread_loop is run, it moves contexts +from the dg_update_list and puts the contexts on the input queue +of the first provider. This provider is given a chance to run +and then the items on the output queue are pulled off and placed +on the input queue of the next provider. Rinse/Repeat down through +the entire list of providers. Now imagine that we have a list +of multiple providers and the last provider is getting backed up. +Contexts will end up sticking in the input Queue of the `slow` +provider. This can grow without bounds. This is a real problem +when you have a situation where an interface is flapping and an +upper level protocol is sending a continous stream of route +updates to reflect the change in ecmp. You can end up with +a very very large backlog of contexts. This is bad because +zebra can easily grow to a very very large memory size and on +restricted systems you can run out of memory. Fortunately +for us, the MetaQ already participates with this process +by not doing more route processing until the dg_update_list +goes below the working limit of dg_updates_per_cycle. Thus +if FRR modifies the behavior of this loop to not move more +contexts onto the input queue if either the input queue +or output queue of the next provider has reached this limit. +FRR will naturaly start auto handling backpressure for the dplane +context system and memory will not go out of control. + +Signed-off-by: Donald Sharp + +diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c +index c52e032660..f0e1ff6f27 100644 +--- a/zebra/zebra_dplane.c ++++ b/zebra/zebra_dplane.c +@@ -7155,10 +7155,10 @@ static void dplane_thread_loop(struct thread *event) + { + struct dplane_ctx_list_head work_list; + struct dplane_ctx_list_head error_list; +- struct zebra_dplane_provider *prov; ++ struct zebra_dplane_provider *prov, *next_prov; + struct zebra_dplane_ctx *ctx; + int limit, counter, error_counter; +- uint64_t curr, high; ++ uint64_t curr, out_curr, high; + bool reschedule = false; + + /* Capture work limit per cycle */ +@@ -7182,18 +7182,48 @@ static void dplane_thread_loop(struct thread *event) + /* Locate initial registered provider */ + prov = dplane_prov_list_first(&zdplane_info.dg_providers); + +- /* Move new work from incoming list to temp list */ +- for (counter = 0; counter < limit; counter++) { +- ctx = dplane_ctx_list_pop(&zdplane_info.dg_update_list); +- if (ctx) { +- ctx->zd_provider = prov->dp_id; ++ curr = dplane_ctx_queue_count(&prov->dp_ctx_in_list); ++ out_curr = dplane_ctx_queue_count(&prov->dp_ctx_out_list); + +- dplane_ctx_list_add_tail(&work_list, ctx); +- } else { +- break; ++ if (curr >= (uint64_t)limit) { ++ if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) ++ zlog_debug("%s: Current first provider(%s) Input queue is %" PRIu64 ++ ", holding off work", ++ __func__, prov->dp_name, curr); ++ counter = 0; ++ } else if (out_curr >= (uint64_t)limit) { ++ if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) ++ zlog_debug("%s: Current first provider(%s) Output queue is %" PRIu64 ++ ", holding off work", ++ __func__, prov->dp_name, out_curr); ++ counter = 0; ++ } else { ++ int tlimit; ++ /* ++ * Let's limit the work to how what can be put on the ++ * in or out queue without going over ++ */ ++ tlimit = limit - MAX(curr, out_curr); ++ /* Move new work from incoming list to temp list */ ++ for (counter = 0; counter < tlimit; 
counter++) { ++ ctx = dplane_ctx_list_pop(&zdplane_info.dg_update_list); ++ if (ctx) { ++ ctx->zd_provider = prov->dp_id; ++ ++ dplane_ctx_list_add_tail(&work_list, ctx); ++ } else { ++ break; ++ } + } + } + ++ /* ++ * If there is anything still on the two input queues reschedule ++ */ ++ if (dplane_ctx_queue_count(&prov->dp_ctx_in_list) > 0 || ++ dplane_ctx_queue_count(&zdplane_info.dg_update_list) > 0) ++ reschedule = true; ++ + DPLANE_UNLOCK(); + + atomic_fetch_sub_explicit(&zdplane_info.dg_routes_queued, counter, +@@ -7212,8 +7242,9 @@ static void dplane_thread_loop(struct thread *event) + * items. + */ + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) +- zlog_debug("dplane enqueues %d new work to provider '%s'", +- counter, dplane_provider_get_name(prov)); ++ zlog_debug("dplane enqueues %d new work to provider '%s' curr is %" PRIu64, ++ counter, dplane_provider_get_name(prov), ++ curr); + + /* Capture current provider id in each context; check for + * error status. +@@ -7271,18 +7302,61 @@ static void dplane_thread_loop(struct thread *event) + if (!zdplane_info.dg_run) + break; + ++ /* Locate next provider */ ++ next_prov = dplane_prov_list_next(&zdplane_info.dg_providers, ++ prov); ++ if (next_prov) { ++ curr = dplane_ctx_queue_count( ++ &next_prov->dp_ctx_in_list); ++ out_curr = dplane_ctx_queue_count( ++ &next_prov->dp_ctx_out_list); ++ } else ++ out_curr = curr = 0; ++ + /* Dequeue completed work from the provider */ + dplane_provider_lock(prov); + +- while (counter < limit) { +- ctx = dplane_provider_dequeue_out_ctx(prov); +- if (ctx) { +- dplane_ctx_list_add_tail(&work_list, ctx); +- counter++; +- } else +- break; ++ if (curr >= (uint64_t)limit) { ++ if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) ++ zlog_debug("%s: Next Provider(%s) Input queue is %" PRIu64 ++ ", holding off work", ++ __func__, next_prov->dp_name, curr); ++ counter = 0; ++ } else if (out_curr >= (uint64_t)limit) { ++ if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) ++ zlog_debug("%s: Next Provider(%s) Output queue is %" PRIu64 ++ ", holding off work", ++ __func__, next_prov->dp_name, ++ out_curr); ++ counter = 0; ++ } else { ++ int tlimit; ++ ++ /* ++ * Let's limit the work to how what can be put on the ++ * in or out queue without going over ++ */ ++ tlimit = limit - MAX(curr, out_curr); ++ while (counter < tlimit) { ++ ctx = dplane_provider_dequeue_out_ctx(prov); ++ if (ctx) { ++ dplane_ctx_list_add_tail(&work_list, ++ ctx); ++ counter++; ++ } else ++ break; ++ } + } + ++ /* ++ * Let's check if there are still any items on the ++ * input or output queus of the current provider ++ * if so then we know we need to reschedule. 
++ */ ++ if (dplane_ctx_queue_count(&prov->dp_ctx_in_list) > 0 || ++ dplane_ctx_queue_count(&prov->dp_ctx_out_list) > 0) ++ reschedule = true; ++ + dplane_provider_unlock(prov); + + if (counter >= limit) +@@ -7293,7 +7367,7 @@ static void dplane_thread_loop(struct thread *event) + counter, dplane_provider_get_name(prov)); + + /* Locate next provider */ +- prov = dplane_prov_list_next(&zdplane_info.dg_providers, prov); ++ prov = next_prov; + } + + /* +-- +2.43.2 + diff --git a/src/sonic-frr/patch/0046-zebra-Limit-queue-depth-in-dplane_fpm_nl.patch b/src/sonic-frr/patch/0046-zebra-Limit-queue-depth-in-dplane_fpm_nl.patch new file mode 100644 index 000000000000..7f25a773a101 --- /dev/null +++ b/src/sonic-frr/patch/0046-zebra-Limit-queue-depth-in-dplane_fpm_nl.patch @@ -0,0 +1,52 @@ +From 50f606c158f6c89abd0d3f531905005d3a48a5b6 Mon Sep 17 00:00:00 2001 +From: Donald Sharp +Date: Wed, 12 Jun 2024 15:16:08 -0400 +Subject: [PATCH 4/5] zebra: Limit queue depth in dplane_fpm_nl + +The dplane providers have a concept of input queues +and output queues. These queues are chained together +during normal operation. The code in zebra also has +a feedback mechanism where the MetaQ will not run when +the first input queue is backed up. Having the dplane_fpm_nl +code grab all contexts when it is backed up prevents +this system from behaving appropriately. + +Modify the code to not add to the dplane_fpm_nl's internal +queue when it is already full. This will allow the backpressure +to work appropriately in zebra proper. + +Signed-off-by: Donald Sharp + +diff --git a/zebra/dplane_fpm_nl.c b/zebra/dplane_fpm_nl.c +index bc9815bb10..4fd42f64a2 100644 +--- a/zebra/dplane_fpm_nl.c ++++ b/zebra/dplane_fpm_nl.c +@@ -1560,6 +1560,25 @@ static int fpm_nl_process(struct zebra_dplane_provider *prov) + + fnc = dplane_provider_get_data(prov); + limit = dplane_provider_get_work_limit(prov); ++ ++ frr_with_mutex (&fnc->ctxqueue_mutex) { ++ cur_queue = dplane_ctx_queue_count(&fnc->ctxqueue); ++ } ++ ++ if (cur_queue >= (uint64_t)limit) { ++ if (IS_ZEBRA_DEBUG_FPM) ++ zlog_debug("%s: Already at a limit(%" PRIu64 ++ ") of internal work, hold off", ++ __func__, cur_queue); ++ limit = 0; ++ } else { ++ if (IS_ZEBRA_DEBUG_FPM) ++ zlog_debug("%s: current queue is %" PRIu64 ++ ", limiting to lesser amount of %" PRIu64, ++ __func__, cur_queue, limit - cur_queue); ++ limit -= cur_queue; ++ } ++ + for (counter = 0; counter < limit; counter++) { + ctx = dplane_provider_dequeue_in_ctx(prov); + if (ctx == NULL) +-- +2.43.2 + diff --git a/src/sonic-frr/patch/0047-zebra-Modify-show-zebra-dplane-providers-to-give-mor.patch b/src/sonic-frr/patch/0047-zebra-Modify-show-zebra-dplane-providers-to-give-mor.patch new file mode 100644 index 000000000000..e865b861d192 --- /dev/null +++ b/src/sonic-frr/patch/0047-zebra-Modify-show-zebra-dplane-providers-to-give-mor.patch @@ -0,0 +1,102 @@ +From d695dfb88418834f0054255dd4385b293efb5a17 Mon Sep 17 00:00:00 2001 +From: Donald Sharp +Date: Mon, 17 Jun 2024 10:42:41 -0400 +Subject: [PATCH 5/5] zebra: Modify show `zebra dplane providers` to give more + data + +The show zebra dplane provider command was ommitting +the input and output queues to the dplane itself. +It would be nice to have this insight as well. 
+ +New output: +r1# show zebra dplane providers +dataplane Incoming Queue from Zebra: 100 +Zebra dataplane providers: + Kernel (1): in: 6, q: 0, q_max: 3, out: 6, q: 14, q_max: 3 + dplane_fpm_nl (2): in: 6, q: 10, q_max: 3, out: 6, q: 0, q_max: 3 +dataplane Outgoing Queue to Zebra: 43 +r1# + +Signed-off-by: Donald Sharp + +diff --git a/zebra/rib.h b/zebra/rib.h +index 2e62148ea0..b78cd218f6 100644 +--- a/zebra/rib.h ++++ b/zebra/rib.h +@@ -630,6 +630,7 @@ static inline struct nexthop_group *rib_get_fib_backup_nhg( + } + + extern void zebra_vty_init(void); ++extern uint32_t zebra_rib_dplane_results_count(void); + + extern pid_t pid; + +diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c +index f0e1ff6f27..21c73a3796 100644 +--- a/zebra/zebra_dplane.c ++++ b/zebra/zebra_dplane.c +@@ -5998,12 +5998,14 @@ int dplane_show_provs_helper(struct vty *vty, bool detailed) + struct zebra_dplane_provider *prov; + uint64_t in, in_q, in_max, out, out_q, out_max; + +- vty_out(vty, "Zebra dataplane providers:\n"); +- + DPLANE_LOCK(); + prov = dplane_prov_list_first(&zdplane_info.dg_providers); ++ in = dplane_ctx_queue_count(&zdplane_info.dg_update_list); + DPLANE_UNLOCK(); + ++ vty_out(vty, "dataplane Incoming Queue from Zebra: %" PRIu64 "\n", in); ++ vty_out(vty, "Zebra dataplane providers:\n"); ++ + /* Show counters, useful info from each registered provider */ + while (prov) { + dplane_provider_lock(prov); +@@ -6022,13 +6024,19 @@ int dplane_show_provs_helper(struct vty *vty, bool detailed) + out_max = atomic_load_explicit(&prov->dp_out_max, + memory_order_relaxed); + +- vty_out(vty, "%s (%u): in: %"PRIu64", q: %"PRIu64", q_max: %"PRIu64", out: %"PRIu64", q: %"PRIu64", q_max: %"PRIu64"\n", +- prov->dp_name, prov->dp_id, in, in_q, in_max, +- out, out_q, out_max); ++ vty_out(vty, ++ " %s (%u): in: %" PRIu64 ", q: %" PRIu64 ++ ", q_max: %" PRIu64 ", out: %" PRIu64 ", q: %" PRIu64 ++ ", q_max: %" PRIu64 "\n", ++ prov->dp_name, prov->dp_id, in, in_q, in_max, out, ++ out_q, out_max); + + prov = dplane_prov_list_next(&zdplane_info.dg_providers, prov); + } + ++ out = zebra_rib_dplane_results_count(); ++ vty_out(vty, "dataplane Outgoing Queue to Zebra: %" PRIu64 "\n", out); ++ + return CMD_SUCCESS; + } + +diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c +index 1ff3d98545..ae051d37eb 100644 +--- a/zebra/zebra_rib.c ++++ b/zebra/zebra_rib.c +@@ -4874,6 +4874,17 @@ static int rib_dplane_results(struct dplane_ctx_list_head *ctxlist) + return 0; + } + ++uint32_t zebra_rib_dplane_results_count(void) ++{ ++ uint32_t count; ++ ++ frr_with_mutex (&dplane_mutex) { ++ count = dplane_ctx_queue_count(&rib_dplane_q); ++ } ++ ++ return count; ++} ++ + /* + * Ensure there are no empty slots in the route_info array. + * Every route type in zebra should be present there. 
+-- +2.43.2 + diff --git a/src/sonic-frr/patch/series b/src/sonic-frr/patch/series index 626a6888e541..bdfc06a4c954 100644 --- a/src/sonic-frr/patch/series +++ b/src/sonic-frr/patch/series @@ -38,3 +38,10 @@ 0038-bgpd-Increase-install-uninstall-speed-of-evpn-vpn-vn.patch 0039-zebra-Actually-display-I-O-buffer-sizes.patch 0040-zebra-Actually-display-I-O-buffer-sizes-part-2.patch +0041-bgpd-backpressure-Fix-to-withdraw-evpn-type-5-routes.patch +0042-bgpd-backpressure-Fix-to-avoid-CPU-hog.patch +0043-zebra-Use-built-in-data-structure-counter.patch +0044-zebra-Use-the-ctx-queue-counters.patch +0045-zebra-Modify-dplane-loop-to-allow-backpressure-to-fi.patch +0046-zebra-Limit-queue-depth-in-dplane_fpm_nl.patch +0047-zebra-Modify-show-zebra-dplane-providers-to-give-mor.patch