Fixing route withdrawal issues
dgsudharsan committed Jun 27, 2024
1 parent 365bdf3 commit 9a384ef
Showing 8 changed files with 740 additions and 0 deletions.
@@ -0,0 +1,63 @@
From f3ec35b461a06f1278383d2347dfbc611b6a91a6 Mon Sep 17 00:00:00 2001
From: Rajasekar Raja <[email protected]>
Date: Fri, 17 May 2024 12:36:31 -0700
Subject: [PATCH] bgpd: backpressure - Fix to withdraw evpn type-5 routes
immediately

As part of the backpressure changes, there is a bug where an immediate
withdraw needs to be sent for an EVPN imported type-5 prefix in order to
clear the nexthop neighbor and RMAC entries, but the withdraw is queued
instead.

Fix this by sending the withdraw immediately, keeping it in line with
how the code behaves today.

Ticket: #3905571

Signed-off-by: Donald Sharp <[email protected]>
Signed-off-by: Rajasekar Raja <[email protected]>
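
For readability, here is a condensed view of the patched logic in
bgp_process_main_one() (taken from the diff below, not new code): when
the previous best path came from an EVPN-imported route, its withdraw
bypasses the zebra announce queue, while the new install is still
queued.

    /* Condensed from the diff below: the withdraw for an EVPN-imported
     * parent is sent synchronously so zebra can clear the nexthop
     * neighbor and RMAC entries right away; the install stays queued. */
    if (old_select && is_route_parent_evpn(old_select))
            bgp_zebra_withdraw_actual(dest, old_select, bgp);

    bgp_zebra_route_install(dest, new_select, bgp, true, NULL, false);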

diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c
index 679abba463..e28069767f 100644
--- a/bgpd/bgp_route.c
+++ b/bgpd/bgp_route.c
@@ -3312,8 +3312,7 @@ static void bgp_process_main_one(struct bgp *bgp, struct bgp_dest *dest,
*/
if (old_select &&
is_route_parent_evpn(old_select))
- bgp_zebra_route_install(dest, old_select, bgp,
- false, NULL, false);
+ bgp_zebra_withdraw_actual(dest, old_select, bgp);

bgp_zebra_route_install(dest, new_select, bgp, true,
NULL, false);
diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c
index 524551b1e0..5d5525156b 100644
--- a/bgpd/bgp_zebra.c
+++ b/bgpd/bgp_zebra.c
@@ -1889,11 +1889,9 @@ static void bgp_zebra_buffer_write_ready(void)
* save new pi, mark as going to be
* withdrawan, remove install flag
*
- * Withdrawal Install Special case, send withdrawal immediately
- * Leave dest on list, release old pi,
+ * Withdrawal Install Leave dest on list, release old pi,
* save new pi, mark as going to be
- * installed. <see note about evpn
- * in bgp_route.c in bgp_process_main_one>
+ * installed.
* Withdrawal Withdrawal Leave dest on list, release old pi,
* save new pi, mark as going to be
* withdrawn.
@@ -1949,9 +1947,6 @@ void bgp_zebra_route_install(struct bgp_dest *dest, struct bgp_path_info *info,
dest->za_bgp_pi = info;
} else if (CHECK_FLAG(dest->flags, BGP_NODE_SCHEDULE_FOR_DELETE)) {
assert(dest->za_bgp_pi);
- if (install & !is_evpn)
- bgp_zebra_withdraw_actual(dest, dest->za_bgp_pi, bgp);
-
bgp_path_info_unlock(dest->za_bgp_pi);
bgp_path_info_lock(info);
dest->za_bgp_pi = info;
--
2.43.2

@@ -0,0 +1,53 @@
From 4775b2b2bf39caa4bc5aed10ef44c6dcf9c7fc80 Mon Sep 17 00:00:00 2001
From: Rajasekar Raja <[email protected]>
Date: Fri, 17 May 2024 15:43:59 -0700
Subject: [PATCH] bgpd: backpressure - Fix to avoid CPU hog

When bgp_evpn_free or bgp_delete is called and the announce_list has a
few items whose vpn/bgp does not match, we add the item back to the
list. Because of this the list count stays > 0, and the drain loop hogs
the CPU in an infinite loop.

Ticket: #3905624

Signed-off-by: Rajasekar Raja <[email protected]>
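
The fix bounds the drain loop with a snapshot of the queue length taken
once, before the loop starts, so entries that are re-added cannot keep
the count above zero forever. A condensed sketch of the patched loop
(the per-entry matching logic is elided, as in the diff below):

    uint32_t ann_count = zebra_announce_count(&bm->zebra_announce_head);

    /* Drain at most the number of entries present at loop entry;
     * items re-added for another vpn/bgp no longer extend the loop. */
    while (ann_count) {
            dest = zebra_announce_pop(&bm->zebra_announce_head);
            ann_count--;
            /* ... match and release, or re-queue ... */
    }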

diff --git a/bgpd/bgp_evpn.c b/bgpd/bgp_evpn.c
index eb5aa9f077..2243ffdc77 100644
--- a/bgpd/bgp_evpn.c
+++ b/bgpd/bgp_evpn.c
@@ -6074,9 +6074,11 @@ struct bgpevpn *bgp_evpn_new(struct bgp *bgp, vni_t vni,
void bgp_evpn_free(struct bgp *bgp, struct bgpevpn *vpn)
{
struct bgp_dest *dest = NULL;
+ uint32_t ann_count = zebra_announce_count(&bm->zebra_announce_head);

- while (zebra_announce_count(&bm->zebra_announce_head)) {
+ while (ann_count) {
dest = zebra_announce_pop(&bm->zebra_announce_head);
+ ann_count--;
if (dest->za_vpn == vpn) {
bgp_path_info_unlock(dest->za_bgp_pi);
bgp_dest_unlock_node(dest);
diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c
index da133d71c1..492566f8c8 100644
--- a/bgpd/bgpd.c
+++ b/bgpd/bgpd.c
@@ -3690,11 +3690,13 @@ int bgp_delete(struct bgp *bgp)
int i;
struct bgp_dest *dest = NULL;
struct graceful_restart_info *gr_info;
+ uint32_t ann_count = zebra_announce_count(&bm->zebra_announce_head);

assert(bgp);

- while (zebra_announce_count(&bm->zebra_announce_head)) {
+ while (ann_count) {
dest = zebra_announce_pop(&bm->zebra_announce_head);
+ ann_count--;
if (dest->za_bgp_pi->peer->bgp == bgp) {
bgp_path_info_unlock(dest->za_bgp_pi);
bgp_dest_unlock_node(dest);
--
2.43.2

@@ -0,0 +1,156 @@
From 529cdfa09065c6c77aff4a96a52f0d3bcb385b6f Mon Sep 17 00:00:00 2001
From: Donald Sharp <[email protected]>
Date: Thu, 13 Jun 2024 15:30:00 -0400
Subject: [PATCH 1/5] zebra: Use built in data structure counter

Instead of keeping a counter that is independent of the queue's data
structure, just use the queue's built-in counter. Ensure that it is
pthread safe by keeping the access wrapped inside the mutex used for
adding to and deleting from the queue.

Signed-off-by: Donald Sharp <[email protected]>
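
Condensed from the diff below: the counter read now happens under the
same mutex that guards queue mutation, so the reported value cannot
drift from the queue's real length.

    uint32_t curr_queue_len;

    /* Read the list's built-in count while holding the queue mutex. */
    frr_with_mutex (&gfnc->ctxqueue_mutex) {
            curr_queue_len = dplane_ctx_queue_count(&gfnc->ctxqueue);
    }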

diff --git a/zebra/dplane_fpm_nl.c b/zebra/dplane_fpm_nl.c
index caa2f988e2..bc9815bb10 100644
--- a/zebra/dplane_fpm_nl.c
+++ b/zebra/dplane_fpm_nl.c
@@ -135,8 +135,6 @@ struct fpm_nl_ctx {

/* Amount of data plane context processed. */
_Atomic uint32_t dplane_contexts;
- /* Amount of data plane contexts enqueued. */
- _Atomic uint32_t ctxqueue_len;
/* Peak amount of data plane contexts enqueued. */
_Atomic uint32_t ctxqueue_len_peak;

@@ -311,6 +309,12 @@ DEFUN(fpm_show_counters, fpm_show_counters_cmd,
FPM_STR
"FPM statistic counters\n")
{
+ uint32_t curr_queue_len;
+
+ frr_with_mutex (&gfnc->ctxqueue_mutex) {
+ curr_queue_len = dplane_ctx_queue_count(&gfnc->ctxqueue);
+ }
+
vty_out(vty, "%30s\n%30s\n", "FPM counters", "============");

#define SHOW_COUNTER(label, counter) \
@@ -324,8 +328,7 @@ DEFUN(fpm_show_counters, fpm_show_counters_cmd,
SHOW_COUNTER("Connection errors", gfnc->counters.connection_errors);
SHOW_COUNTER("Data plane items processed",
gfnc->counters.dplane_contexts);
- SHOW_COUNTER("Data plane items enqueued",
- gfnc->counters.ctxqueue_len);
+ SHOW_COUNTER("Data plane items enqueued", curr_queue_len);
SHOW_COUNTER("Data plane items queue peak",
gfnc->counters.ctxqueue_len_peak);
SHOW_COUNTER("Buffer full hits", gfnc->counters.buffer_full);
@@ -344,6 +347,12 @@ DEFUN(fpm_show_counters_json, fpm_show_counters_json_cmd,
"FPM statistic counters\n"
JSON_STR)
{
+ uint32_t curr_queue_len;
+
+ frr_with_mutex (&gfnc->ctxqueue_mutex) {
+ curr_queue_len = dplane_ctx_queue_count(&gfnc->ctxqueue);
+ }
+
struct json_object *jo;

jo = json_object_new_object();
@@ -357,8 +366,7 @@ DEFUN(fpm_show_counters_json, fpm_show_counters_json_cmd,
gfnc->counters.connection_errors);
json_object_int_add(jo, "data-plane-contexts",
gfnc->counters.dplane_contexts);
- json_object_int_add(jo, "data-plane-contexts-queue",
- gfnc->counters.ctxqueue_len);
+ json_object_int_add(jo, "data-plane-contexts-queue", curr_queue_len);
json_object_int_add(jo, "data-plane-contexts-queue-peak",
gfnc->counters.ctxqueue_len_peak);
json_object_int_add(jo, "buffer-full-hits", gfnc->counters.buffer_full);
@@ -1380,8 +1388,6 @@ static void fpm_process_queue(struct thread *t)

/* Account the processed entries. */
processed_contexts++;
- atomic_fetch_sub_explicit(&fnc->counters.ctxqueue_len, 1,
- memory_order_relaxed);

dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS);
dplane_provider_enqueue_out_ctx(fnc->prov, ctx);
@@ -1550,7 +1556,7 @@ static int fpm_nl_process(struct zebra_dplane_provider *prov)
struct zebra_dplane_ctx *ctx;
struct fpm_nl_ctx *fnc;
int counter, limit;
- uint64_t cur_queue, peak_queue = 0, stored_peak_queue;
+ uint64_t cur_queue = 0, peak_queue = 0, stored_peak_queue;

fnc = dplane_provider_get_data(prov);
limit = dplane_provider_get_work_limit(prov);
@@ -1564,20 +1570,12 @@ static int fpm_nl_process(struct zebra_dplane_provider *prov)
* anyway.
*/
if (fnc->socket != -1 && fnc->connecting == false) {
- /*
- * Update the number of queued contexts *before*
- * enqueueing, to ensure counter consistency.
- */
- atomic_fetch_add_explicit(&fnc->counters.ctxqueue_len,
- 1, memory_order_relaxed);
-
frr_with_mutex (&fnc->ctxqueue_mutex) {
dplane_ctx_enqueue_tail(&fnc->ctxqueue, ctx);
+ cur_queue =
+ dplane_ctx_queue_count(&fnc->ctxqueue);
}

- cur_queue = atomic_load_explicit(
- &fnc->counters.ctxqueue_len,
- memory_order_relaxed);
if (peak_queue < cur_queue)
peak_queue = cur_queue;
continue;
@@ -1594,9 +1592,7 @@ static int fpm_nl_process(struct zebra_dplane_provider *prov)
atomic_store_explicit(&fnc->counters.ctxqueue_len_peak,
peak_queue, memory_order_relaxed);

- if (atomic_load_explicit(&fnc->counters.ctxqueue_len,
- memory_order_relaxed)
- > 0)
+ if (cur_queue > 0)
thread_add_timer(fnc->fthread->master, fpm_process_queue,
fnc, 0, &fnc->t_dequeue);

diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c
index 59b8d6cc63..c252a370b8 100644
--- a/zebra/zebra_dplane.c
+++ b/zebra/zebra_dplane.c
@@ -969,6 +969,11 @@ struct zebra_dplane_ctx *dplane_ctx_dequeue(struct dplane_ctx_list_head *q)
return ctx;
}

+uint32_t dplane_ctx_queue_count(struct dplane_ctx_list_head *q)
+{
+ return dplane_ctx_list_count(q);
+}
+
/*
* Accessors for information from the context object
*/
diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h
index 9f9496c8f4..c29e05bbc9 100644
--- a/zebra/zebra_dplane.h
+++ b/zebra/zebra_dplane.h
@@ -324,6 +324,8 @@ struct zebra_dplane_ctx *dplane_ctx_get_head(struct dplane_ctx_list_head *q);
/* Init a list of contexts */
void dplane_ctx_q_init(struct dplane_ctx_list_head *q);

+uint32_t dplane_ctx_queue_count(struct dplane_ctx_list_head *q);
+
/*
* Accessors for information from the context object
*/
--
2.43.2

108 changes: 108 additions & 0 deletions src/sonic-frr/patch/0044-zebra-Use-the-ctx-queue-counters.patch
@@ -0,0 +1,108 @@
From 59e2b19d2d08349ef0197b0adcb13d0bd7de2b79 Mon Sep 17 00:00:00 2001
From: Donald Sharp <[email protected]>
Date: Mon, 17 Jun 2024 11:05:28 -0400
Subject: [PATCH 2/5] zebra: Use the ctx queue counters

The ctx queue data structures already have a counter associated with
them. Let's just use that instead.

Signed-off-by: Donald Sharp <[email protected]>
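
As in the previous patch, the per-provider in/out queue lengths are now
read from the lists themselves while holding the provider lock, rather
than from separately maintained atomic counters. Condensed from the
diff below:

    dplane_provider_lock(prov);
    in_q = dplane_ctx_queue_count(&prov->dp_ctx_in_list);
    out_q = dplane_ctx_queue_count(&prov->dp_ctx_out_list);
    dplane_provider_unlock(prov);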

diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c
index c252a370b8..c52e032660 100644
--- a/zebra/zebra_dplane.c
+++ b/zebra/zebra_dplane.c
@@ -492,10 +492,8 @@ struct zebra_dplane_provider {
int (*dp_fini)(struct zebra_dplane_provider *prov, bool early_p);

_Atomic uint32_t dp_in_counter;
- _Atomic uint32_t dp_in_queued;
_Atomic uint32_t dp_in_max;
_Atomic uint32_t dp_out_counter;
- _Atomic uint32_t dp_out_queued;
_Atomic uint32_t dp_out_max;
_Atomic uint32_t dp_error_counter;

@@ -6008,17 +6006,19 @@ int dplane_show_provs_helper(struct vty *vty, bool detailed)

/* Show counters, useful info from each registered provider */
while (prov) {
+ dplane_provider_lock(prov);
+ in_q = dplane_ctx_queue_count(&prov->dp_ctx_in_list);
+ out_q = dplane_ctx_queue_count(&prov->dp_ctx_out_list);
+ dplane_provider_unlock(prov);

in = atomic_load_explicit(&prov->dp_in_counter,
memory_order_relaxed);
- in_q = atomic_load_explicit(&prov->dp_in_queued,
- memory_order_relaxed);
+
in_max = atomic_load_explicit(&prov->dp_in_max,
memory_order_relaxed);
out = atomic_load_explicit(&prov->dp_out_counter,
memory_order_relaxed);
- out_q = atomic_load_explicit(&prov->dp_out_queued,
- memory_order_relaxed);
+
out_max = atomic_load_explicit(&prov->dp_out_max,
memory_order_relaxed);

@@ -6169,10 +6169,6 @@ struct zebra_dplane_ctx *dplane_provider_dequeue_in_ctx(
dplane_provider_lock(prov);

ctx = dplane_ctx_list_pop(&(prov->dp_ctx_in_list));
- if (ctx) {
- atomic_fetch_sub_explicit(&prov->dp_in_queued, 1,
- memory_order_relaxed);
- }

dplane_provider_unlock(prov);

@@ -6200,10 +6196,6 @@ int dplane_provider_dequeue_in_list(struct zebra_dplane_provider *prov,
break;
}

- if (ret > 0)
- atomic_fetch_sub_explicit(&prov->dp_in_queued, ret,
- memory_order_relaxed);
-
dplane_provider_unlock(prov);

return ret;
@@ -6228,10 +6220,7 @@ void dplane_provider_enqueue_out_ctx(struct zebra_dplane_provider *prov,
dplane_ctx_list_add_tail(&(prov->dp_ctx_out_list), ctx);

/* Maintain out-queue counters */
- atomic_fetch_add_explicit(&(prov->dp_out_queued), 1,
- memory_order_relaxed);
- curr = atomic_load_explicit(&prov->dp_out_queued,
- memory_order_relaxed);
+ curr = dplane_ctx_queue_count(&prov->dp_ctx_out_list);
high = atomic_load_explicit(&prov->dp_out_max,
memory_order_relaxed);
if (curr > high)
@@ -6253,9 +6242,6 @@ dplane_provider_dequeue_out_ctx(struct zebra_dplane_provider *prov)
if (!ctx)
return NULL;

- atomic_fetch_sub_explicit(&(prov->dp_out_queued), 1,
- memory_order_relaxed);
-
return ctx;
}

@@ -7260,10 +7246,7 @@ static void dplane_thread_loop(struct thread *event)

atomic_fetch_add_explicit(&prov->dp_in_counter, counter,
memory_order_relaxed);
- atomic_fetch_add_explicit(&prov->dp_in_queued, counter,
- memory_order_relaxed);
- curr = atomic_load_explicit(&prov->dp_in_queued,
- memory_order_relaxed);
+ curr = dplane_ctx_queue_count(&prov->dp_ctx_in_list);
high = atomic_load_explicit(&prov->dp_in_max,
memory_order_relaxed);
if (curr > high)
--
2.43.2
