From 2c1c1acae5b939bcd95155ab83d142bc0b48063f Mon Sep 17 00:00:00 2001 From: Bogdan-Andrei Iancu Date: Fri, 30 Jun 2023 12:50:01 +0300 Subject: [PATCH] Fix resuming partial msg reading on TCP When conn rebalancing or parallel processing is enabled for TCP, a read operation may start (partial reading) in one process and be completed in a different one. So the con_req (buffer for partial reading) must be in shm mem and also it must survive a conn release (passing the conn back to MAIN). Many thanks to @liviuchircu for reporting and helping with the troubleshooting This issue was found during the TCP performance tests done for the 3.4 release (cherry picked from commit e0fe732e9c9d28c24ed6860fbd3531940978b8f9) --- net/net_tcp.c | 3 +++ net/net_tcp_proc.c | 8 -------- net/proto_tcp/proto_tcp.c | 6 +++--- net/proto_tcp/tcp_common.h | 6 +++--- 4 files changed, 9 insertions(+), 14 deletions(-) diff --git a/net/net_tcp.c b/net/net_tcp.c index da2dcb42880..d30fd47fd85 100644 --- a/net/net_tcp.c +++ b/net/net_tcp.c @@ -732,6 +732,9 @@ static void _tcpconn_rm(struct tcp_connection* c, int no_event) c->async = NULL; } + if (c->con_req) + shm_free(c->con_req); + if (protos[c->type].net.conn_clean) protos[c->type].net.conn_clean(c); diff --git a/net/net_tcp_proc.c b/net/net_tcp_proc.c index d33889080bd..773c137aab8 100644 --- a/net/net_tcp_proc.c +++ b/net/net_tcp_proc.c @@ -68,14 +68,6 @@ static void tcpconn_release(struct tcp_connection* c, long state, int writer, c, state, c->fd, c->id); LM_DBG(" extra_data %p\n", c->extra_data); - /* if we are in a writer context, do not touch the buffer contain read packets per connection - might be in a completely different process - even if in our process we shouldn't touch it, since it might currently be in use, when we've read multiple SIP messages in one try*/ - if (!writer && c->con_req) { - pkg_free(c->con_req); - c->con_req = NULL; - } - /* release req & signal the parent */ if (!writer) c->proc_id = -1; diff --git a/net/proto_tcp/proto_tcp.c b/net/proto_tcp/proto_tcp.c index 624cb5b197f..ce4e67263c4 100644 --- a/net/proto_tcp/proto_tcp.c +++ b/net/proto_tcp/proto_tcp.c @@ -677,9 +677,9 @@ static int tcp_read_req(struct tcp_connection* con, int* bytes_read) if (con->con_req) { req=con->con_req; - LM_DBG("Using the per connection buff \n"); + LM_DBG("Using the per connection buff for conn %p\n",con); } else { - LM_DBG("Using the global ( per process ) buff \n"); + LM_DBG("Using the global ( per process ) buff for conn %p\n",con); init_tcp_req(&tcp_current_req, 0); req=&tcp_current_req; } @@ -742,7 +742,7 @@ static int tcp_read_req(struct tcp_connection* con, int* bytes_read) goto error; } - LM_DBG("tcp_read_req end\n"); + LM_DBG("tcp_read_req end for conn %p, req is %p\n",con,con->con_req); done: if (bytes_read) *bytes_read=total_bytes; /* connection will be released */ diff --git a/net/proto_tcp/tcp_common.h b/net/proto_tcp/tcp_common.h index 0b229d905a5..1580f0672ea 100644 --- a/net/proto_tcp/tcp_common.h +++ b/net/proto_tcp/tcp_common.h @@ -449,7 +449,7 @@ static inline int tcp_handle_req(struct tcp_req *req, if (req != &_tcp_common_current_req) { /* if we no longer need this tcp_req * we can free it now */ - pkg_free(req); + shm_free(req); con->con_req = NULL; } } else { @@ -469,8 +469,8 @@ static inline int tcp_handle_req(struct tcp_req *req, if (req == &_tcp_common_current_req) { /* let's duplicate this - most likely another conn will come in */ - LM_DBG("We didn't manage to read a full request\n"); - con->con_req = pkg_malloc(sizeof(struct tcp_req)); + LM_ERR("We didn't manage to read a full request on con %p\n",con); + con->con_req = shm_malloc(sizeof(struct tcp_req)); if (con->con_req == NULL) { LM_ERR("No more mem for dynamic con request buffer\n"); goto error;