Skip to content

Commit

Permalink
xfrm: iptfs: add skb-fragment sharing code
Browse files Browse the repository at this point in the history
Avoid copying the inner packet data by sharing the skb data fragments
from the outer packet skb into a new inner packet skb.

Signed-off-by: Christian Hopps <[email protected]>
  • Loading branch information
choppsv1 committed Nov 4, 2024
1 parent 6896f67 commit 247f208
Showing 1 changed file with 292 additions and 6 deletions.
298 changes: 292 additions & 6 deletions net/xfrm/xfrm_iptfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@
#define XFRM_IPTFS_MIN_L3HEADROOM 128
#define XFRM_IPTFS_MIN_L2HEADROOM (L1_CACHE_BYTES > 64 ? 64 : 64 + 16)

/* Min to try to share outer iptfs skb data vs copying into new skb */
#define IPTFS_PKT_SHARE_MIN 129

#define NSECS_IN_USEC 1000

#define IPTFS_HRTIMER_MODE HRTIMER_MODE_REL_SOFT
Expand Down Expand Up @@ -234,10 +237,254 @@ static void iptfs_skb_head_to_frag(const struct sk_buff *skb, skb_frag_t *frag)
skb_frag_fill_page_desc(frag, page, skb->data - addr, skb_headlen(skb));
}

/**
 * struct iptfs_skb_frag_walk - use to track a walk through fragments
 * @fragi: current fragment index
 * @past: length of data in fragments before @fragi
 * @total: length of data in all fragments
 * @nr_frags: number of fragments present in array
 * @initial_offset: the value passed in to iptfs_skb_prepare_frag_walk()
 * @frags: the page fragments inc. room for head page
 * @pp_recycle: copy of skb->pp_recycle
 */
struct iptfs_skb_frag_walk {
	u32 fragi;
	u32 past;
	u32 total;
	u32 nr_frags;
	u32 initial_offset;
	/* One extra slot so the skb linear (head) data, when it is
	 * page-backed (skb->head_frag), can be tracked as a fragment too.
	 */
	skb_frag_t frags[MAX_SKB_FRAGS + 1];
	/* Page-pool recycling marker of the walked skb; sharing page
	 * references requires src and dst skbs to agree on this.
	 */
	bool pp_recycle;
};

/**
 * iptfs_skb_prepare_frag_walk() - initialize a frag walk over an skb.
 * @skb: the skb to walk.
 * @initial_offset: start the walk @initial_offset into the skb.
 * @walk: the walk to initialize
 *
 * Future calls to iptfs_skb_add_frags() will expect the @offset value to be
 * at least @initial_offset large.
 */
static void iptfs_skb_prepare_frag_walk(struct sk_buff *skb, u32 initial_offset,
					struct iptfs_skb_frag_walk *walk)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	skb_frag_t *frag, *from;
	u32 i;

	walk->initial_offset = initial_offset;
	walk->fragi = 0;
	walk->past = 0;
	walk->total = 0;
	walk->nr_frags = 0;
	walk->pp_recycle = skb->pp_recycle;

	if (skb->head_frag) {
		/* The linear head data is page-backed so it can be shared;
		 * record it as the first walk fragment, consuming as much of
		 * @initial_offset as it covers.
		 */
		if (initial_offset >= skb_headlen(skb)) {
			initial_offset -= skb_headlen(skb);
		} else {
			frag = &walk->frags[walk->nr_frags++];
			iptfs_skb_head_to_frag(skb, frag);
			frag->offset += initial_offset;
			frag->len -= initial_offset;
			walk->total += frag->len;
			initial_offset = 0;
		}
	} else {
		/* Head data is not page-backed and cannot be shared.
		 * NOTE(review): assumes @initial_offset >= skb_headlen(skb)
		 * here (u32 would underflow otherwise) — callers appear to
		 * guarantee this; confirm at call sites.
		 */
		initial_offset -= skb_headlen(skb);
	}

	/* Copy the page fragments, trimming the first included one by
	 * whatever remains of @initial_offset.
	 */
	for (i = 0; i < shinfo->nr_frags; i++) {
		from = &shinfo->frags[i];
		if (initial_offset >= from->len) {
			initial_offset -= from->len;
			continue;
		}
		frag = &walk->frags[walk->nr_frags++];
		*frag = *from;
		if (initial_offset) {
			frag->offset += initial_offset;
			frag->len -= initial_offset;
			initial_offset = 0;
		}
		walk->total += frag->len;
	}
}

/**
 * iptfs_skb_reset_frag_walk() - seek the walk to an absolute skb offset.
 * @walk: the walk to reposition.
 * @offset: offset from the beginning of the originally walked skb; must be
 *	at least @walk->initial_offset and within the walked data.
 *
 * Moves @walk->fragi and @walk->past backward or forward until @offset
 * falls inside the fragment indexed by @walk->fragi.
 *
 * Return: @offset made relative to the start of the current fragment.
 */
static u32 iptfs_skb_reset_frag_walk(struct iptfs_skb_frag_walk *walk,
				     u32 offset)
{
	/* Adjust offset to refer to internal walk values */
	offset -= walk->initial_offset;

	/* Get to the correct fragment for offset */
	while (offset < walk->past) {
		/* Seek backward one fragment at a time */
		walk->past -= walk->frags[--walk->fragi].len;
		if (offset >= walk->past)
			break;
	}
	/* Seek forward while offset lies past the current fragment */
	while (offset >= walk->past + walk->frags[walk->fragi].len)
		walk->past += walk->frags[walk->fragi++].len;

	/* offset now relative to this current frag */
	offset -= walk->past;
	return offset;
}

/**
 * iptfs_skb_can_add_frags() - check if ok to add frags from walk to skb
 * @skb: skb to check for adding frags to
 * @walk: the walk that will be used as source for frags.
 * @offset: offset from beginning of original skb to start from.
 * @len: amount of data to add frag references to in @skb.
 *
 * Verify that @skb is compatible with @walk for reference sharing (no frag
 * list, matching page-pool recycling state) and that its fragment array has
 * room for the fragments needed to cover @len bytes starting at @offset.
 *
 * Return: true if ok to add frags.
 */
static bool iptfs_skb_can_add_frags(const struct sk_buff *skb,
				    struct iptfs_skb_frag_walk *walk,
				    u32 offset, u32 len)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	u32 index, count, avail;

	if (skb_has_frag_list(skb) || skb->pp_recycle != walk->pp_recycle)
		return false;

	/* Make offset relative to current frag after setting that */
	offset = iptfs_skb_reset_frag_walk(walk, offset);

	/* Count the fragments required, bailing if the array would overflow */
	count = shinfo->nr_frags;
	for (index = walk->fragi; len && index < walk->nr_frags; index++) {
		/* Only the first considered fragment is trimmed by offset */
		avail = walk->frags[index].len - offset;
		offset = 0;
		if (++count > MAX_SKB_FRAGS)
			return false;
		if (len <= avail)
			return true;
		len -= avail;
	}
	/* We may not copy all @len but what we have will fit. */
	return true;
}

/**
 * iptfs_skb_add_frags() - add a range of fragment references into an skb
 * @skb: skb to add references into
 * @walk: the walk to add referenced fragments from.
 * @offset: offset from beginning of original skb to start from.
 * @len: amount of data to add frag references to in @skb.
 *
 * iptfs_skb_can_add_frags() should be called before this function to verify
 * that the destination @skb is compatible with the walk and has space in the
 * array for the to be added frag references.
 *
 * Return: The number of bytes not added to @skb b/c we reached the end of the
 * walk before adding all of @len.
 */
static int iptfs_skb_add_frags(struct sk_buff *skb,
			       struct iptfs_skb_frag_walk *walk, u32 offset,
			       u32 len)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	u32 fraglen;

	/* Nothing to share, or @offset lies past the walked data */
	if (!walk->nr_frags || offset >= walk->total + walk->initial_offset)
		return len;

	/* make offset relative to current frag after setting that */
	offset = iptfs_skb_reset_frag_walk(walk, offset);

	while (len && walk->fragi < walk->nr_frags) {
		skb_frag_t *frag = &walk->frags[walk->fragi];
		skb_frag_t *tofrag = &shinfo->frags[shinfo->nr_frags];

		*tofrag = *frag;
		/* Only the first added fragment is trimmed by @offset */
		if (offset) {
			tofrag->offset += offset;
			tofrag->len -= offset;
			offset = 0;
		}
		/* Take a page reference for the shared fragment */
		__skb_frag_ref(tofrag);
		shinfo->nr_frags++;

		/* see if we are done */
		fraglen = tofrag->len;
		if (len < fraglen) {
			/* Partial final fragment: shrink the dst copy only */
			tofrag->len = len;
			skb->len += len;
			skb->data_len += len;
			return 0;
		}
		/* advance to next source fragment */
		len -= fraglen; /* careful, use dst bv_len */
		skb->len += fraglen; /* careful, " " " */
		skb->data_len += fraglen; /* careful, " " " */
		walk->past += frag->len; /* careful, use src bv_len */
		walk->fragi++;
	}
	return len;
}

/* ================================== */
/* IPTFS Receiving (egress) Functions */
/* ================================== */

/**
 * iptfs_pskb_add_frags() - Create and add frags into a new sk_buff.
 * @tpl: template to create new skb from.
 * @walk: The source for fragments to add.
 * @off: The offset into @walk to add frags from, also used with @st and
 *       @copy_len.
 * @len: The length of data to add covering frags from @walk into @skb.
 *       This must be <= @skblen.
 * @st: The sequence state to copy from into the new head skb.
 * @copy_len: Copy @copy_len bytes from @st at offset @off into the new skb
 *            linear space.
 *
 * Create a new sk_buff `skb` using the template @tpl. Copy @copy_len bytes
 * from @st into the new skb linear space, and then add shared fragments from
 * the frag walk for the remaining @len of data (i.e., @len - @copy_len bytes).
 *
 * Return: The newly allocated sk_buff `skb` or NULL if an error occurs.
 */
static struct sk_buff *
iptfs_pskb_add_frags(struct sk_buff *tpl, struct iptfs_skb_frag_walk *walk,
		     u32 off, u32 len, struct skb_seq_state *st, u32 copy_len)
{
	struct sk_buff *skb = iptfs_alloc_skb(tpl, copy_len, false);

	if (!skb)
		return NULL;

	/* this should not normally be happening */
	if (!iptfs_skb_can_add_frags(skb, walk, off + copy_len, len - copy_len))
		goto fail;

	/* Fill the linear area first, accounting the copy failure */
	if (copy_len) {
		if (skb_copy_seq_read(st, off, skb_put(skb, copy_len),
				      copy_len)) {
			XFRM_INC_STATS(dev_net(st->root_skb->dev),
				       LINUX_MIB_XFRMINERROR);
			goto fail;
		}
	}

	/* Share the remainder as page-fragment references */
	iptfs_skb_add_frags(skb, walk, off + copy_len, len - copy_len);
	return skb;

fail:
	kfree_skb(skb);
	return NULL;
}

/**
* iptfs_pskb_extract_seq() - Create and load data into a new sk_buff.
* @skblen: the total data size for `skb`.
Expand Down Expand Up @@ -423,6 +670,8 @@ static u32 iptfs_reassem_cont(struct xfrm_iptfs_data *xtfs, u64 seq,
struct skb_seq_state *st, struct sk_buff *skb,
u32 data, u32 blkoff, struct list_head *list)
{
struct iptfs_skb_frag_walk _fragwalk;
struct iptfs_skb_frag_walk *fragwalk = NULL;
struct sk_buff *newskb = xtfs->ra_newskb;
u32 remaining = skb->len - data;
u32 runtlen = xtfs->ra_runtlen;
Expand Down Expand Up @@ -567,10 +816,28 @@ static u32 iptfs_reassem_cont(struct xfrm_iptfs_data *xtfs, u64 seq,
fraglen = min(blkoff, remaining);
copylen = min(fraglen, ipremain);

/* copy fragment data into newskb */
if (skb_copy_seq_read(st, data, skb_put(newskb, copylen), copylen)) {
XFRM_INC_STATS(dev_net(skb->dev), LINUX_MIB_XFRMINBUFFERERROR);
goto abandon;
/* If we may have the opportunity to share prepare a fragwalk. */
if (!skb_has_frag_list(skb) && !skb_has_frag_list(newskb) &&
(skb->head_frag || skb->len == skb->data_len) &&
skb->pp_recycle == newskb->pp_recycle) {
fragwalk = &_fragwalk;
iptfs_skb_prepare_frag_walk(skb, data, fragwalk);
}

/* Try share then copy. */
if (fragwalk &&
iptfs_skb_can_add_frags(newskb, fragwalk, data, copylen)) {
u32 leftover;

leftover = iptfs_skb_add_frags(newskb, fragwalk, data, copylen);
} else {
/* copy fragment data into newskb */
if (skb_copy_seq_read(st, data, skb_put(newskb, copylen),
copylen)) {
XFRM_INC_STATS(xs_net(xtfs->x),
LINUX_MIB_XFRMINBUFFERERROR);
goto abandon;
}
}

if (copylen < ipremain) {
Expand Down Expand Up @@ -601,6 +868,8 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data,
struct list_head *sublist)
{
u8 hbytes[sizeof(struct ipv6hdr)];
struct iptfs_skb_frag_walk _fragwalk;
struct iptfs_skb_frag_walk *fragwalk = NULL;
struct sk_buff *defer, *first_skb, *next, *skb;
const unsigned char *old_mac;
struct xfrm_iptfs_data *xtfs;
Expand Down Expand Up @@ -694,6 +963,7 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data,
} else {
first_skb = skb;
first_iplen = iplen;
fragwalk = NULL;

/* We are going to skip over `data` bytes to reach the
* start of the IP header of `iphlen` len for `iplen`
Expand Down Expand Up @@ -745,6 +1015,13 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data,
/* all pointers could be changed now reset walk */
skb_abort_seq_read(skbseq);
skb_prepare_seq_read(skb, data, tail, skbseq);
} else if (skb->head_frag &&
/* We have the IP header right now */
remaining >= iphlen) {
fragwalk = &_fragwalk;
iptfs_skb_prepare_frag_walk(skb, data, fragwalk);
defer = skb;
skb = NULL;
} else {
/* We couldn't reuse the input skb so allocate a
* new one.
Expand All @@ -760,8 +1037,17 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data,

capturelen = min(iplen, remaining);
if (!skb) {
skb = iptfs_pskb_extract_seq(iplen, skbseq, data,
capturelen);
if (!fragwalk ||
/* Large enough to be worth sharing */
iplen < IPTFS_PKT_SHARE_MIN ||
/* Have IP header + some data to share. */
capturelen <= iphlen ||
/* Try creating skb and adding frags */
!(skb = iptfs_pskb_add_frags(first_skb, fragwalk,
data, capturelen,
skbseq, iphlen))) {
skb = iptfs_pskb_extract_seq(iplen, skbseq, data, capturelen);
}
if (!skb) {
/* skip to next packet or done */
data += capturelen;
Expand Down

0 comments on commit 247f208

Please sign in to comment.