xfrm: iptfs: add skb-fragment sharing code
Avoid copying the inner packet data by sharing the skb data fragments
from the outer packet skb into the new inner packet skb.

Signed-off-by: Christian Hopps <[email protected]>
choppsv1 committed Sep 14, 2024
1 parent 666b6a0 commit f29143a
Showing 1 changed file with 304 additions and 8 deletions.
312 changes: 304 additions & 8 deletions net/xfrm/xfrm_iptfs.c
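
The core idea, in a minimal hypothetical sketch (the helper name example_share_one_frag() is invented and not part of the patch): rather than memcpy()ing inner-packet bytes into a freshly allocated skb, take a reference on the outer skb's page fragment and record it in the inner skb's frag array, so both skbs can later be freed independently.

/* Hypothetical illustration only: reference page fragment @i of @outer
 * from @inner instead of copying its bytes. Assumes @inner has a free
 * frag slot and a compatible pp_recycle setting (the patch checks both).
 */
static void example_share_one_frag(struct sk_buff *inner,
				   const struct sk_buff *outer, int i)
{
	struct skb_shared_info *to = skb_shinfo(inner);
	const skb_frag_t *from = &skb_shinfo(outer)->frags[i];

	to->frags[to->nr_frags] = *from;	/* copy the page/offset/len descriptor */
	__skb_frag_ref(&to->frags[to->nr_frags]);	/* hold a page reference */
	to->nr_frags++;

	/* account the referenced bytes as non-linear data */
	inner->len += skb_frag_size(from);
	inner->data_len += skb_frag_size(from);
}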
@@ -81,6 +81,9 @@
#define XFRM_IPTFS_MIN_L3HEADROOM 128
#define XFRM_IPTFS_MIN_L2HEADROOM (L1_CACHE_BYTES > 64 ? 64 : 64 + 16)

/* Minimum inner packet size for which we try to share the outer iptfs
 * skb's data rather than copying into a new skb.
 */
#define IPTFS_PKT_SHARE_MIN 129

#define NSECS_IN_USEC 1000

#define IPTFS_HRTIMER_MODE HRTIMER_MODE_REL_SOFT
@@ -236,10 +239,261 @@ static void iptfs_skb_head_to_frag(const struct sk_buff *skb, skb_frag_t *frag)
skb_frag_fill_page_desc(frag, page, skb->data - addr, skb_headlen(skb));
}

/**
* struct iptfs_skb_frag_walk - used to track a walk through fragments
* @fragi: current fragment index
* @past: length of data in fragments before @fragi
* @total: length of data in all fragments
* @nr_frags: number of fragments present in array
* @initial_offset: the value passed in to skb_prepare_frag_walk()
* @pp_recycle: copy of skb->pp_recycle
* @frags: the page fragments, including room for the head page
*/
struct iptfs_skb_frag_walk {
u32 fragi;
u32 past;
u32 total;
u32 nr_frags;
u32 initial_offset;
bool pp_recycle;
skb_frag_t frags[MAX_SKB_FRAGS + 1];
};

/**
* iptfs_skb_prepare_frag_walk() - initialize a frag walk over an skb.
* @skb: the skb to walk.
* @initial_offset: start the walk @initial_offset into the skb.
* @walk: the walk to initialize
*
* Future calls to iptfs_skb_add_frags() will expect the @offset value to be
* at least @initial_offset.
*/
static void iptfs_skb_prepare_frag_walk(struct sk_buff *skb, u32 initial_offset,
struct iptfs_skb_frag_walk *walk)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
skb_frag_t *frag, *from;
u32 i;

walk->initial_offset = initial_offset;
walk->fragi = 0;
walk->past = 0;
walk->total = 0;
walk->nr_frags = 0;
walk->pp_recycle = skb->pp_recycle;

if (skb->head_frag) {
if (initial_offset >= skb_headlen(skb)) {
initial_offset -= skb_headlen(skb);
} else {
frag = &walk->frags[walk->nr_frags++];
iptfs_skb_head_to_frag(skb, frag);
frag->offset += initial_offset;
frag->len -= initial_offset;
walk->total += frag->len;
initial_offset = 0;
}
} else {
WARN_ON_ONCE(skb_headlen(skb) > initial_offset);
initial_offset -= skb_headlen(skb);
}

for (i = 0; i < shinfo->nr_frags; i++) {
from = &shinfo->frags[i];
if (initial_offset >= from->len) {
initial_offset -= from->len;
continue;
}
frag = &walk->frags[walk->nr_frags++];
*frag = *from;
if (initial_offset) {
frag->offset += initial_offset;
frag->len -= initial_offset;
initial_offset = 0;
}
walk->total += frag->len;
}
WARN_ON_ONCE(initial_offset != 0);
}

static u32 iptfs_skb_reset_frag_walk(struct iptfs_skb_frag_walk *walk,
u32 offset)
{
/* Adjust offset to refer to internal walk values */
WARN_ON_ONCE(offset < walk->initial_offset);
offset -= walk->initial_offset;

/* Get to the correct fragment for offset */
while (offset < walk->past) {
walk->past -= walk->frags[--walk->fragi].len;
if (offset >= walk->past)
break;
WARN_ON_ONCE(walk->fragi == 0);
}
while (offset >= walk->past + walk->frags[walk->fragi].len)
walk->past += walk->frags[walk->fragi++].len;

/* offset now relative to this current frag */
offset -= walk->past;
return offset;
}
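
A small worked example may make the arithmetic concrete; the numbers and the helper example_reset_walk() below are invented for illustration and are not part of the patch. With a walk prepared at initial_offset 14 over fragments of 1000 and 500 bytes, an absolute offset of 1200 becomes 1186 internally, skips past the 1000-byte fragment 0, and resolves to 186 bytes into fragment 1.

/* Hypothetical illustration: resolve an absolute skb offset against a
 * walk prepared with initial_offset == 14 whose table holds fragments
 * of 1000 and 500 bytes.
 */
static void example_reset_walk(struct iptfs_skb_frag_walk *walk)
{
	u32 in_frag = iptfs_skb_reset_frag_walk(walk, 1200);

	/* 1200 - 14 = 1186; fragment 0 covers internal bytes 0..999 */
	WARN_ON_ONCE(walk->fragi != 1);
	WARN_ON_ONCE(walk->past != 1000);
	WARN_ON_ONCE(in_frag != 186);
}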

/**
* iptfs_skb_can_add_frags() - check if ok to add frags from walk to skb
* @skb: skb to check for adding frags to
* @walk: the walk that will be used as source for frags.
* @offset: offset from beginning of original skb to start from.
* @len: amount of data to add frag references to in @skb.
*
* Return: true if ok to add frags.
*/
static bool iptfs_skb_can_add_frags(const struct sk_buff *skb,
struct iptfs_skb_frag_walk *walk,
u32 offset, u32 len)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
u32 fragi, nr_frags, fraglen;

if (skb_has_frag_list(skb) || skb->pp_recycle != walk->pp_recycle)
return false;

/* Reset the walk to @offset, which becomes relative to the current frag */
offset = iptfs_skb_reset_frag_walk(walk, offset);

/* Verify we have array space for the fragments we need to add */
fragi = walk->fragi;
nr_frags = shinfo->nr_frags;
while (len && fragi < walk->nr_frags) {
skb_frag_t *frag = &walk->frags[fragi];

fraglen = frag->len;
if (offset) {
fraglen -= offset;
offset = 0;
}
if (++nr_frags > MAX_SKB_FRAGS)
return false;
if (len <= fraglen)
return true;
len -= fraglen;
fragi++;
}
/* We may not copy all @len but what we have will fit. */
return true;
}

/**
* iptfs_skb_add_frags() - add a range of fragment references into an skb
* @skb: skb to add references into
* @walk: the walk to add referenced fragments from.
* @offset: offset from beginning of original skb to start from.
* @len: amount of data to add frag references to in @skb.
*
* iptfs_skb_can_add_frags() should be called before this function to verify
* that the destination @skb is compatible with the walk and has space in the
* array for the frag references to be added.
*
* Return: The number of bytes not added to @skb because the end of the walk
* was reached before all of @len was added.
*/
static int iptfs_skb_add_frags(struct sk_buff *skb,
struct iptfs_skb_frag_walk *walk, u32 offset,
u32 len)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
u32 fraglen;

WARN_ON_ONCE(skb->pp_recycle != walk->pp_recycle);
if (!walk->nr_frags || offset >= walk->total + walk->initial_offset)
return len;

/* reset the walk to @offset, which becomes relative to the current frag */
offset = iptfs_skb_reset_frag_walk(walk, offset);
WARN_ON_ONCE(shinfo->nr_frags >= MAX_SKB_FRAGS);

while (len && walk->fragi < walk->nr_frags) {
skb_frag_t *frag = &walk->frags[walk->fragi];
skb_frag_t *tofrag = &shinfo->frags[shinfo->nr_frags];

*tofrag = *frag;
if (offset) {
tofrag->offset += offset;
tofrag->len -= offset;
offset = 0;
}
__skb_frag_ref(tofrag);
shinfo->nr_frags++;
WARN_ON_ONCE(shinfo->nr_frags > MAX_SKB_FRAGS);

/* see if we are done */
fraglen = tofrag->len;
if (len < fraglen) {
tofrag->len = len;
skb->len += len;
skb->data_len += len;
return 0;
}
/* advance to next source fragment */
len -= fraglen; /* careful, use dst bv_len */
skb->len += fraglen; /* careful, " " " */
skb->data_len += fraglen; /* careful, " " " */
walk->past += frag->len; /* careful, use src bv_len */
walk->fragi++;
}
return len;
}
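
Taken together, the helpers are meant to be used in a prepare / can-add / add sequence. The caller example_share_or_copy() below is hypothetical and not part of the patch; it sketches the intended pattern of falling back to a copy when references cannot be taken.

/* Hypothetical illustration: share @len bytes starting at absolute
 * @offset of @src into @dst, or tell the caller it must copy instead.
 */
static int example_share_or_copy(struct sk_buff *dst, struct sk_buff *src,
				 u32 offset, u32 len)
{
	struct iptfs_skb_frag_walk walk;

	/* The real callers also verify @src has no frag list first. */
	iptfs_skb_prepare_frag_walk(src, offset, &walk);

	/* A frag-list dst, mismatched pp_recycle, or a full frag array all
	 * mean references cannot be added; the caller must copy.
	 */
	if (!iptfs_skb_can_add_frags(dst, &walk, offset, len))
		return -EMSGSIZE;

	/* Returns the number of bytes it could not reference. */
	return iptfs_skb_add_frags(dst, &walk, offset, len);
}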

/* ================================== */
/* IPTFS Receiving (egress) Functions */
/* ================================== */

/**
* iptfs_pskb_add_frags() - Create and add frags into a new sk_buff.
* @tpl: template to create new skb from.
* @walk: The source for fragments to add.
* @off: The offset into @walk to add frags from, also used with @st and
* @copy_len.
* @len: The total amount of data to place in the new skb: @copy_len bytes
* copied into its linear area plus frag references from @walk for the rest.
* @st: The sequence state to copy the linear data from into the new skb.
* @copy_len: Copy @copy_len bytes from @st at offset @off into the new skb
* linear space.
*
* Create a new sk_buff `skb` using the template @tpl. Copy @copy_len bytes from
* @st into the new skb linear space, and then add shared fragments from the
* frag walk for the remaining data (i.e., @len - @copy_len bytes).
*
* Return: The newly allocated sk_buff `skb` or NULL if an error occurs.
*/
static struct sk_buff *
iptfs_pskb_add_frags(struct sk_buff *tpl, struct iptfs_skb_frag_walk *walk,
u32 off, u32 len, struct skb_seq_state *st, u32 copy_len)
{
struct sk_buff *skb;

skb = iptfs_alloc_skb(tpl, copy_len, false);
if (!skb)
return NULL;

/* this should not normally happen */
if (!iptfs_skb_can_add_frags(skb, walk, off + copy_len,
len - copy_len)) {
kfree_skb(skb);
return NULL;
}

if (copy_len &&
skb_copy_seq_read(st, off, skb_put(skb, copy_len), copy_len)) {
XFRM_INC_STATS(dev_net(st->root_skb->dev),
LINUX_MIB_XFRMINERROR);
kfree_skb(skb);
return NULL;
}

iptfs_skb_add_frags(skb, walk, off + copy_len, len - copy_len);
return skb;
}
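
As a usage note, the receive path further below calls this with @copy_len set to the inner IP header length, so headers land in linear memory where the stack expects them while the payload is shared. The wrapper example_inner_skb() is hypothetical, not part of the patch, and assumes the whole inner packet is present in the outer skb.

/* Hypothetical illustration: build an inner-packet skb whose IP header
 * lives in linear memory while the remaining @pktlen - @iphlen payload
 * bytes are shared from @walk. Assumes the whole inner packet is
 * present in the outer skb (no reassembly needed).
 */
static struct sk_buff *example_inner_skb(struct sk_buff *tpl,
					 struct iptfs_skb_frag_walk *walk,
					 struct skb_seq_state *st,
					 u32 data, u32 pktlen, u32 iphlen)
{
	struct sk_buff *skb;

	skb = iptfs_pskb_add_frags(tpl, walk, data, pktlen, st, iphlen);
	if (skb)
		return skb;

	/* Sharing failed or was refused; fall back to copying all bytes. */
	return iptfs_pskb_extract_seq(pktlen, st, data, pktlen);
}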

/**
* iptfs_pskb_extract_seq() - Create and load data into a new sk_buff.
* @skblen: the total data size for `skb`.
@@ -430,6 +684,8 @@ static u32 iptfs_reassem_cont(struct xfrm_iptfs_data *xtfs, u64 seq,
struct skb_seq_state *st, struct sk_buff *skb,
u32 data, u32 blkoff, struct list_head *list)
{
struct iptfs_skb_frag_walk _fragwalk;
struct iptfs_skb_frag_walk *fragwalk = NULL;
struct sk_buff *newskb = xtfs->ra_newskb;
u32 remaining = skb->len - data;
u32 runtlen = xtfs->ra_runtlen;
@@ -582,13 +838,32 @@ static u32 iptfs_reassem_cont(struct xfrm_iptfs_data *xtfs, u64 seq,
fraglen = min(blkoff, remaining);
copylen = min(fraglen, ipremain);

/* We verified this was true in the main receive routine */
WARN_ON_ONCE(skb_tailroom(newskb) < copylen);
/* If we may have the opportunity to share, prepare a fragwalk. */
if (!skb_has_frag_list(skb) && !skb_has_frag_list(newskb) &&
(skb->head_frag || skb->len == skb->data_len) &&
skb->pp_recycle == newskb->pp_recycle) {
fragwalk = &_fragwalk;
iptfs_skb_prepare_frag_walk(skb, data, fragwalk);
}

/* copy fragment data into newskb */
if (skb_copy_seq_read(st, data, skb_put(newskb, copylen), copylen)) {
XFRM_INC_STATS(dev_net(skb->dev), LINUX_MIB_XFRMINBUFFERERROR);
goto abandon;
/* Try sharing first, then fall back to copying. */
if (fragwalk &&
iptfs_skb_can_add_frags(newskb, fragwalk, data, copylen)) {
u32 leftover;

leftover = iptfs_skb_add_frags(newskb, fragwalk, data, copylen);
WARN_ON_ONCE(leftover != 0);
} else {
/* We verified this was true in the main receive routine */
WARN_ON_ONCE(skb_tailroom(newskb) < copylen);

/* copy fragment data into newskb */
if (skb_copy_seq_read(st, data, skb_put(newskb, copylen),
copylen)) {
XFRM_INC_STATS(xs_net(xtfs->x),
LINUX_MIB_XFRMINBUFFERERROR);
goto abandon;
}
}

if (copylen < ipremain) {
@@ -629,6 +904,8 @@ static int iptfs_input(struct xfrm_state *x, struct sk_buff *skb)
u8 hbytes[sizeof(struct ipv6hdr)];
struct ip_iptfs_cc_hdr iptcch;
struct skb_seq_state skbseq;
struct iptfs_skb_frag_walk _fragwalk;
struct iptfs_skb_frag_walk *fragwalk = NULL;
struct list_head sublist; /* rename this it's just a list */
struct sk_buff *first_skb, *defer, *next;
const unsigned char *old_mac;
@@ -775,6 +1052,7 @@ static int iptfs_input(struct xfrm_state *x, struct sk_buff *skb)
} else {
first_skb = skb;
first_iplen = iplen;
fragwalk = NULL;

/* We are going to skip over `data` bytes to reach the
* start of the IP header of `iphlen` len for `iplen`
@@ -826,6 +1104,14 @@ static int iptfs_input(struct xfrm_state *x, struct sk_buff *skb)
/* all pointers could be changed now reset walk */
skb_abort_seq_read(&skbseq);
skb_prepare_seq_read(skb, data, tail, &skbseq);
} else if (skb->head_frag &&
/* We have the IP header right now */
remaining >= iphlen) {
fragwalk = &_fragwalk;
iptfs_skb_prepare_frag_walk(skb, data,
fragwalk);
defer = skb;
skb = NULL;
} else {
/* We couldn't reuse the input skb so allocate a
* new one.
@@ -841,8 +1127,18 @@

capturelen = min(iplen, remaining);
if (!skb) {
skb = iptfs_pskb_extract_seq(iplen, &skbseq, data,
capturelen);
if (!fragwalk ||
/* Large enough to be worth sharing */
iplen < IPTFS_PKT_SHARE_MIN ||
/* Have IP header + some data to share. */
capturelen <= iphlen ||
/* Try creating skb and adding frags */
!(skb = iptfs_pskb_add_frags(first_skb, fragwalk,
data, capturelen,
&skbseq, iphlen))) {
skb = iptfs_pskb_extract_seq(iplen, &skbseq,
data, capturelen);
}
if (!skb) {
/* skip to next packet or done */
data += capturelen;
