[v6,06/10] ipsec: add transmit segmentation offload support
Commit Message
Add support for transmit segmentation offload to inline crypto processing
mode. This offload is not supported by other offload modes, as at a
minimum it requires inline crypto for IPsec to be supported on the
network interface.
Signed-off-by: Declan Doherty <declan.doherty@intel.com>
Signed-off-by: Radu Nicolau <radu.nicolau@intel.com>
Signed-off-by: Abhijit Sinha <abhijit.sinha@intel.com>
Signed-off-by: Daniel Martin Buckley <daniel.m.buckley@intel.com>
Acked-by: Fan Zhang <roy.fan.zhang@intel.com>
---
lib/ipsec/esp_inb.c | 4 +-
lib/ipsec/esp_outb.c | 115 +++++++++++++++++++++++++++++++++++--------
lib/ipsec/iph.h | 10 +++-
lib/ipsec/sa.c | 6 +++
lib/ipsec/sa.h | 4 ++
5 files changed, 114 insertions(+), 25 deletions(-)
Comments
> Add support for transmit segmentation offload to inline crypto processing
> mode. This offload is not supported by other offload modes, as at a
> minimum it requires inline crypto for IPsec to be supported on the
> network interface.
>
> Signed-off-by: Declan Doherty <declan.doherty@intel.com>
> Signed-off-by: Radu Nicolau <radu.nicolau@intel.com>
> Signed-off-by: Abhijit Sinha <abhijit.sinha@intel.com>
> Signed-off-by: Daniel Martin Buckley <daniel.m.buckley@intel.com>
> Acked-by: Fan Zhang <roy.fan.zhang@intel.com>
> ---
> lib/ipsec/esp_inb.c | 4 +-
> lib/ipsec/esp_outb.c | 115 +++++++++++++++++++++++++++++++++++--------
> lib/ipsec/iph.h | 10 +++-
> lib/ipsec/sa.c | 6 +++
> lib/ipsec/sa.h | 4 ++
> 5 files changed, 114 insertions(+), 25 deletions(-)
>
> diff --git a/lib/ipsec/esp_inb.c b/lib/ipsec/esp_inb.c
> index d66c88f05d..a6ab8fbdd5 100644
> --- a/lib/ipsec/esp_inb.c
> +++ b/lib/ipsec/esp_inb.c
> @@ -668,8 +668,8 @@ trs_process(const struct rte_ipsec_sa *sa, struct rte_mbuf *mb[],
> /* modify packet's layout */
> np = trs_process_step2(mb[i], ml[i], hl[i], cofs,
> to[i], tl, sqn + k);
> - update_trs_l3hdr(sa, np + l2, mb[i]->pkt_len,
> - l2, hl[i] - l2, espt[i].next_proto);
> + update_trs_l34hdrs(sa, np + l2, mb[i]->pkt_len,
> + l2, hl[i] - l2, espt[i].next_proto, 0);
>
> /* update mbuf's metadata */
> trs_process_step3(mb[i]);
> diff --git a/lib/ipsec/esp_outb.c b/lib/ipsec/esp_outb.c
> index a3f77469c3..9fc7075796 100644
> --- a/lib/ipsec/esp_outb.c
> +++ b/lib/ipsec/esp_outb.c
> @@ -2,6 +2,8 @@
> * Copyright(c) 2018-2020 Intel Corporation
> */
>
> +#include <math.h>
> +
> #include <rte_ipsec.h>
> #include <rte_esp.h>
> #include <rte_ip.h>
> @@ -156,11 +158,20 @@ outb_tun_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
>
> /* number of bytes to encrypt */
> clen = plen + sizeof(*espt);
> - clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> +
> + /* We don't need to pad/align packet when using TSO offload */
> + if (likely(!(mb->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))))
> + clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> +
Here and everywhere:
It doesn't look nice that we have to pollute generic functions with
checks for TSO-specific flags all over the place.
Could we have a specific prepare/process function for the inline+tso case,
as we already have for the cpu and inline cases right now?
Or just update the inline version?
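For illustration, the TSO-dependent length math could be factored into one
helper so the generic prepare functions stay branch-free - a rough sketch
only; the helper name and split are hypothetical, not part of this patch:

	static inline uint32_t
	outb_pkt_tail_len(const struct rte_ipsec_sa *sa, uint32_t plen,
		uint32_t sqh_len, int tso, uint32_t *clen)
	{
		uint32_t n;

		/* number of bytes to encrypt; TSO packets are not padded/aligned */
		n = plen + sizeof(struct rte_esp_tail);
		if (!tso)
			n = RTE_ALIGN_CEIL(n, sa->pad_align);
		*clen = n;

		/* pad length + esp tail; no ICV is appended for TSO packets */
		if (tso)
			return n - plen + sqh_len;
		return n - plen + sa->icv_len + sqh_len;
	}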
>
> /* pad length + esp tail */
> pdlen = clen - plen;
> - tlen = pdlen + sa->icv_len + sqh_len;
> +
> + /* We don't append ICV length when using TSO offload */
> + if (likely(!(mb->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))))
> + tlen = pdlen + sa->icv_len + sqh_len;
> + else
> + tlen = pdlen + sqh_len;
>
> /* do append and prepend */
> ml = rte_pktmbuf_lastseg(mb);
> @@ -337,6 +348,7 @@ outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
> char *ph, *pt;
> uint64_t *iv;
> uint32_t l2len, l3len;
> + uint8_t tso = mb->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG) ? 1 : 0;
>
> l2len = mb->l2_len;
> l3len = mb->l3_len;
> @@ -349,11 +361,19 @@ outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
>
> /* number of bytes to encrypt */
> clen = plen + sizeof(*espt);
> - clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> +
> + /* We don't need to pad/align packet when using TSO offload */
> + if (likely(!tso))
> + clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
>
> /* pad length + esp tail */
> pdlen = clen - plen;
> - tlen = pdlen + sa->icv_len + sqh_len;
> +
> + /* We don't append ICV length when using TSO offload */
> + if (likely(!tso))
> + tlen = pdlen + sa->icv_len + sqh_len;
> + else
> + tlen = pdlen + sqh_len;
>
> /* do append and insert */
> ml = rte_pktmbuf_lastseg(mb);
> @@ -375,8 +395,8 @@ outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
> insert_esph(ph, ph + hlen, uhlen);
>
> /* update ip header fields */
> - np = update_trs_l3hdr(sa, ph + l2len, mb->pkt_len - sqh_len, l2len,
> - l3len, IPPROTO_ESP);
> + np = update_trs_l34hdrs(sa, ph + l2len, mb->pkt_len - sqh_len, l2len,
> + l3len, IPPROTO_ESP, tso);
>
> /* update spi, seqn and iv */
> esph = (struct rte_esp_hdr *)(ph + uhlen);
> @@ -651,6 +671,33 @@ inline_outb_mbuf_prepare(const struct rte_ipsec_session *ss,
> }
> }
>
> +/* check if packet will exceed MSS and segmentation is required */
> +static inline int
> +esn_outb_nb_segments(const struct rte_ipsec_sa *sa, struct rte_mbuf *m) {
> + uint16_t segments = 1;
> + uint16_t pkt_l3len = m->pkt_len - m->l2_len;
> +
> + /* Only support segmentation for UDP/TCP flows */
> + if (!(m->packet_type & (RTE_PTYPE_L4_UDP | RTE_PTYPE_L4_TCP)))
> + return segments;
> +
> + if (sa->tso.enabled && pkt_l3len > sa->tso.mss) {
> + segments = ceil((float)pkt_l3len / sa->tso.mss);
Float calculations in the middle of the data path,
just to calculate a round-up?
That doesn't look good to me at all.
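A plain integer round-up would avoid the float entirely, e.g. (sketch):

	segments = (pkt_l3len + sa->tso.mss - 1) / sa->tso.mss;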
> +
> + if (m->packet_type & RTE_PTYPE_L4_TCP) {
> + m->ol_flags |= (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM);
That's really strange - why would the ipsec library set PKT_TX_TCP_SEG unconditionally?
That should be the responsibility of the upper layer, I think.
In the lib we should only check whether TSO was requested for that packet or not.
Same for UDP.
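i.e. a sketch of the check inside esn_outb_nb_segments(), assuming the
caller has already set the flags:

	/* only segment when the caller has already requested TSO */
	if ((m->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG)) == 0)
		return 1;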
> + m->l4_len = sizeof(struct rte_tcp_hdr);
Hmm, how do we know there are no TCP options present for that packet?
Wouldn't it be better to expect the user to provide a proper l4_len for such packets?
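For example (sketch), validating the caller-provided value instead of
overwriting it, so any TCP options are accounted for:

	/* rely on l4_len filled in by the caller, incl. any TCP options */
	if (m->l4_len < sizeof(struct rte_tcp_hdr))
		return 1; /* malformed request - skip segmentation */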
> + } else {
> + m->ol_flags |= (PKT_TX_UDP_SEG | PKT_TX_UDP_CKSUM);
> + m->l4_len = sizeof(struct rte_udp_hdr);
> + }
> +
> + m->tso_segsz = sa->tso.mss;
> + }
> +
> + return segments;
> +}
> +
> /*
> * process group of ESP outbound tunnel packets destined for
> * INLINE_CRYPTO type of device.
> @@ -660,24 +707,29 @@ inline_outb_tun_pkt_process(const struct rte_ipsec_session *ss,
> struct rte_mbuf *mb[], uint16_t num)
> {
> int32_t rc;
> - uint32_t i, k, n;
> + uint32_t i, k, nb_sqn = 0, nb_sqn_alloc;
> uint64_t sqn;
> rte_be64_t sqc;
> struct rte_ipsec_sa *sa;
> union sym_op_data icv;
> uint64_t iv[IPSEC_MAX_IV_QWORD];
> uint32_t dr[num];
> + uint16_t nb_segs[num];
>
> sa = ss->sa;
>
> - n = num;
> - sqn = esn_outb_update_sqn(sa, &n);
> - if (n != num)
> + for (i = 0; i != num; i++) {
> + nb_segs[i] = esn_outb_nb_segments(sa, mb[i]);
> + nb_sqn += nb_segs[i];
> + }
> +
> + nb_sqn_alloc = nb_sqn;
> + sqn = esn_outb_update_sqn(sa, &nb_sqn_alloc);
> + if (nb_sqn_alloc != nb_sqn)
> rte_errno = EOVERFLOW;
>
> k = 0;
> - for (i = 0; i != n; i++) {
> -
> + for (i = 0; i != num; i++) {
> sqc = rte_cpu_to_be_64(sqn + i);
> gen_iv(iv, sqc);
>
> @@ -691,11 +743,18 @@ inline_outb_tun_pkt_process(const struct rte_ipsec_session *ss,
> dr[i - k] = i;
> rte_errno = -rc;
> }
> +
> + /**
> + * If packet is using tso, increment sqn by the number of
> + * segments for packet
> + */
> + if (mb[i]->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))
> + sqn += nb_segs[i] - 1;
> }
>
> /* copy not processed mbufs beyond good ones */
> - if (k != n && k != 0)
> - move_bad_mbufs(mb, dr, n, n - k);
> + if (k != num && k != 0)
> + move_bad_mbufs(mb, dr, num, num - k);
>
> inline_outb_mbuf_prepare(ss, mb, k);
> return k;
> @@ -710,23 +769,30 @@ inline_outb_trs_pkt_process(const struct rte_ipsec_session *ss,
> struct rte_mbuf *mb[], uint16_t num)
> {
> int32_t rc;
> - uint32_t i, k, n;
> + uint32_t i, k, nb_sqn, nb_sqn_alloc;
> uint64_t sqn;
> rte_be64_t sqc;
> struct rte_ipsec_sa *sa;
> union sym_op_data icv;
> uint64_t iv[IPSEC_MAX_IV_QWORD];
> uint32_t dr[num];
> + uint16_t nb_segs[num];
>
> sa = ss->sa;
>
> - n = num;
> - sqn = esn_outb_update_sqn(sa, &n);
> - if (n != num)
> + /* Calculate number of sequence numbers required */
> + for (i = 0, nb_sqn = 0; i != num; i++) {
> + nb_segs[i] = esn_outb_nb_segments(sa, mb[i]);
> + nb_sqn += nb_segs[i];
> + }
> +
> + nb_sqn_alloc = nb_sqn;
> + sqn = esn_outb_update_sqn(sa, &nb_sqn_alloc);
> + if (nb_sqn_alloc != nb_sqn)
> rte_errno = EOVERFLOW;
>
> k = 0;
> - for (i = 0; i != n; i++) {
> + for (i = 0; i != num; i++) {
>
> sqc = rte_cpu_to_be_64(sqn + i);
> gen_iv(iv, sqc);
> @@ -741,11 +807,18 @@ inline_outb_trs_pkt_process(const struct rte_ipsec_session *ss,
> dr[i - k] = i;
> rte_errno = -rc;
> }
> +
> + /**
> + * If packet is using tso, increment sqn by the number of
> + * segments for packet
> + */
> + if (mb[i]->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))
> + sqn += nb_segs[i] - 1;
> }
>
> /* copy not processed mbufs beyond good ones */
> - if (k != n && k != 0)
> - move_bad_mbufs(mb, dr, n, n - k);
> + if (k != num && k != 0)
> + move_bad_mbufs(mb, dr, num, num - k);
>
> inline_outb_mbuf_prepare(ss, mb, k);
> return k;
> diff --git a/lib/ipsec/iph.h b/lib/ipsec/iph.h
> index 861f16905a..2d223199ac 100644
> --- a/lib/ipsec/iph.h
> +++ b/lib/ipsec/iph.h
> @@ -6,6 +6,8 @@
> #define _IPH_H_
>
> #include <rte_ip.h>
> +#include <rte_udp.h>
> +#include <rte_tcp.h>
>
> /**
> * @file iph.h
> @@ -39,8 +41,8 @@ insert_esph(char *np, char *op, uint32_t hlen)
>
> /* update original ip header fields for transport case */
> static inline int
> -update_trs_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
> - uint32_t l2len, uint32_t l3len, uint8_t proto)
> +update_trs_l34hdrs(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
> + uint32_t l2len, uint32_t l3len, uint8_t proto, uint8_t tso)
Hmm... why change the name of the function?
> {
> int32_t rc;
>
> @@ -51,6 +53,10 @@ update_trs_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
> v4h = p;
> rc = v4h->next_proto_id;
> v4h->next_proto_id = proto;
> + if (tso) {
> + v4h->hdr_checksum = 0;
> + v4h->total_length = 0;
total_length will be overwritten unconditionally by the next line below.
Another question - why is it necessary?
Is it a HW-specific requirement or ... ?
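If the checksum reset really is needed by the HW, the dead store could
simply be dropped (sketch):

	if (tso)
		v4h->hdr_checksum = 0;
	v4h->total_length = rte_cpu_to_be_16(plen - l2len);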
> + }
> v4h->total_length = rte_cpu_to_be_16(plen - l2len);
> /* IPv6 */
> } else {
> diff --git a/lib/ipsec/sa.c b/lib/ipsec/sa.c
> index 720e0f365b..2ecbbce0a4 100644
> --- a/lib/ipsec/sa.c
> +++ b/lib/ipsec/sa.c
> @@ -565,6 +565,12 @@ rte_ipsec_sa_init(struct rte_ipsec_sa *sa, const struct rte_ipsec_sa_prm *prm,
> sa->type = type;
> sa->size = sz;
>
> +
> + if (prm->ipsec_xform.options.tso == 1) {
> + sa->tso.enabled = 1;
> + sa->tso.mss = prm->ipsec_xform.mss;
> + }
> +
> /* check for ESN flag */
> sa->sqn_mask = (prm->ipsec_xform.options.esn == 0) ?
> UINT32_MAX : UINT64_MAX;
> diff --git a/lib/ipsec/sa.h b/lib/ipsec/sa.h
> index 107ebd1519..5e237f3525 100644
> --- a/lib/ipsec/sa.h
> +++ b/lib/ipsec/sa.h
> @@ -113,6 +113,10 @@ struct rte_ipsec_sa {
> uint8_t iv_len;
> uint8_t pad_align;
> uint8_t tos_mask;
> + struct {
> + uint8_t enabled:1;
> + uint16_t mss;
> + } tso;
Wouldn't one field be enough?
uint16_t tso_mss;
And if it is zero, then TSO is disabled.
In fact, do we need it at all?
Wouldn't it be better to request the user to fill mbuf->tso_segsz properly for us?
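A sketch of the one-field variant, with zero meaning disabled:

	/* in struct rte_ipsec_sa */
	uint16_t tso_mss; /* 0 == TSO disabled */

	/* at the use site */
	if (sa->tso_mss != 0 && pkt_l3len > sa->tso_mss)
		m->tso_segsz = sa->tso_mss;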
>
> /* template for tunnel header */
> uint8_t hdr[IPSEC_MAX_HDR_SIZE];
> --
> 2.25.1
On 9/23/2021 3:09 PM, Ananyev, Konstantin wrote:
>
>> Add support for transmit segmentation offload to inline crypto processing
>> mode. This offload is not supported by other offload modes, as at a
>> minimum it requires inline crypto for IPsec to be supported on the
>> network interface.
>>
>> Signed-off-by: Declan Doherty <declan.doherty@intel.com>
>> Signed-off-by: Radu Nicolau <radu.nicolau@intel.com>
>> Signed-off-by: Abhijit Sinha <abhijit.sinha@intel.com>
>> Signed-off-by: Daniel Martin Buckley <daniel.m.buckley@intel.com>
>> Acked-by: Fan Zhang <roy.fan.zhang@intel.com>
>> ---
>> lib/ipsec/esp_inb.c | 4 +-
>> lib/ipsec/esp_outb.c | 115 +++++++++++++++++++++++++++++++++++--------
>> lib/ipsec/iph.h | 10 +++-
>> lib/ipsec/sa.c | 6 +++
>> lib/ipsec/sa.h | 4 ++
>> 5 files changed, 114 insertions(+), 25 deletions(-)
>>
>> diff --git a/lib/ipsec/esp_inb.c b/lib/ipsec/esp_inb.c
>> index d66c88f05d..a6ab8fbdd5 100644
>> --- a/lib/ipsec/esp_inb.c
>> +++ b/lib/ipsec/esp_inb.c
>> @@ -668,8 +668,8 @@ trs_process(const struct rte_ipsec_sa *sa, struct rte_mbuf *mb[],
>> /* modify packet's layout */
>> np = trs_process_step2(mb[i], ml[i], hl[i], cofs,
>> to[i], tl, sqn + k);
>> - update_trs_l3hdr(sa, np + l2, mb[i]->pkt_len,
>> - l2, hl[i] - l2, espt[i].next_proto);
>> + update_trs_l34hdrs(sa, np + l2, mb[i]->pkt_len,
>> + l2, hl[i] - l2, espt[i].next_proto, 0);
>>
>> /* update mbuf's metadata */
>> trs_process_step3(mb[i]);
>> diff --git a/lib/ipsec/esp_outb.c b/lib/ipsec/esp_outb.c
>> index a3f77469c3..9fc7075796 100644
>> --- a/lib/ipsec/esp_outb.c
>> +++ b/lib/ipsec/esp_outb.c
>> @@ -2,6 +2,8 @@
>> * Copyright(c) 2018-2020 Intel Corporation
>> */
>>
>> +#include <math.h>
>> +
>> #include <rte_ipsec.h>
>> #include <rte_esp.h>
>> #include <rte_ip.h>
>> @@ -156,11 +158,20 @@ outb_tun_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
>>
>> /* number of bytes to encrypt */
>> clen = plen + sizeof(*espt);
>> - clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
>> +
>> + /* We don't need to pad/align packet when using TSO offload */
>> + if (likely(!(mb->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))))
>> + clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
>> +
> Here and everywhere:
> It doesn't look nice that we have to pollute generic functions with
> checks for TSO-specific flags all over the place.
> Could we have a specific prepare/process function for the inline+tso case,
> as we already have for the cpu and inline cases right now?
> Or just update the inline version?
I looked at doing this, but unless I duplicate these two functions I can't
move this out.
>
>> /* pad length + esp tail */
>> pdlen = clen - plen;
>> - tlen = pdlen + sa->icv_len + sqh_len;
>> +
>> + /* We don't append ICV length when using TSO offload */
>> + if (likely(!(mb->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))))
>> + tlen = pdlen + sa->icv_len + sqh_len;
>> + else
>> + tlen = pdlen + sqh_len;
>>
>> /* do append and prepend */
>> ml = rte_pktmbuf_lastseg(mb);
>> @@ -337,6 +348,7 @@ outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
>> char *ph, *pt;
>> uint64_t *iv;
>> uint32_t l2len, l3len;
>> + uint8_t tso = mb->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG) ? 1 : 0;
>>
>> l2len = mb->l2_len;
>> l3len = mb->l3_len;
>> @@ -349,11 +361,19 @@ outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
>>
>> /* number of bytes to encrypt */
>> clen = plen + sizeof(*espt);
>> - clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
>> +
>> + /* We don't need to pad/align packet when using TSO offload */
>> + if (likely(!tso))
>> + clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
>>
>> /* pad length + esp tail */
>> pdlen = clen - plen;
>> - tlen = pdlen + sa->icv_len + sqh_len;
>> +
>> + /* We don't append ICV length when using TSO offload */
>> + if (likely(!tso))
>> + tlen = pdlen + sa->icv_len + sqh_len;
>> + else
>> + tlen = pdlen + sqh_len;
>>
>> /* do append and insert */
>> ml = rte_pktmbuf_lastseg(mb);
>> @@ -375,8 +395,8 @@ outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
>> insert_esph(ph, ph + hlen, uhlen);
>>
>> /* update ip header fields */
>> - np = update_trs_l3hdr(sa, ph + l2len, mb->pkt_len - sqh_len, l2len,
>> - l3len, IPPROTO_ESP);
>> + np = update_trs_l34hdrs(sa, ph + l2len, mb->pkt_len - sqh_len, l2len,
>> + l3len, IPPROTO_ESP, tso);
>>
>> /* update spi, seqn and iv */
>> esph = (struct rte_esp_hdr *)(ph + uhlen);
>> @@ -651,6 +671,33 @@ inline_outb_mbuf_prepare(const struct rte_ipsec_session *ss,
>> }
>> }
>>
>> +/* check if packet will exceed MSS and segmentation is required */
>> +static inline int
>> +esn_outb_nb_segments(const struct rte_ipsec_sa *sa, struct rte_mbuf *m) {
>> + uint16_t segments = 1;
>> + uint16_t pkt_l3len = m->pkt_len - m->l2_len;
>> +
>> + /* Only support segmentation for UDP/TCP flows */
>> + if (!(m->packet_type & (RTE_PTYPE_L4_UDP | RTE_PTYPE_L4_TCP)))
>> + return segments;
>> +
>> + if (sa->tso.enabled && pkt_l3len > sa->tso.mss) {
>> + segments = ceil((float)pkt_l3len / sa->tso.mss);
> Float calculations in the middle of the data path,
> just to calculate a round-up?
> That doesn't look good to me at all.
It doesn't look good to me either - I will rework it.
>
>> +
>> + if (m->packet_type & RTE_PTYPE_L4_TCP) {
>> + m->ol_flags |= (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM);
> That's really strange - why would the ipsec library set PKT_TX_TCP_SEG unconditionally?
> That should be the responsibility of the upper layer, I think.
> In the lib we should only check whether TSO was requested for that packet or not.
> Same for UDP.
These are under an if(TSO) condition.
>
>> + m->l4_len = sizeof(struct rte_tcp_hdr);
> Hmm, how do we know there are no TCP options present for that packet?
> Wouldn't it be better to expect the user to provide a proper l4_len for such packets?
You're right, I will update it.
>
>> + } else {
>> + m->ol_flags |= (PKT_TX_UDP_SEG | PKT_TX_UDP_CKSUM);
>> + m->l4_len = sizeof(struct rte_udp_hdr);
>> + }
>> +
>> + m->tso_segsz = sa->tso.mss;
>> + }
>> +
>> + return segments;
>> +}
>> +
>> /*
>> * process group of ESP outbound tunnel packets destined for
>> * INLINE_CRYPTO type of device.
>> @@ -660,24 +707,29 @@ inline_outb_tun_pkt_process(const struct rte_ipsec_session *ss,
>> struct rte_mbuf *mb[], uint16_t num)
>> {
>> int32_t rc;
>> - uint32_t i, k, n;
>> + uint32_t i, k, nb_sqn = 0, nb_sqn_alloc;
>> uint64_t sqn;
>> rte_be64_t sqc;
>> struct rte_ipsec_sa *sa;
>> union sym_op_data icv;
>> uint64_t iv[IPSEC_MAX_IV_QWORD];
>> uint32_t dr[num];
>> + uint16_t nb_segs[num];
>>
>> sa = ss->sa;
>>
>> - n = num;
>> - sqn = esn_outb_update_sqn(sa, &n);
>> - if (n != num)
>> + for (i = 0; i != num; i++) {
>> + nb_segs[i] = esn_outb_nb_segments(sa, mb[i]);
>> + nb_sqn += nb_segs[i];
>> + }
>> +
>> + nb_sqn_alloc = nb_sqn;
>> + sqn = esn_outb_update_sqn(sa, &nb_sqn_alloc);
>> + if (nb_sqn_alloc != nb_sqn)
>> rte_errno = EOVERFLOW;
>>
>> k = 0;
>> - for (i = 0; i != n; i++) {
>> -
>> + for (i = 0; i != num; i++) {
>> sqc = rte_cpu_to_be_64(sqn + i);
>> gen_iv(iv, sqc);
>>
>> @@ -691,11 +743,18 @@ inline_outb_tun_pkt_process(const struct rte_ipsec_session *ss,
>> dr[i - k] = i;
>> rte_errno = -rc;
>> }
>> +
>> + /**
>> + * If packet is using tso, increment sqn by the number of
>> + * segments for packet
>> + */
>> + if (mb[i]->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))
>> + sqn += nb_segs[i] - 1;
>> }
>>
>> /* copy not processed mbufs beyond good ones */
>> - if (k != n && k != 0)
>> - move_bad_mbufs(mb, dr, n, n - k);
>> + if (k != num && k != 0)
>> + move_bad_mbufs(mb, dr, num, num - k);
>>
>> inline_outb_mbuf_prepare(ss, mb, k);
>> return k;
>> @@ -710,23 +769,30 @@ inline_outb_trs_pkt_process(const struct rte_ipsec_session *ss,
>> struct rte_mbuf *mb[], uint16_t num)
>> {
>> int32_t rc;
>> - uint32_t i, k, n;
>> + uint32_t i, k, nb_sqn, nb_sqn_alloc;
>> uint64_t sqn;
>> rte_be64_t sqc;
>> struct rte_ipsec_sa *sa;
>> union sym_op_data icv;
>> uint64_t iv[IPSEC_MAX_IV_QWORD];
>> uint32_t dr[num];
>> + uint16_t nb_segs[num];
>>
>> sa = ss->sa;
>>
>> - n = num;
>> - sqn = esn_outb_update_sqn(sa, &n);
>> - if (n != num)
>> + /* Calculate number of sequence numbers required */
>> + for (i = 0, nb_sqn = 0; i != num; i++) {
>> + nb_segs[i] = esn_outb_nb_segments(sa, mb[i]);
>> + nb_sqn += nb_segs[i];
>> + }
>> +
>> + nb_sqn_alloc = nb_sqn;
>> + sqn = esn_outb_update_sqn(sa, &nb_sqn_alloc);
>> + if (nb_sqn_alloc != nb_sqn)
>> rte_errno = EOVERFLOW;
>>
>> k = 0;
>> - for (i = 0; i != n; i++) {
>> + for (i = 0; i != num; i++) {
>>
>> sqc = rte_cpu_to_be_64(sqn + i);
>> gen_iv(iv, sqc);
>> @@ -741,11 +807,18 @@ inline_outb_trs_pkt_process(const struct rte_ipsec_session *ss,
>> dr[i - k] = i;
>> rte_errno = -rc;
>> }
>> +
>> + /**
>> + * If packet is using tso, increment sqn by the number of
>> + * segments for packet
>> + */
>> + if (mb[i]->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))
>> + sqn += nb_segs[i] - 1;
>> }
>>
>> /* copy not processed mbufs beyond good ones */
>> - if (k != n && k != 0)
>> - move_bad_mbufs(mb, dr, n, n - k);
>> + if (k != num && k != 0)
>> + move_bad_mbufs(mb, dr, num, num - k);
>>
>> inline_outb_mbuf_prepare(ss, mb, k);
>> return k;
>> diff --git a/lib/ipsec/iph.h b/lib/ipsec/iph.h
>> index 861f16905a..2d223199ac 100644
>> --- a/lib/ipsec/iph.h
>> +++ b/lib/ipsec/iph.h
>> @@ -6,6 +6,8 @@
>> #define _IPH_H_
>>
>> #include <rte_ip.h>
>> +#include <rte_udp.h>
>> +#include <rte_tcp.h>
>>
>> /**
>> * @file iph.h
>> @@ -39,8 +41,8 @@ insert_esph(char *np, char *op, uint32_t hlen)
>>
>> /* update original ip header fields for transport case */
>> static inline int
>> -update_trs_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
>> - uint32_t l2len, uint32_t l3len, uint8_t proto)
>> +update_trs_l34hdrs(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
>> + uint32_t l2len, uint32_t l3len, uint8_t proto, uint8_t tso)
> Hmm... why change the name of the function?
>
>> {
>> int32_t rc;
>>
>> @@ -51,6 +53,10 @@ update_trs_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
>> v4h = p;
>> rc = v4h->next_proto_id;
>> v4h->next_proto_id = proto;
>> + if (tso) {
>> + v4h->hdr_checksum = 0;
>> + v4h->total_length = 0;
> total_length will be overwritten unconditionally by the next line below.
>
> Another question - why is it necessary?
> Is it a HW-specific requirement or ... ?
It looks wrong; I will rewrite it.
>
>
>> + }
>> v4h->total_length = rte_cpu_to_be_16(plen - l2len);
>
>> /* IPv6 */
>> } else {
>> diff --git a/lib/ipsec/sa.c b/lib/ipsec/sa.c
>> index 720e0f365b..2ecbbce0a4 100644
>> --- a/lib/ipsec/sa.c
>> +++ b/lib/ipsec/sa.c
>> @@ -565,6 +565,12 @@ rte_ipsec_sa_init(struct rte_ipsec_sa *sa, const struct rte_ipsec_sa_prm *prm,
>> sa->type = type;
>> sa->size = sz;
>>
>> +
>> + if (prm->ipsec_xform.options.tso == 1) {
>> + sa->tso.enabled = 1;
>> + sa->tso.mss = prm->ipsec_xform.mss;
>> + }
>> +
>> /* check for ESN flag */
>> sa->sqn_mask = (prm->ipsec_xform.options.esn == 0) ?
>> UINT32_MAX : UINT64_MAX;
>> diff --git a/lib/ipsec/sa.h b/lib/ipsec/sa.h
>> index 107ebd1519..5e237f3525 100644
>> --- a/lib/ipsec/sa.h
>> +++ b/lib/ipsec/sa.h
>> @@ -113,6 +113,10 @@ struct rte_ipsec_sa {
>> uint8_t iv_len;
>> uint8_t pad_align;
>> uint8_t tos_mask;
>> + struct {
>> + uint8_t enabled:1;
>> + uint16_t mss;
>> + } tso;
> Wouldn't one field be enough?
> uint16_t tso_mss;
> And if it is zero, then TSO is disabled.
> In fact, do we need it at all?
> Wouldn't it be better to request the user to fill mbuf->tso_segsz properly for us?
We added an option to rte_security_ipsec_sa_options to allow the user to
enable TSO per SA and specify the MSS in the session parameters.
We can request the user to fill mbuf->tso_segsz, but with this patch we are
doing it for the user.
>
>> /* template for tunnel header */
>> uint8_t hdr[IPSEC_MAX_HDR_SIZE];
>> --
>> 2.25.1
> On 9/23/2021 3:09 PM, Ananyev, Konstantin wrote:
> >
> >> Add support for transmit segmentation offload to inline crypto processing
> >> mode. This offload is not supported by other offload modes, as at a
> >> minimum it requires inline crypto for IPsec to be supported on the
> >> network interface.
> >>
> >> Signed-off-by: Declan Doherty <declan.doherty@intel.com>
> >> Signed-off-by: Radu Nicolau <radu.nicolau@intel.com>
> >> Signed-off-by: Abhijit Sinha <abhijit.sinha@intel.com>
> >> Signed-off-by: Daniel Martin Buckley <daniel.m.buckley@intel.com>
> >> Acked-by: Fan Zhang <roy.fan.zhang@intel.com>
> >> ---
> >> lib/ipsec/esp_inb.c | 4 +-
> >> lib/ipsec/esp_outb.c | 115 +++++++++++++++++++++++++++++++++++--------
> >> lib/ipsec/iph.h | 10 +++-
> >> lib/ipsec/sa.c | 6 +++
> >> lib/ipsec/sa.h | 4 ++
> >> 5 files changed, 114 insertions(+), 25 deletions(-)
> >>
> >> diff --git a/lib/ipsec/esp_inb.c b/lib/ipsec/esp_inb.c
> >> index d66c88f05d..a6ab8fbdd5 100644
> >> --- a/lib/ipsec/esp_inb.c
> >> +++ b/lib/ipsec/esp_inb.c
> >> @@ -668,8 +668,8 @@ trs_process(const struct rte_ipsec_sa *sa, struct rte_mbuf *mb[],
> >> /* modify packet's layout */
> >> np = trs_process_step2(mb[i], ml[i], hl[i], cofs,
> >> to[i], tl, sqn + k);
> >> - update_trs_l3hdr(sa, np + l2, mb[i]->pkt_len,
> >> - l2, hl[i] - l2, espt[i].next_proto);
> >> + update_trs_l34hdrs(sa, np + l2, mb[i]->pkt_len,
> >> + l2, hl[i] - l2, espt[i].next_proto, 0);
> >>
> >> /* update mbuf's metadata */
> >> trs_process_step3(mb[i]);
> >> diff --git a/lib/ipsec/esp_outb.c b/lib/ipsec/esp_outb.c
> >> index a3f77469c3..9fc7075796 100644
> >> --- a/lib/ipsec/esp_outb.c
> >> +++ b/lib/ipsec/esp_outb.c
> >> @@ -2,6 +2,8 @@
> >> * Copyright(c) 2018-2020 Intel Corporation
> >> */
> >>
> >> +#include <math.h>
> >> +
> >> #include <rte_ipsec.h>
> >> #include <rte_esp.h>
> >> #include <rte_ip.h>
> >> @@ -156,11 +158,20 @@ outb_tun_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
> >>
> >> /* number of bytes to encrypt */
> >> clen = plen + sizeof(*espt);
> >> - clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> >> +
> >> + /* We don't need to pad/align packet when using TSO offload */
> >> + if (likely(!(mb->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))))
> >> + clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> >> +
> > Here and everywhere:
> > It doesn't look nice that we have to pollute generic functions with
> > checks for TSO-specific flags all over the place.
> > Could we have a specific prepare/process function for the inline+tso case,
> > as we already have for the cpu and inline cases right now?
> > Or just update the inline version?
> I looked at doing this, but unless I duplicate these two functions I can't
> move this out.
> >
> >> /* pad length + esp tail */
> >> pdlen = clen - plen;
> >> - tlen = pdlen + sa->icv_len + sqh_len;
> >> +
> >> + /* We don't append ICV length when using TSO offload */
> >> + if (likely(!(mb->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))))
> >> + tlen = pdlen + sa->icv_len + sqh_len;
> >> + else
> >> + tlen = pdlen + sqh_len;
> >>
> >> /* do append and prepend */
> >> ml = rte_pktmbuf_lastseg(mb);
> >> @@ -337,6 +348,7 @@ outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
> >> char *ph, *pt;
> >> uint64_t *iv;
> >> uint32_t l2len, l3len;
> >> + uint8_t tso = mb->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG) ? 1 : 0;
> >>
> >> l2len = mb->l2_len;
> >> l3len = mb->l3_len;
> >> @@ -349,11 +361,19 @@ outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
> >>
> >> /* number of bytes to encrypt */
> >> clen = plen + sizeof(*espt);
> >> - clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> >> +
> >> + /* We don't need to pad/align packet when using TSO offload */
> >> + if (likely(!tso))
> >> + clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> >>
> >> /* pad length + esp tail */
> >> pdlen = clen - plen;
> >> - tlen = pdlen + sa->icv_len + sqh_len;
> >> +
> >> + /* We don't append ICV length when using TSO offload */
> >> + if (likely(!tso))
> >> + tlen = pdlen + sa->icv_len + sqh_len;
> >> + else
> >> + tlen = pdlen + sqh_len;
> >>
> >> /* do append and insert */
> >> ml = rte_pktmbuf_lastseg(mb);
> >> @@ -375,8 +395,8 @@ outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
> >> insert_esph(ph, ph + hlen, uhlen);
> >>
> >> /* update ip header fields */
> >> - np = update_trs_l3hdr(sa, ph + l2len, mb->pkt_len - sqh_len, l2len,
> >> - l3len, IPPROTO_ESP);
> >> + np = update_trs_l34hdrs(sa, ph + l2len, mb->pkt_len - sqh_len, l2len,
> >> + l3len, IPPROTO_ESP, tso);
> >>
> >> /* update spi, seqn and iv */
> >> esph = (struct rte_esp_hdr *)(ph + uhlen);
> >> @@ -651,6 +671,33 @@ inline_outb_mbuf_prepare(const struct rte_ipsec_session *ss,
> >> }
> >> }
> >>
> >> +/* check if packet will exceed MSS and segmentation is required */
> >> +static inline int
> >> +esn_outb_nb_segments(const struct rte_ipsec_sa *sa, struct rte_mbuf *m) {
> >> + uint16_t segments = 1;
> >> + uint16_t pkt_l3len = m->pkt_len - m->l2_len;
> >> +
> >> + /* Only support segmentation for UDP/TCP flows */
> >> + if (!(m->packet_type & (RTE_PTYPE_L4_UDP | RTE_PTYPE_L4_TCP)))
> >> + return segments;
> >> +
> >> + if (sa->tso.enabled && pkt_l3len > sa->tso.mss) {
> >> + segments = ceil((float)pkt_l3len / sa->tso.mss);
> > Float calculations in the middle of the data path,
> > just to calculate a round-up?
> > That doesn't look good to me at all.
> It doesn't look good to me either - I will rework it.
> >
> >> +
> >> + if (m->packet_type & RTE_PTYPE_L4_TCP) {
> >> + m->ol_flags |= (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM);
> > That's really strange - why would the ipsec library set PKT_TX_TCP_SEG unconditionally?
> > That should be the responsibility of the upper layer, I think.
> > In the lib we should only check whether TSO was requested for that packet or not.
> > Same for UDP.
> These are under an if(TSO) condition.
> >
> >> + m->l4_len = sizeof(struct rte_tcp_hdr);
> > Hmm, how do we know there are no TCP options present for that packet?
> > Wouldn't it be better to expect the user to provide a proper l4_len for such packets?
> You're right, I will update it.
>
> >
> >> + } else {
> >> + m->ol_flags |= (PKT_TX_UDP_SEG | PKT_TX_UDP_CKSUM);
> >> + m->l4_len = sizeof(struct rte_udp_hdr);
> >> + }
> >> +
> >> + m->tso_segsz = sa->tso.mss;
> >> + }
> >> +
> >> + return segments;
> >> +}
> >> +
> >> /*
> >> * process group of ESP outbound tunnel packets destined for
> >> * INLINE_CRYPTO type of device.
> >> @@ -660,24 +707,29 @@ inline_outb_tun_pkt_process(const struct rte_ipsec_session *ss,
> >> struct rte_mbuf *mb[], uint16_t num)
> >> {
> >> int32_t rc;
> >> - uint32_t i, k, n;
> >> + uint32_t i, k, nb_sqn = 0, nb_sqn_alloc;
> >> uint64_t sqn;
> >> rte_be64_t sqc;
> >> struct rte_ipsec_sa *sa;
> >> union sym_op_data icv;
> >> uint64_t iv[IPSEC_MAX_IV_QWORD];
> >> uint32_t dr[num];
> >> + uint16_t nb_segs[num];
> >>
> >> sa = ss->sa;
> >>
> >> - n = num;
> >> - sqn = esn_outb_update_sqn(sa, &n);
> >> - if (n != num)
> >> + for (i = 0; i != num; i++) {
> >> + nb_segs[i] = esn_outb_nb_segments(sa, mb[i]);
> >> + nb_sqn += nb_segs[i];
> >> + }
> >> +
> >> + nb_sqn_alloc = nb_sqn;
> >> + sqn = esn_outb_update_sqn(sa, &nb_sqn_alloc);
> >> + if (nb_sqn_alloc != nb_sqn)
> >> rte_errno = EOVERFLOW;
> >>
> >> k = 0;
> >> - for (i = 0; i != n; i++) {
> >> -
> >> + for (i = 0; i != num; i++) {
> >> sqc = rte_cpu_to_be_64(sqn + i);
> >> gen_iv(iv, sqc);
> >>
> >> @@ -691,11 +743,18 @@ inline_outb_tun_pkt_process(const struct rte_ipsec_session *ss,
> >> dr[i - k] = i;
> >> rte_errno = -rc;
> >> }
> >> +
> >> + /**
> >> + * If packet is using tso, increment sqn by the number of
> >> + * segments for packet
> >> + */
> >> + if (mb[i]->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))
> >> + sqn += nb_segs[i] - 1;
> >> }
> >>
> >> /* copy not processed mbufs beyond good ones */
> >> - if (k != n && k != 0)
> >> - move_bad_mbufs(mb, dr, n, n - k);
> >> + if (k != num && k != 0)
> >> + move_bad_mbufs(mb, dr, num, num - k);
> >>
> >> inline_outb_mbuf_prepare(ss, mb, k);
> >> return k;
> >> @@ -710,23 +769,30 @@ inline_outb_trs_pkt_process(const struct rte_ipsec_session *ss,
> >> struct rte_mbuf *mb[], uint16_t num)
> >> {
> >> int32_t rc;
> >> - uint32_t i, k, n;
> >> + uint32_t i, k, nb_sqn, nb_sqn_alloc;
> >> uint64_t sqn;
> >> rte_be64_t sqc;
> >> struct rte_ipsec_sa *sa;
> >> union sym_op_data icv;
> >> uint64_t iv[IPSEC_MAX_IV_QWORD];
> >> uint32_t dr[num];
> >> + uint16_t nb_segs[num];
> >>
> >> sa = ss->sa;
> >>
> >> - n = num;
> >> - sqn = esn_outb_update_sqn(sa, &n);
> >> - if (n != num)
> >> + /* Calculate number of sequence numbers required */
> >> + for (i = 0, nb_sqn = 0; i != num; i++) {
> >> + nb_segs[i] = esn_outb_nb_segments(sa, mb[i]);
> >> + nb_sqn += nb_segs[i];
> >> + }
> >> +
> >> + nb_sqn_alloc = nb_sqn;
> >> + sqn = esn_outb_update_sqn(sa, &nb_sqn_alloc);
> >> + if (nb_sqn_alloc != nb_sqn)
> >> rte_errno = EOVERFLOW;
> >>
> >> k = 0;
> >> - for (i = 0; i != n; i++) {
> >> + for (i = 0; i != num; i++) {
> >>
> >> sqc = rte_cpu_to_be_64(sqn + i);
> >> gen_iv(iv, sqc);
> >> @@ -741,11 +807,18 @@ inline_outb_trs_pkt_process(const struct rte_ipsec_session *ss,
> >> dr[i - k] = i;
> >> rte_errno = -rc;
> >> }
> >> +
> >> + /**
> >> + * If packet is using tso, increment sqn by the number of
> >> + * segments for packet
> >> + */
> >> + if (mb[i]->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))
> >> + sqn += nb_segs[i] - 1;
> >> }
> >>
> >> /* copy not processed mbufs beyond good ones */
> >> - if (k != n && k != 0)
> >> - move_bad_mbufs(mb, dr, n, n - k);
> >> + if (k != num && k != 0)
> >> + move_bad_mbufs(mb, dr, num, num - k);
> >>
> >> inline_outb_mbuf_prepare(ss, mb, k);
> >> return k;
> >> diff --git a/lib/ipsec/iph.h b/lib/ipsec/iph.h
> >> index 861f16905a..2d223199ac 100644
> >> --- a/lib/ipsec/iph.h
> >> +++ b/lib/ipsec/iph.h
> >> @@ -6,6 +6,8 @@
> >> #define _IPH_H_
> >>
> >> #include <rte_ip.h>
> >> +#include <rte_udp.h>
> >> +#include <rte_tcp.h>
> >>
> >> /**
> >> * @file iph.h
> >> @@ -39,8 +41,8 @@ insert_esph(char *np, char *op, uint32_t hlen)
> >>
> >> /* update original ip header fields for transport case */
> >> static inline int
> >> -update_trs_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
> >> - uint32_t l2len, uint32_t l3len, uint8_t proto)
> >> +update_trs_l34hdrs(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
> >> + uint32_t l2len, uint32_t l3len, uint8_t proto, uint8_t tso)
> > Hmm... why change the name of the function?
> >
> >> {
> >> int32_t rc;
> >>
> >> @@ -51,6 +53,10 @@ update_trs_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
> >> v4h = p;
> >> rc = v4h->next_proto_id;
> >> v4h->next_proto_id = proto;
> >> + if (tso) {
> >> + v4h->hdr_checksum = 0;
> >> + v4h->total_length = 0;
> > total_length will be overwritten unconditionally by the next line below.
> >
> > Another question - why is it necessary?
> > Is it a HW-specific requirement or ... ?
> It looks wrong; I will rewrite it.
> >
> >
> >> + }
> >> v4h->total_length = rte_cpu_to_be_16(plen - l2len);
> >
> >> /* IPv6 */
> >> } else {
> >> diff --git a/lib/ipsec/sa.c b/lib/ipsec/sa.c
> >> index 720e0f365b..2ecbbce0a4 100644
> >> --- a/lib/ipsec/sa.c
> >> +++ b/lib/ipsec/sa.c
> >> @@ -565,6 +565,12 @@ rte_ipsec_sa_init(struct rte_ipsec_sa *sa, const struct rte_ipsec_sa_prm *prm,
> >> sa->type = type;
> >> sa->size = sz;
> >>
> >> +
> >> + if (prm->ipsec_xform.options.tso == 1) {
> >> + sa->tso.enabled = 1;
> >> + sa->tso.mss = prm->ipsec_xform.mss;
> >> + }
> >> +
> >> /* check for ESN flag */
> >> sa->sqn_mask = (prm->ipsec_xform.options.esn == 0) ?
> >> UINT32_MAX : UINT64_MAX;
> >> diff --git a/lib/ipsec/sa.h b/lib/ipsec/sa.h
> >> index 107ebd1519..5e237f3525 100644
> >> --- a/lib/ipsec/sa.h
> >> +++ b/lib/ipsec/sa.h
> >> @@ -113,6 +113,10 @@ struct rte_ipsec_sa {
> >> uint8_t iv_len;
> >> uint8_t pad_align;
> >> uint8_t tos_mask;
> >> + struct {
> >> + uint8_t enabled:1;
> >> + uint16_t mss;
> >> + } tso;
> > Wouldn't one field be enough?
> > uint16_t tso_mss;
> > And if it is zero, then TSO is disabled.
> > In fact, do we need it at all?
> > Wouldn't it be better to request the user to fill mbuf->tso_segsz properly for us?
>
> We added an option to rte_security_ipsec_sa_options to allow the user to
> enable TSO per SA and specify the MSS in the session parameters.
After another thought, it doesn't look like a good approach to me:
on one side, the same SA can be used for multiple IP addresses;
on the other side, the MSS value can differ on a per-connection basis.
So different TCP connections within the same SA can easily have different MSS values.
So I think we shouldn't save the MSS in the SA at all.
Instead, we probably need to request the user to fill mbuf->tso_segsz for us.
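For example, the application would then set everything up per packet before
handing it to the library - a sketch, where l4_hdr_len and conn_mss are
hypothetical per-connection values kept by the application:

	m->ol_flags |= PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM;
	m->l4_len = l4_hdr_len;   /* including any TCP options */
	m->tso_segsz = conn_mss;  /* per-connection MSS */
	k = rte_ipsec_pkt_process(ss, &m, 1);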
>
> We can request the user to fill mbuf->tso_segsz, but with this patch we are
> doing it for the user.
>
> >
> >> /* template for tunnel header */
> >> uint8_t hdr[IPSEC_MAX_HDR_SIZE];
> >> --
> >> 2.25.1