[dpdk-dev] [PATCH v9 06/10] ipsec: add transmit segmentation offload support

Ananyev, Konstantin konstantin.ananyev at intel.com
Thu Oct 14 16:42:08 CEST 2021
Previous message (by thread): [dpdk-dev] [PATCH v9 06/10] ipsec: add transmit segmentation offload support
Next message (by thread): [dpdk-dev] [PATCH v9 06/10] ipsec: add transmit segmentation offload support
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

> > Add support for transmit segmentation offload to inline crypto processing
> mode. This offload is not supported by other offload modes, as at a
> minimum it requires inline crypto for IPsec to be supported on the
> network interface.

Thanks for the rework.
It looks much better to me now, but I still have a few more comments.
Konstantin
 
> Signed-off-by: Declan Doherty <declan.doherty at intel.com>
> Signed-off-by: Radu Nicolau <radu.nicolau at intel.com>
> Signed-off-by: Abhijit Sinha <abhijit.sinha at intel.com>
> Signed-off-by: Daniel Martin Buckley <daniel.m.buckley at intel.com>
> Acked-by: Fan Zhang <roy.fan.zhang at intel.com>
> ---
>  doc/guides/prog_guide/ipsec_lib.rst    |   2 +
>  doc/guides/rel_notes/release_21_11.rst |   1 +
>  lib/ipsec/esp_outb.c                   | 120 +++++++++++++++++++------
>  3 files changed, 97 insertions(+), 26 deletions(-)
> 
> diff --git a/doc/guides/prog_guide/ipsec_lib.rst b/doc/guides/prog_guide/ipsec_lib.rst
> index af51ff8131..fc0af5eadb 100644
> --- a/doc/guides/prog_guide/ipsec_lib.rst
> +++ b/doc/guides/prog_guide/ipsec_lib.rst
> @@ -315,6 +315,8 @@ Supported features
> 
>  *  NAT-T / UDP encapsulated ESP.
> 
> +*  TSO support (only for inline crypto mode)
> +
>  *  algorithms: 3DES-CBC, AES-CBC, AES-CTR, AES-GCM, AES_CCM, CHACHA20_POLY1305,
>     AES_GMAC, HMAC-SHA1, NULL.
> 
> diff --git a/doc/guides/rel_notes/release_21_11.rst b/doc/guides/rel_notes/release_21_11.rst
> index e9fb169d44..0a9c71d92e 100644
> --- a/doc/guides/rel_notes/release_21_11.rst
> +++ b/doc/guides/rel_notes/release_21_11.rst
> @@ -158,6 +158,7 @@ New Features
> 
>    * Added support for AEAD algorithms AES_CCM, CHACHA20_POLY1305 and AES_GMAC.
>    * Added support for NAT-T / UDP encapsulated ESP
> +  * Added support TSO offload support; only supported for inline crypto mode.
> 
> 
>  Removed Items
> diff --git a/lib/ipsec/esp_outb.c b/lib/ipsec/esp_outb.c
> index 0e3314b358..d327c32a38 100644
> --- a/lib/ipsec/esp_outb.c
> +++ b/lib/ipsec/esp_outb.c
> @@ -18,7 +18,7 @@
> 
>  typedef int32_t (*esp_outb_prepare_t)(struct rte_ipsec_sa *sa, rte_be64_t sqc,
>  	const uint64_t ivp[IPSEC_MAX_IV_QWORD], struct rte_mbuf *mb,
> -	union sym_op_data *icv, uint8_t sqh_len);
> +	union sym_op_data *icv, uint8_t sqh_len, uint8_t tso);
> 
>  /*
>   * helper function to fill crypto_sym op for cipher+auth algorithms.
> @@ -139,7 +139,7 @@ outb_cop_prepare(struct rte_crypto_op *cop,
>  static inline int32_t
>  outb_tun_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
>  	const uint64_t ivp[IPSEC_MAX_IV_QWORD], struct rte_mbuf *mb,
> -	union sym_op_data *icv, uint8_t sqh_len)
> +	union sym_op_data *icv, uint8_t sqh_len, uint8_t tso)
>  {
>  	uint32_t clen, hlen, l2len, pdlen, pdofs, plen, tlen;
>  	struct rte_mbuf *ml;
> @@ -157,11 +157,20 @@ outb_tun_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
> 
>  	/* number of bytes to encrypt */
>  	clen = plen + sizeof(*espt);
> -	clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> +
> +	/* We don't need to pad/align packet when using TSO offload */
> +	if (!tso)
> +		clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> +
> 
>  	/* pad length + esp tail */
>  	pdlen = clen - plen;
> -	tlen = pdlen + sa->icv_len + sqh_len;
> +
> +	/* We don't append ICV length when using TSO offload */
> +	if (!tso)
> +		tlen = pdlen + sa->icv_len + sqh_len;
> +	else
> +		tlen = pdlen + sqh_len;
> 
>  	/* do append and prepend */
>  	ml = rte_pktmbuf_lastseg(mb);
> @@ -309,7 +318,7 @@ esp_outb_tun_prepare(const struct rte_ipsec_session *ss, struct rte_mbuf *mb[],
> 
>  		/* try to update the packet itself */
>  		rc = outb_tun_pkt_prepare(sa, sqc, iv, mb[i], &icv,
> -					  sa->sqh_len);
> +					  sa->sqh_len, 0);
>  		/* success, setup crypto op */
>  		if (rc >= 0) {
>  			outb_pkt_xprepare(sa, sqc, &icv);
> @@ -336,7 +345,7 @@ esp_outb_tun_prepare(const struct rte_ipsec_session *ss, struct rte_mbuf *mb[],
>  static inline int32_t
>  outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
>  	const uint64_t ivp[IPSEC_MAX_IV_QWORD], struct rte_mbuf *mb,
> -	union sym_op_data *icv, uint8_t sqh_len)
> +	union sym_op_data *icv, uint8_t sqh_len, uint8_t tso)
>  {
>  	uint8_t np;
>  	uint32_t clen, hlen, pdlen, pdofs, plen, tlen, uhlen;
> @@ -358,11 +367,19 @@ outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
> 
>  	/* number of bytes to encrypt */
>  	clen = plen + sizeof(*espt);
> -	clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> +
> +	/* We don't need to pad/align packet when using TSO offload */
> +	if (!tso)
> +		clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> 
>  	/* pad length + esp tail */
>  	pdlen = clen - plen;
> -	tlen = pdlen + sa->icv_len + sqh_len;
> +
> +	/* We don't append ICV length when using TSO offload */
> +	if (!tso)
> +		tlen = pdlen + sa->icv_len + sqh_len;
> +	else
> +		tlen = pdlen + sqh_len;
> 
>  	/* do append and insert */
>  	ml = rte_pktmbuf_lastseg(mb);
> @@ -452,7 +469,7 @@ esp_outb_trs_prepare(const struct rte_ipsec_session *ss, struct rte_mbuf *mb[],
> 
>  		/* try to update the packet itself */
>  		rc = outb_trs_pkt_prepare(sa, sqc, iv, mb[i], &icv,
> -				  sa->sqh_len);
> +				  sa->sqh_len, 0);
>  		/* success, setup crypto op */
>  		if (rc >= 0) {
>  			outb_pkt_xprepare(sa, sqc, &icv);
> @@ -549,7 +566,7 @@ cpu_outb_pkt_prepare(const struct rte_ipsec_session *ss,
>  		gen_iv(ivbuf[k], sqc);
> 
>  		/* try to update the packet itself */
> -		rc = prepare(sa, sqc, ivbuf[k], mb[i], &icv, sa->sqh_len);
> +		rc = prepare(sa, sqc, ivbuf[k], mb[i], &icv, sa->sqh_len, 0);
> 
>  		/* success, proceed with preparations */
>  		if (rc >= 0) {
> @@ -660,6 +677,20 @@ inline_outb_mbuf_prepare(const struct rte_ipsec_session *ss,
>  	}
>  }
> 
> +
> +static inline int
> +esn_outb_nb_segments(struct rte_mbuf *m)
> +{
> +	if  (m->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG)) {
> +		uint16_t pkt_l3len = m->pkt_len - m->l2_len;
> +		uint16_t segments =
> +			(m->tso_segsz > 0 && pkt_l3len > m->tso_segsz) ?
> +			(pkt_l3len + m->tso_segsz - 1) / m->tso_segsz : 1;
> +		return segments;
> +	}
> +	return 1; /* no TSO */
> +}
> +
>  /*
>   * process group of ESP outbound tunnel packets destined for
>   * INLINE_CRYPTO type of device.
> @@ -669,29 +700,47 @@ inline_outb_tun_pkt_process(const struct rte_ipsec_session *ss,
>  	struct rte_mbuf *mb[], uint16_t num)
>  {
>  	int32_t rc;
> -	uint32_t i, k, n;
> +	uint32_t i, k, n, nb_sqn;
>  	uint64_t sqn;
>  	rte_be64_t sqc;
>  	struct rte_ipsec_sa *sa;
>  	union sym_op_data icv;
>  	uint64_t iv[IPSEC_MAX_IV_QWORD];
>  	uint32_t dr[num];
> +	uint16_t nb_segs[num];
> 
>  	sa = ss->sa;
> +	nb_sqn = 0;
> +	for (i = 0; i != num; i++) {
> +		nb_segs[i] = esn_outb_nb_segments(mb[i]);
> +		nb_sqn += nb_segs[i];
> +		/* setup outer l2 and l3 len for TSO */
> +		if (nb_segs[i] > 1) {
> +			if (sa->type & RTE_IPSEC_SATP_MODE_TUNLV4)
> +				mb[i]->outer_l3_len =
> +						sizeof(struct rte_ipv4_hdr);
> +			else
> +				mb[i]->outer_l3_len =
> +						sizeof(struct rte_ipv6_hdr);
> +			mb[i]->outer_l2_len = mb[i]->l2_len;

I still don't understand the logic behind setting these fields here.
How it looks to me:
This is tunnel mode, so the ipsec lib appends its tunnel header.
In the normal (non-TSO) case it sets up l2_len and l3_len from the values stored inside sa->tx_offload
(for the non-TSO case we don't care about the inner/outer distinction and don't have to set up
outer fields or set TX_PKT_OUTER flags).
Now for TSO we do need to do that, right?
So as I understand:
sa->tx_offload.l2_len will become mb->outer_l2_len
sa->tx_offload.l3_len will become mb->outer_l3_len
mb->l2_len should be set to zero
mb->l3_len, mb->l4_len, mb->tso_segsz should remain the same
(ipsec lib shouldn't modify them). 
Please correct me, if I missed something here.
Also note that right now we set up the mbuf tx_offload well below
these lines - in outb_tun_pkt_prepare().
So these changes probably have to be made after that function call.

		}
> +	}
> 
> -	n = num;
> +	n = nb_sqn;
>  	sqn = esn_outb_update_sqn(sa, &n);
> -	if (n != num)
> +	if (n != nb_sqn)
>  		rte_errno = EOVERFLOW;
> 
>  	k = 0;
> -	for (i = 0; i != n; i++) {
> +	for (i = 0; i != num; i++) {

As I stated in my previous mail, you can't just assume that n == num always.
That way you just ignore the SQN overflow error you get above.
The proper way would be to find out for how many full packets you have
valid SQN values, and set 'n' to that number.
I know it is an extra pain for TSO mode, but I don't see any better way here.

> 
> -		sqc = rte_cpu_to_be_64(sqn + i);
> +		sqc = rte_cpu_to_be_64(sqn);
>  		gen_iv(iv, sqc);
> +		sqn += nb_segs[i];
> 
>  		/* try to update the packet itself */
> -		rc = outb_tun_pkt_prepare(sa, sqc, iv, mb[i], &icv, 0);
> +		rc = outb_tun_pkt_prepare(sa, sqc, iv, mb[i], &icv, 0,
> +				nb_segs[i] > 1);

I don't think we have to make the decision based on the number of segments.
Even if the whole packet fits into one TCP segment, TX_TCP_SEG is still set for it,
so the HW/PMD expects the data in a different format.
It should probably be based on the flags value, something like:
mb[i]->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG).
 

> 
>  		k += (rc >= 0);
> 
> @@ -703,8 +752,8 @@ inline_outb_tun_pkt_process(const struct rte_ipsec_session *ss,
>  	}
> 
>  	/* copy not processed mbufs beyond good ones */
> -	if (k != n && k != 0)
> -		move_bad_mbufs(mb, dr, n, n - k);
> +	if (k != num && k != 0)
> +		move_bad_mbufs(mb, dr, num, num - k);
> 
>  	inline_outb_mbuf_prepare(ss, mb, k);
>  	return k;
> @@ -719,29 +768,48 @@ inline_outb_trs_pkt_process(const struct rte_ipsec_session *ss,
>  	struct rte_mbuf *mb[], uint16_t num)
>  {
>  	int32_t rc;
> -	uint32_t i, k, n;
> +	uint32_t i, k, n, nb_sqn;
>  	uint64_t sqn;
>  	rte_be64_t sqc;
>  	struct rte_ipsec_sa *sa;
>  	union sym_op_data icv;
>  	uint64_t iv[IPSEC_MAX_IV_QWORD];
>  	uint32_t dr[num];
> +	uint16_t nb_segs[num];
> 
>  	sa = ss->sa;
> +	nb_sqn = 0;
> +	/* Calculate number of sequence numbers required */
> +	for (i = 0; i != num; i++) {
> +		nb_segs[i] = esn_outb_nb_segments(mb[i]);
> +		nb_sqn += nb_segs[i];
> +		/* setup outer l2 and l3 len for TSO */
> +		if (nb_segs[i] > 1) {
> +			if (sa->type & RTE_IPSEC_SATP_MODE_TUNLV4)
> +				mb[i]->outer_l3_len =
> +						sizeof(struct rte_ipv4_hdr);
> +			else
> +				mb[i]->outer_l3_len =
> +						sizeof(struct rte_ipv6_hdr);

Again, that just doesn't look right to me.

> +			mb[i]->outer_l2_len = mb[i]->l2_len;


For transport mode I am actually not sure how the mb tx_offload fields have to be set up...
Do we still need to set up the outer fields, considering that we are not adding a new IP header here?

> +		}
> +	}
> 
> -	n = num;
> +	n = nb_sqn;
>  	sqn = esn_outb_update_sqn(sa, &n);
> -	if (n != num)
> +	if (n != nb_sqn)
>  		rte_errno = EOVERFLOW;
> 
>  	k = 0;
> -	for (i = 0; i != n; i++) {
> +	for (i = 0; i != num; i++) {

Same story as for tunnel, we can't just ignore an error here.

> 
> -		sqc = rte_cpu_to_be_64(sqn + i);
> +		sqc = rte_cpu_to_be_64(sqn);
>  		gen_iv(iv, sqc);
> +		sqn += nb_segs[i];
> 
>  		/* try to update the packet itself */
> -		rc = outb_trs_pkt_prepare(sa, sqc, iv, mb[i], &icv, 0);
> +		rc = outb_trs_pkt_prepare(sa, sqc, iv, mb[i], &icv, 0,
> +				nb_segs[i] > 1);

Same thoughts as for tunnel mode.

> 
>  		k += (rc >= 0);
> 
> @@ -753,8 +821,8 @@ inline_outb_trs_pkt_process(const struct rte_ipsec_session *ss,
>  	}
> 
>  	/* copy not processed mbufs beyond good ones */
> -	if (k != n && k != 0)
> -		move_bad_mbufs(mb, dr, n, n - k);
> +	if (k != num && k != 0)
> +		move_bad_mbufs(mb, dr, num, num - k);
> 
>  	inline_outb_mbuf_prepare(ss, mb, k);
>  	return k;
> --
> 2.25.1
Previous message (by thread): [dpdk-dev] [PATCH v9 06/10] ipsec: add transmit segmentation offload support
Next message (by thread): [dpdk-dev] [PATCH v9 06/10] ipsec: add transmit segmentation offload support
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
More information about the dev mailing list