[6/9] ipsec: implement SA data-path API

Message ID 1542326031-5263-7-git-send-email-konstantin.ananyev@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Series None

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail Compilation issues

Commit Message

Ananyev, Konstantin Nov. 15, 2018, 11:53 p.m. UTC
  Provide implementation for rte_ipsec_pkt_crypto_prepare() and
rte_ipsec_pkt_process().
Current implementation:
 - supports ESP protocol tunnel mode.
 - supports ESP protocol transport mode.
 - supports ESN and replay window.
 - supports algorithms: AES-CBC, AES-GCM, HMAC-SHA1, NULL.
 - covers all currently defined security session types:
        - RTE_SECURITY_ACTION_TYPE_NONE
        - RTE_SECURITY_ACTION_TYPE_INLINE_CRYPTO
        - RTE_SECURITY_ACTION_TYPE_INLINE_PROTOCOL
        - RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL

For the first two types the SQN check/update is done by SW (inside the library).
For the last two types it is the HW/PMD responsibility.

Signed-off-by: Mohammad Abdul Awal <mohammad.abdul.awal@intel.com>
Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
---
 lib/librte_ipsec/crypto.h    |  119 ++++
 lib/librte_ipsec/iph.h       |   63 ++
 lib/librte_ipsec/ipsec_sqn.h |  186 ++++++
 lib/librte_ipsec/pad.h       |   45 ++
 lib/librte_ipsec/sa.c        | 1050 +++++++++++++++++++++++++++++++++-
 5 files changed, 1461 insertions(+), 2 deletions(-)
 create mode 100644 lib/librte_ipsec/crypto.h
 create mode 100644 lib/librte_ipsec/iph.h
 create mode 100644 lib/librte_ipsec/pad.h
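
For reference, below is a minimal usage sketch (not part of the patch) of the two entry points the commit message describes, for an RTE_SECURITY_ACTION_TYPE_NONE session: rte_ipsec_pkt_crypto_prepare() fills the crypto ops, the application drives the cryptodev queue pair itself, and rte_ipsec_pkt_process() finalizes the packets. The helper name and the surrounding setup are hypothetical; only the rte_ipsec_*() calls come from this series.

#include <rte_ipsec.h>
#include <rte_cryptodev.h>
#include <rte_mbuf.h>

/*
 * Hypothetical lookaside-none burst: 'ss' is an initialized
 * rte_ipsec_session, 'cop' holds 'num' symmetric crypto ops allocated by
 * the caller, 'cdev'/'qid' is a configured cryptodev queue pair.
 * For brevity it assumes every enqueued op completes in the single
 * dequeue below; real code has to track ops still in flight, release
 * the crypto ops and drop the packets that failed.
 */
static uint16_t
ipsec_lksd_none_burst(const struct rte_ipsec_session *ss, uint8_t cdev,
	uint16_t qid, struct rte_mbuf *mb[], struct rte_crypto_op *cop[],
	uint16_t num)
{
	uint16_t i, k, n;

	/* fill crypto ops (plus IV/AAD/ESN related data where needed) */
	k = rte_ipsec_pkt_crypto_prepare(ss, mb, cop, num);

	/* hand the prepared ops to the crypto PMD and collect completions */
	n = rte_cryptodev_enqueue_burst(cdev, qid, cop, k);
	n = rte_cryptodev_dequeue_burst(cdev, qid, cop, n);

	/* recover the mbufs and let the library do the final check/update */
	for (i = 0; i != n; i++)
		mb[i] = cop[i]->sym->m_src;

	k = rte_ipsec_pkt_process(ss, mb, n);

	/* mb[0..k-1] are fully processed ESP packets, mb[k..n-1] failed */
	return k;
}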
  

Comments

Qi Zhang Nov. 20, 2018, 1:03 a.m. UTC | #1
Hi Konstantin and Awal:
	
	I have a couple of questions about this patch.
	Please forgive me if they are obvious, since I don't have much insight into IPsec, but I may work on related stuff in the future :)

> +static inline int32_t
> +esp_outb_tun_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
> +	const uint64_t ivp[IPSEC_MAX_IV_QWORD], struct rte_mbuf *mb,
> +	union sym_op_data *icv)
> +{
> +	uint32_t clen, hlen, pdlen, pdofs, tlen;
> +	struct rte_mbuf *ml;
> +	struct esp_hdr *esph;
> +	struct esp_tail *espt;
> +	char *ph, *pt;
> +	uint64_t *iv;
> +
> +	/* calculate extra header space required */
> +	hlen = sa->hdr_len + sa->iv_len + sizeof(*esph);
> +
> +	/* number of bytes to encrypt */
> +	clen = mb->pkt_len + sizeof(*espt);
> +	clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> +
> +	/* pad length + esp tail */
> +	pdlen = clen - mb->pkt_len;
> +	tlen = pdlen + sa->icv_len;
> +
> +	/* do append and prepend */
> +	ml = rte_pktmbuf_lastseg(mb);
> +	if (tlen + sa->sqh_len + sa->aad_len > rte_pktmbuf_tailroom(ml))
> +		return -ENOSPC;
> +
> +	/* prepend header */
> +	ph = rte_pktmbuf_prepend(mb, hlen);
> +	if (ph == NULL)
> +		return -ENOSPC;
> +
> +	/* append tail */
> +	pdofs = ml->data_len;
> +	ml->data_len += tlen;
> +	mb->pkt_len += tlen;
> +	pt = rte_pktmbuf_mtod_offset(ml, typeof(pt), pdofs);
> +
> +	/* update pkt l2/l3 len */
> +	mb->l2_len = sa->hdr_l3_off;
> +	mb->l3_len = sa->hdr_len - sa->hdr_l3_off;
> +
> +	/* copy tunnel pkt header */
> +	rte_memcpy(ph, sa->hdr, sa->hdr_len);

I didn't get this; my understanding is:

for tunnel mode if an original packet is

	Eth + IP + UDP/TCP + data, 	

after encap, it should become
	
	Eth + encap header (IP or IP + UDP) + ESP Header + IP + UDP/TCP + Data + ESP Trailer...

So after rte_pktmbuf_prepend, shouldn't we do the following:

1) shift L2 HEAD (Eth) ahead 
2) copy encap header and ESP header to the hole.
?

But now we just copy sa->hdr into the prepended space directly? What is sa->hdr supposed to be? And no matter what it is, we encap everything before the packet?
BTW, is UDP encapsulation also considered here? I didn't figure out how an IP + UDP header should be configured with sa->hdr, sa->hdr_l3_off, sa->hdr_len for this case.

> +static inline int
> +esp_inb_tun_single_pkt_process(struct rte_ipsec_sa *sa, struct rte_mbuf
> *mb,
> +	uint32_t *sqn)
> +{
> +	uint32_t hlen, icv_len, tlen;
> +	struct esp_hdr *esph;
> +	struct esp_tail *espt;
> +	struct rte_mbuf *ml;
> +	char *pd;
> +
> +	if (mb->ol_flags & PKT_RX_SEC_OFFLOAD_FAILED)
> +		return -EBADMSG;
> +
> +	icv_len = sa->icv_len;
> +
> +	ml = rte_pktmbuf_lastseg(mb);
> +	espt = rte_pktmbuf_mtod_offset(ml, struct esp_tail *,
> +		ml->data_len - icv_len - sizeof(*espt));

What kind of mechanism guarantees that the last segment will always cover the ESP tail (data_len >= icv_len + sizeof(*espt))?
Is it possible for the ESP tail to be split across multiple segments in the jumbo frame case?

Thanks
Qi
  
Ananyev, Konstantin Nov. 20, 2018, 9:44 a.m. UTC | #2
Hi Qi,

> 
> Hi Konstantin and Awal:
> 
> 	I have couple questions for this patch.
> 	please forgive me if they are obvious, since I don't have much insight on IPsec, but I may work on related stuff in future :)
> 
> > +static inline int32_t
> > +esp_outb_tun_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
> > +	const uint64_t ivp[IPSEC_MAX_IV_QWORD], struct rte_mbuf *mb,
> > +	union sym_op_data *icv)
> > +{
> > +	uint32_t clen, hlen, pdlen, pdofs, tlen;
> > +	struct rte_mbuf *ml;
> > +	struct esp_hdr *esph;
> > +	struct esp_tail *espt;
> > +	char *ph, *pt;
> > +	uint64_t *iv;
> > +
> > +	/* calculate extra header space required */
> > +	hlen = sa->hdr_len + sa->iv_len + sizeof(*esph);
> > +
> > +	/* number of bytes to encrypt */
> > +	clen = mb->pkt_len + sizeof(*espt);
> > +	clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> > +
> > +	/* pad length + esp tail */
> > +	pdlen = clen - mb->pkt_len;
> > +	tlen = pdlen + sa->icv_len;
> > +
> > +	/* do append and prepend */
> > +	ml = rte_pktmbuf_lastseg(mb);
> > +	if (tlen + sa->sqh_len + sa->aad_len > rte_pktmbuf_tailroom(ml))
> > +		return -ENOSPC;
> > +
> > +	/* prepend header */
> > +	ph = rte_pktmbuf_prepend(mb, hlen);
> > +	if (ph == NULL)
> > +		return -ENOSPC;
> > +
> > +	/* append tail */
> > +	pdofs = ml->data_len;
> > +	ml->data_len += tlen;
> > +	mb->pkt_len += tlen;
> > +	pt = rte_pktmbuf_mtod_offset(ml, typeof(pt), pdofs);
> > +
> > +	/* update pkt l2/l3 len */
> > +	mb->l2_len = sa->hdr_l3_off;
> > +	mb->l3_len = sa->hdr_len - sa->hdr_l3_off;
> > +
> > +	/* copy tunnel pkt header */
> > +	rte_memcpy(ph, sa->hdr, sa->hdr_len);
> 
> I didn't get this, my understand is:
> 
> for tunnel mode if an original packet is
> 
> 	Eth + IP + UDP/TCP + data,

It is assumed that the input mbuf doesn't contain an L2 header (only L3/L4/...).
That's why we don't shift the L2 header.
We probably have to put that into the public API comments.

> 
> after encap, it should become
> 
> 	Eth + encap header (IP or IP + UDP) + ESP Header + IP + UDP/TCP + Data + ESP Tailer...
> 
> So after rte_pktmbuf_prepend shouldn't we do below
> 
> 1) shift L2 HEAD (Eth) ahead
> 2) copy encap header and ESP header to the hole.
> ?
> 
> But now we just copy the sa->hdr on the pre-pend space directly? What is the sa->hdr supposed to be?

Optional L2 header and new L3 header.

> but no matter what is it, we encap
> everything before the packet?
> BTW, is UDP encapsulation also be considered here?

Right now - no.
It might be added later, if there is a request for it.

>, I didn't figure out how a IP + UDP header should be configured with sa->hdr , sa-
> >hdr_l3_off, sa->hdr_len for this case
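
To illustrate "optional L2 header and new L3 header" for the non-UDP case, a hypothetical outbound IPv4 tunnel SA (no L2 header in the template, so hdr_l3_off is 0) could fill the tunnel parameters roughly as below. The prm->tun field names follow the rte_ipsec_sa_prm proposed earlier in this series; treat the exact layout and header name as an assumption. The template pointed to by hdr is what esp_outb_tun_pkt_prepare() copies in front of the packet; packet_id and total_length are left zero because the library fills them per packet.

#include <string.h>
#include <netinet/in.h>
#include <rte_byteorder.h>
#include <rte_ip.h>
#include <rte_ipsec_sa.h>	/* rte_ipsec_sa_prm, as added earlier in this series */

/*
 * Hypothetical IPv4 tunnel header template for an outbound SA.
 * No L2 header is included, so hdr_l3_off == 0 and the new IP header
 * starts right at the beginning of the copied template.
 */
static void
fill_ipv4_tun_prm(struct rte_ipsec_sa_prm *prm, struct ipv4_hdr *outer,
	rte_be32_t src, rte_be32_t dst)
{
	memset(outer, 0, sizeof(*outer));
	outer->version_ihl = 0x45;            /* IPv4, 20B header */
	outer->time_to_live = 64;
	outer->next_proto_id = IPPROTO_ESP;
	outer->src_addr = src;
	outer->dst_addr = dst;
	/* packet_id and total_length are filled per packet by the library */

	prm->tun.hdr = outer;                 /* becomes sa->hdr */
	prm->tun.hdr_len = sizeof(*outer);    /* becomes sa->hdr_len */
	prm->tun.hdr_l3_off = 0;              /* becomes sa->hdr_l3_off */
	prm->tun.next_proto = IPPROTO_IPIP;   /* inner packet is IPv4 */
}

If an L2 header is to be included in the template, hdr would presumably point at an Ethernet header followed by the outer IP header, with hdr_len covering both and hdr_l3_off set to the L2 header length (14 for plain Ethernet).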
> 
> > +static inline int
> > +esp_inb_tun_single_pkt_process(struct rte_ipsec_sa *sa, struct rte_mbuf
> > *mb,
> > +	uint32_t *sqn)
> > +{
> > +	uint32_t hlen, icv_len, tlen;
> > +	struct esp_hdr *esph;
> > +	struct esp_tail *espt;
> > +	struct rte_mbuf *ml;
> > +	char *pd;
> > +
> > +	if (mb->ol_flags & PKT_RX_SEC_OFFLOAD_FAILED)
> > +		return -EBADMSG;
> > +
> > +	icv_len = sa->icv_len;
> > +
> > +	ml = rte_pktmbuf_lastseg(mb);
> > +	espt = rte_pktmbuf_mtod_offset(ml, struct esp_tail *,
> > +		ml->data_len - icv_len - sizeof(*espt));
> 
> What kind of mechanism is to guarantee that last segment will always cover the esp tail?( data_len >= icv_len + sizeof (*espt))
> Is that possible the esp tail be split into multi-segment for jumbo frames caes?

It is possible, though right now we don't support such cases.
Right now it is up to the caller to make sure the last segment contains espt+icv (plus enough free space for AAD, ESN.hi, etc.).
We plan to add proper multi-seg support later (most likely in 19.05).
Konstantin
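
Until that multi-seg support lands, a caller could guard against such packets with a hypothetical pre-check along these lines, built from the constraints listed above (ESP trailer + ICV fully inside the last segment, plus tailroom for AAD and the ESN high bits); the exact lengths are assumed to be known to the caller from its own SA configuration.

#include <stdbool.h>
#include <rte_mbuf.h>
#include <rte_esp.h>	/* struct esp_tail, as used by this series */

/*
 * Hypothetical pre-check for the current single-segment limitation of
 * inbound processing: the last segment has to contain the ESP trailer
 * plus ICV and still leave tailroom for AAD and the ESN high bits.
 */
static inline bool
inb_last_seg_ok(struct rte_mbuf *mb, uint32_t icv_len, uint32_t aad_len,
	uint32_t sqh_len)
{
	const struct rte_mbuf *ml = rte_pktmbuf_lastseg(mb);

	return ml->data_len >= icv_len + sizeof(struct esp_tail) &&
		rte_pktmbuf_tailroom(ml) >= aad_len + sqh_len;
}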
  
Ananyev, Konstantin Nov. 20, 2018, 10:02 a.m. UTC | #3
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ananyev, Konstantin
> Sent: Tuesday, November 20, 2018 9:44 AM
> To: Zhang, Qi Z <qi.z.zhang@intel.com>; dev@dpdk.org
> Cc: Awal, Mohammad Abdul <mohammad.abdul.awal@intel.com>
> Subject: Re: [dpdk-dev] [PATCH 6/9] ipsec: implement SA data-path API
> 
> 
> Hi Qi,
> 
> >
> > Hi Konstantin and Awal:
> >
> > 	I have couple questions for this patch.
> > 	please forgive me if they are obvious, since I don't have much insight on IPsec, but I may work on related stuff in future :)
> >
> > > +static inline int32_t
> > > +esp_outb_tun_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
> > > +	const uint64_t ivp[IPSEC_MAX_IV_QWORD], struct rte_mbuf *mb,
> > > +	union sym_op_data *icv)
> > > +{
> > > +	uint32_t clen, hlen, pdlen, pdofs, tlen;
> > > +	struct rte_mbuf *ml;
> > > +	struct esp_hdr *esph;
> > > +	struct esp_tail *espt;
> > > +	char *ph, *pt;
> > > +	uint64_t *iv;
> > > +
> > > +	/* calculate extra header space required */
> > > +	hlen = sa->hdr_len + sa->iv_len + sizeof(*esph);
> > > +
> > > +	/* number of bytes to encrypt */
> > > +	clen = mb->pkt_len + sizeof(*espt);
> > > +	clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> > > +
> > > +	/* pad length + esp tail */
> > > +	pdlen = clen - mb->pkt_len;
> > > +	tlen = pdlen + sa->icv_len;
> > > +
> > > +	/* do append and prepend */
> > > +	ml = rte_pktmbuf_lastseg(mb);
> > > +	if (tlen + sa->sqh_len + sa->aad_len > rte_pktmbuf_tailroom(ml))
> > > +		return -ENOSPC;
> > > +
> > > +	/* prepend header */
> > > +	ph = rte_pktmbuf_prepend(mb, hlen);
> > > +	if (ph == NULL)
> > > +		return -ENOSPC;
> > > +
> > > +	/* append tail */
> > > +	pdofs = ml->data_len;
> > > +	ml->data_len += tlen;
> > > +	mb->pkt_len += tlen;
> > > +	pt = rte_pktmbuf_mtod_offset(ml, typeof(pt), pdofs);
> > > +
> > > +	/* update pkt l2/l3 len */
> > > +	mb->l2_len = sa->hdr_l3_off;
> > > +	mb->l3_len = sa->hdr_len - sa->hdr_l3_off;
> > > +
> > > +	/* copy tunnel pkt header */
> > > +	rte_memcpy(ph, sa->hdr, sa->hdr_len);
> >
> > I didn't get this, my understand is:
> >
> > for tunnel mode if an original packet is
> >
> > 	Eth + IP + UDP/TCP + data,
> 
> It  is assumed that input mbuf doesn't contain L2 header already (only L3/L4/...)
> That's why we don't shift L2 header.
> Probably have to put that into public API comments.

On second thought - it is probably better to also support the case when L2 is not stripped here.
After all, we do support it for the other modes (inbound tunnel/transport, outbound transport).
Will try to add it in v2.
Konstantin

> 
> >
> > after encap, it should become
> >
> > 	Eth + encap header (IP or IP + UDP) + ESP Header + IP + UDP/TCP + Data + ESP Tailer...
> >
> > So after rte_pktmbuf_prepend shouldn't we do below
> >
> > 1) shift L2 HEAD (Eth) ahead
> > 2) copy encap header and ESP header to the hole.
> > ?
> >
> > But now we just copy the sa->hdr on the pre-pend space directly? What is the sa->hdr supposed to be?
> 
> Optional L2 header and new L3 header.
> 
> > but no matter what is it, we encap
> > everything before the packet?
> > BTW, is UDP encapsulation also be considered here?
> 
> Right now - no.
> Might be later, if there would be a request for it.
> 
> >, I didn't figure out how a IP + UDP header should be configured with sa->hdr , sa-
> > >hdr_l3_off, sa->hdr_len for this case
> >
> > > +static inline int
> > > +esp_inb_tun_single_pkt_process(struct rte_ipsec_sa *sa, struct rte_mbuf
> > > *mb,
> > > +	uint32_t *sqn)
> > > +{
> > > +	uint32_t hlen, icv_len, tlen;
> > > +	struct esp_hdr *esph;
> > > +	struct esp_tail *espt;
> > > +	struct rte_mbuf *ml;
> > > +	char *pd;
> > > +
> > > +	if (mb->ol_flags & PKT_RX_SEC_OFFLOAD_FAILED)
> > > +		return -EBADMSG;
> > > +
> > > +	icv_len = sa->icv_len;
> > > +
> > > +	ml = rte_pktmbuf_lastseg(mb);
> > > +	espt = rte_pktmbuf_mtod_offset(ml, struct esp_tail *,
> > > +		ml->data_len - icv_len - sizeof(*espt));
> >
> > What kind of mechanism is to guarantee that last segment will always cover the esp tail?( data_len >= icv_len + sizeof (*espt))
> > Is that possible the esp tail be split into multi-segment for jumbo frames caes?
> 
> It is possible, though right now we don't support such cases.
> Right now it is up to the caller to make sure last segment contains espt+icv.
> Plan to add proper multi-seg support later (most likely 19.05).
> Konstantin
  

Patch

diff --git a/lib/librte_ipsec/crypto.h b/lib/librte_ipsec/crypto.h
new file mode 100644
index 000000000..98f9989af
--- /dev/null
+++ b/lib/librte_ipsec/crypto.h
@@ -0,0 +1,119 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _CRYPTO_H_
+#define _CRYPTO_H_
+
+/**
+ * @file crypto.h
+ * Contains crypto specific functions/structures/macros used internally
+ * by ipsec library.
+ */
+
+ /*
+  * AES-GCM devices have some specific requirements for IV and AAD formats.
+  * Ideally that should be done by the driver itself.
+  */
+
+struct aead_gcm_iv {
+	uint32_t salt;
+	uint64_t iv;
+	uint32_t cnt;
+} __attribute__((packed));
+
+struct aead_gcm_aad {
+	uint32_t spi;
+	/*
+	 * RFC 4106, section 5:
+	 * Two formats of the AAD are defined:
+	 * one for 32-bit sequence numbers, and one for 64-bit ESN.
+	 */
+	union {
+		uint32_t u32;
+		uint64_t u64;
+	} sqn;
+	uint32_t align0; /* align to 16B boundary */
+} __attribute__((packed));
+
+struct gcm_esph_iv {
+	struct esp_hdr esph;
+	uint64_t iv;
+} __attribute__((packed));
+
+
+static inline void
+aead_gcm_iv_fill(struct aead_gcm_iv *gcm, uint64_t iv, uint32_t salt)
+{
+	gcm->salt = salt;
+	gcm->iv = iv;
+	gcm->cnt = rte_cpu_to_be_32(1);
+}
+
+/*
+ * RFC 4106, 5 AAD Construction
+ * spi and sqn should already be converted into network byte order.
+ */
+static inline void
+aead_gcm_aad_fill(struct aead_gcm_aad *aad, rte_be32_t spi, rte_be64_t sqn,
+	int esn)
+{
+	aad->spi = spi;
+	if (esn)
+		aad->sqn.u64 = sqn;
+	else
+		aad->sqn.u32 = sqn_low32(sqn);
+}
+
+static inline void
+gen_iv(uint64_t iv[IPSEC_MAX_IV_QWORD], rte_be64_t sqn)
+{
+	iv[0] = sqn;
+	iv[1] = 0;
+}
+
+/*
+ * from RFC 4303 3.3.2.1.4:
+ * If the ESN option is enabled for the SA, the high-order 32
+ * bits of the sequence number are appended after the Next Header field
+ * for purposes of this computation, but are not transmitted.
+ */
+
+/*
+ * Helper function that moves ICV by 4B below, and inserts SQN.hibits.
+ * icv parameter points to the new start of ICV.
+ */
+static inline void
+insert_sqh(uint32_t sqh, void *picv, uint32_t icv_len)
+{
+	uint32_t *icv;
+	int32_t i;
+
+	RTE_ASSERT(icv_len % sizeof(uint32_t) == 0);
+
+	icv = picv;
+	icv_len = icv_len / sizeof(uint32_t);
+	for (i = icv_len; i-- != 0; icv[i] = icv[i - 1])
+		;
+
+	icv[i] = sqh;
+}
+
+/*
+ * Helper function that moves ICV by 4B up, and removes SQN.hibits.
+ * icv parameter points to the new start of ICV.
+ */
+static inline void
+remove_sqh(void *picv, uint32_t icv_len)
+{
+	uint32_t i, *icv;
+
+	RTE_ASSERT(icv_len % sizeof(uint32_t) == 0);
+
+	icv = picv;
+	icv_len = icv_len / sizeof(uint32_t);
+	for (i = 0; i != icv_len; i++)
+		icv[i] = icv[i + 1];
+}
+
+#endif /* _CRYPTO_H_ */
diff --git a/lib/librte_ipsec/iph.h b/lib/librte_ipsec/iph.h
new file mode 100644
index 000000000..c85bd2866
--- /dev/null
+++ b/lib/librte_ipsec/iph.h
@@ -0,0 +1,63 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _IPH_H_
+#define _IPH_H_
+
+/**
+ * @file iph.h
+ * Contains functions/structures/macros to manipulate IPv4/IPv6 headers
+ * used internally by ipsec library.
+ */
+
+/*
+ * Move preceding (L3) headers down to remove ESP header and IV.
+ */
+static inline void
+remove_esph(char *np, char *op, uint32_t hlen)
+{
+	uint32_t i;
+
+	for (i = hlen; i-- != 0; np[i] = op[i])
+		;
+}
+
+/*
+ * Move preceding (L3) headers up to free space for ESP header and IV.
+ */
+static inline void
+insert_esph(char *np, char *op, uint32_t hlen)
+{
+	uint32_t i;
+
+	for (i = 0; i != hlen; i++)
+		np[i] = op[i];
+}
+
+static inline int
+update_trs_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
+		uint32_t l2len, uint32_t l3len, uint8_t proto)
+{
+	struct ipv4_hdr *v4h;
+	struct ipv6_hdr *v6h;
+	int32_t rc;
+
+	if ((sa->type & RTE_IPSEC_SATP_IPV_MASK) == RTE_IPSEC_SATP_IPV4) {
+		v4h = p;
+		rc = v4h->next_proto_id;
+		v4h->next_proto_id = proto;
+		v4h->total_length = rte_cpu_to_be_16(plen - l2len);
+	} else if (l3len == sizeof(*v6h)) {
+		v6h = p;
+		rc = v6h->proto;
+		v6h->proto = proto;
+		v6h->payload_len = rte_cpu_to_be_16(plen - l2len - l3len);
+	/* need to add support for IPv6 with options */
+	} else
+		rc = -ENOTSUP;
+
+	return rc;
+}
+
+#endif /* _IPH_H_ */
diff --git a/lib/librte_ipsec/ipsec_sqn.h b/lib/librte_ipsec/ipsec_sqn.h
index 4471814f9..a33ff9cca 100644
--- a/lib/librte_ipsec/ipsec_sqn.h
+++ b/lib/librte_ipsec/ipsec_sqn.h
@@ -15,6 +15,45 @@ 
 
 #define IS_ESN(sa)	((sa)->sqn_mask == UINT64_MAX)
 
+/*
+ * gets SQN.hi32 bits, SQN supposed to be in network byte order.
+ */
+static inline rte_be32_t
+sqn_hi32(rte_be64_t sqn)
+{
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+	return (sqn >> 32);
+#else
+	return sqn;
+#endif
+}
+
+/*
+ * gets SQN.low32 bits, SQN supposed to be in network byte order.
+ */
+static inline rte_be32_t
+sqn_low32(rte_be64_t sqn)
+{
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+	return sqn;
+#else
+	return (sqn >> 32);
+#endif
+}
+
+/*
+ * gets SQN.low16 bits, SQN supposed to be in network byte order.
+ */
+static inline rte_be16_t
+sqn_low16(rte_be64_t sqn)
+{
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+	return sqn;
+#else
+	return (sqn >> 48);
+#endif
+}
+
 /*
  * for given size, calculate required number of buckets.
  */
@@ -30,6 +69,153 @@  replay_num_bucket(uint32_t wsz)
 	return nb;
 }
 
+/*
+ * According to RFC 4303 A2.1, determine the high-order bits of the
+ * sequence number; use 32-bit arithmetic inside, return uint64_t.
+ */
+static inline uint64_t
+reconstruct_esn(uint64_t t, uint32_t sqn, uint32_t w)
+{
+	uint32_t th, tl, bl;
+
+	tl = t;
+	th = t >> 32;
+	bl = tl - w + 1;
+
+	/* case A: window is within one sequence number subspace */
+	if (tl >= (w - 1))
+		th += (sqn < bl);
+	/* case B: window spans two sequence number subspaces */
+	else if (th != 0)
+		th -= (sqn >= bl);
+
+	/* return constructed sequence with proper high-order bits */
+	return (uint64_t)th << 32 | sqn;
+}
+
+/**
+ * Perform the replay checking.
+ *
+ * struct rte_ipsec_sa contains the window and window related parameters,
+ * such as the window size, bitmask, and the last acknowledged sequence number.
+ *
+ * Based on RFC 6479.
+ * Blocks are 64 bits unsigned integers
+ */
+static inline int32_t
+esn_inb_check_sqn(const struct replay_sqn *rsn, const struct rte_ipsec_sa *sa,
+	uint64_t sqn)
+{
+	uint32_t bit, bucket;
+
+	/* replay not enabled */
+	if (sa->replay.win_sz == 0)
+		return 0;
+
+	/* seq is larger than lastseq */
+	if (sqn > rsn->sqn)
+		return 0;
+
+	/* seq is outside window */
+	if (sqn == 0 || sqn + sa->replay.win_sz < rsn->sqn)
+		return -EINVAL;
+
+	/* seq is inside the window */
+	bit = sqn & WINDOW_BIT_LOC_MASK;
+	bucket = (sqn >> WINDOW_BUCKET_BITS) & sa->replay.bucket_index_mask;
+
+	/* already seen packet */
+	if (rsn->window[bucket] & ((uint64_t)1 << bit))
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ * For outbound SA perform the sequence number update.
+ */
+static inline uint64_t
+esn_outb_update_sqn(struct rte_ipsec_sa *sa, uint32_t *num)
+{
+	uint64_t n, s, sqn;
+
+	n = *num;
+	sqn = sa->sqn.outb + n;
+	sa->sqn.outb = sqn;
+
+	/* overflow */
+	if (sqn > sa->sqn_mask) {
+		s = sqn - sa->sqn_mask;
+		*num = (s < n) ?  n - s : 0;
+	}
+
+	return sqn - n;
+}
+
+/**
+ * For inbound SA perform the sequence number and replay window update.
+ */
+static inline int32_t
+esn_inb_update_sqn(struct replay_sqn *rsn, const struct rte_ipsec_sa *sa,
+	uint64_t sqn)
+{
+	uint32_t bit, bucket, last_bucket, new_bucket, diff, i;
+
+	/* replay not enabled */
+	if (sa->replay.win_sz == 0)
+		return 0;
+
+	/* handle ESN */
+	if (IS_ESN(sa))
+		sqn = reconstruct_esn(rsn->sqn, sqn, sa->replay.win_sz);
+
+	/* seq is outside window*/
+	if (sqn == 0 || sqn + sa->replay.win_sz < rsn->sqn)
+		return -EINVAL;
+
+	/* update the bit */
+	bucket = (sqn >> WINDOW_BUCKET_BITS);
+
+	/* check if the seq is within the range */
+	if (sqn > rsn->sqn) {
+		last_bucket = rsn->sqn >> WINDOW_BUCKET_BITS;
+		diff = bucket - last_bucket;
+		/* seq is way after the range of WINDOW_SIZE */
+		if (diff > sa->replay.nb_bucket)
+			diff = sa->replay.nb_bucket;
+
+		for (i = 0; i != diff; i++) {
+			new_bucket = (i + last_bucket + 1) &
+				sa->replay.bucket_index_mask;
+			rsn->window[new_bucket] = 0;
+		}
+		rsn->sqn = sqn;
+	}
+
+	bucket &= sa->replay.bucket_index_mask;
+	bit = (uint64_t)1 << (sqn & WINDOW_BIT_LOC_MASK);
+
+	/* already seen packet */
+	if (rsn->window[bucket] & bit)
+		return -EINVAL;
+
+	rsn->window[bucket] |= bit;
+	return 0;
+}
+
+/**
+ * To achieve the ability to have multiple readers and a single writer
+ * for the SA replay window information and sequence number (RSN),
+ * a basic RCU schema is used:
+ * SA has 2 copies of RSN (one for readers, another for writers).
+ * Each RSN contains a rwlock that has to be grabbed (for read/write)
+ * to avoid races between readers and writer.
+ * Writer is responsible for making a copy of the reader RSN, updating it
+ * and marking the newly updated RSN as the readers' one.
+ * That approach is intended to minimize contention and cache sharing
+ * between writer and readers.
+ */
+
 /**
  * Based on number of buckets calculated required size for the
  * structure that holds replay window and sequence number (RSN) information.
diff --git a/lib/librte_ipsec/pad.h b/lib/librte_ipsec/pad.h
new file mode 100644
index 000000000..2f5ccd00e
--- /dev/null
+++ b/lib/librte_ipsec/pad.h
@@ -0,0 +1,45 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _PAD_H_
+#define _PAD_H_
+
+#define IPSEC_MAX_PAD_SIZE	UINT8_MAX
+
+static const uint8_t esp_pad_bytes[IPSEC_MAX_PAD_SIZE] = {
+	1, 2, 3, 4, 5, 6, 7, 8,
+	9, 10, 11, 12, 13, 14, 15, 16,
+	17, 18, 19, 20, 21, 22, 23, 24,
+	25, 26, 27, 28, 29, 30, 31, 32,
+	33, 34, 35, 36, 37, 38, 39, 40,
+	41, 42, 43, 44, 45, 46, 47, 48,
+	49, 50, 51, 52, 53, 54, 55, 56,
+	57, 58, 59, 60, 61, 62, 63, 64,
+	65, 66, 67, 68, 69, 70, 71, 72,
+	73, 74, 75, 76, 77, 78, 79, 80,
+	81, 82, 83, 84, 85, 86, 87, 88,
+	89, 90, 91, 92, 93, 94, 95, 96,
+	97, 98, 99, 100, 101, 102, 103, 104,
+	105, 106, 107, 108, 109, 110, 111, 112,
+	113, 114, 115, 116, 117, 118, 119, 120,
+	121, 122, 123, 124, 125, 126, 127, 128,
+	129, 130, 131, 132, 133, 134, 135, 136,
+	137, 138, 139, 140, 141, 142, 143, 144,
+	145, 146, 147, 148, 149, 150, 151, 152,
+	153, 154, 155, 156, 157, 158, 159, 160,
+	161, 162, 163, 164, 165, 166, 167, 168,
+	169, 170, 171, 172, 173, 174, 175, 176,
+	177, 178, 179, 180, 181, 182, 183, 184,
+	185, 186, 187, 188, 189, 190, 191, 192,
+	193, 194, 195, 196, 197, 198, 199, 200,
+	201, 202, 203, 204, 205, 206, 207, 208,
+	209, 210, 211, 212, 213, 214, 215, 216,
+	217, 218, 219, 220, 221, 222, 223, 224,
+	225, 226, 227, 228, 229, 230, 231, 232,
+	233, 234, 235, 236, 237, 238, 239, 240,
+	241, 242, 243, 244, 245, 246, 247, 248,
+	249, 250, 251, 252, 253, 254, 255,
+};
+
+#endif /* _PAD_H_ */
diff --git a/lib/librte_ipsec/sa.c b/lib/librte_ipsec/sa.c
index 7f9baa602..00b3c8044 100644
--- a/lib/librte_ipsec/sa.c
+++ b/lib/librte_ipsec/sa.c
@@ -6,9 +6,13 @@ 
 #include <rte_esp.h>
 #include <rte_ip.h>
 #include <rte_errno.h>
+#include <rte_cryptodev.h>
 
 #include "sa.h"
 #include "ipsec_sqn.h"
+#include "crypto.h"
+#include "iph.h"
+#include "pad.h"
 
 /* some helper structures */
 struct crypto_xform {
@@ -192,6 +196,7 @@  esp_sa_init(struct rte_ipsec_sa *sa, const struct rte_ipsec_sa_prm *prm,
 		/* RFC 4106 */
 		if (cxf->aead->algo != RTE_CRYPTO_AEAD_AES_GCM)
 			return -EINVAL;
+		sa->aad_len = sizeof(struct aead_gcm_aad);
 		sa->icv_len = cxf->aead->digest_length;
 		sa->iv_ofs = cxf->aead->iv.offset;
 		sa->iv_len = sizeof(uint64_t);
@@ -306,18 +311,1059 @@  rte_ipsec_sa_init(struct rte_ipsec_sa *sa, const struct rte_ipsec_sa_prm *prm,
 	return sz;
 }
 
+static inline void
+mbuf_bulk_copy(struct rte_mbuf *dst[], struct rte_mbuf * const src[],
+	uint32_t num)
+{
+	uint32_t i;
+
+	for (i = 0; i != num; i++)
+		dst[i] = src[i];
+}
+
+static inline void
+lksd_none_cop_prepare(const struct rte_ipsec_session *ss,
+	struct rte_mbuf *mb[], struct rte_crypto_op *cop[], uint16_t num)
+{
+	uint32_t i;
+	struct rte_crypto_sym_op *sop;
+
+	for (i = 0; i != num; i++) {
+		sop = cop[i]->sym;
+		cop[i]->type = RTE_CRYPTO_OP_TYPE_SYMMETRIC;
+		cop[i]->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
+		cop[i]->sess_type = RTE_CRYPTO_OP_WITH_SESSION;
+		sop->m_src = mb[i];
+		__rte_crypto_sym_op_attach_sym_session(sop, ss->crypto.ses);
+	}
+}
+
+static inline void
+esp_outb_cop_prepare(struct rte_crypto_op *cop,
+	const struct rte_ipsec_sa *sa, const uint64_t ivp[IPSEC_MAX_IV_QWORD],
+	const union sym_op_data *icv, uint32_t hlen, uint32_t plen)
+{
+	struct rte_crypto_sym_op *sop;
+	struct aead_gcm_iv *gcm;
+
+	/* fill sym op fields */
+	sop = cop->sym;
+
+	/* AEAD (AES_GCM) case */
+	if (sa->aad_len != 0) {
+		sop->aead.data.offset = sa->ctp.cipher.offset + hlen;
+		sop->aead.data.length = sa->ctp.cipher.length + plen;
+		sop->aead.digest.data = icv->va;
+		sop->aead.digest.phys_addr = icv->pa;
+		sop->aead.aad.data = icv->va + sa->icv_len;
+		sop->aead.aad.phys_addr = icv->pa + sa->icv_len;
+
+		/* fill AAD IV (located inside crypto op) */
+		gcm = rte_crypto_op_ctod_offset(cop, struct aead_gcm_iv *,
+			sa->iv_ofs);
+		aead_gcm_iv_fill(gcm, ivp[0], sa->salt);
+	/* CRYPT+AUTH case */
+	} else {
+		sop->cipher.data.offset = sa->ctp.cipher.offset + hlen;
+		sop->cipher.data.length = sa->ctp.cipher.length + plen;
+		sop->auth.data.offset = sa->ctp.auth.offset + hlen;
+		sop->auth.data.length = sa->ctp.auth.length + plen;
+		sop->auth.digest.data = icv->va;
+		sop->auth.digest.phys_addr = icv->pa;
+	}
+}
+
+static inline int32_t
+esp_outb_tun_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
+	const uint64_t ivp[IPSEC_MAX_IV_QWORD], struct rte_mbuf *mb,
+	union sym_op_data *icv)
+{
+	uint32_t clen, hlen, pdlen, pdofs, tlen;
+	struct rte_mbuf *ml;
+	struct esp_hdr *esph;
+	struct esp_tail *espt;
+	char *ph, *pt;
+	uint64_t *iv;
+
+	/* calculate extra header space required */
+	hlen = sa->hdr_len + sa->iv_len + sizeof(*esph);
+
+	/* number of bytes to encrypt */
+	clen = mb->pkt_len + sizeof(*espt);
+	clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
+
+	/* pad length + esp tail */
+	pdlen = clen - mb->pkt_len;
+	tlen = pdlen + sa->icv_len;
+
+	/* do append and prepend */
+	ml = rte_pktmbuf_lastseg(mb);
+	if (tlen + sa->sqh_len + sa->aad_len > rte_pktmbuf_tailroom(ml))
+		return -ENOSPC;
+
+	/* prepend header */
+	ph = rte_pktmbuf_prepend(mb, hlen);
+	if (ph == NULL)
+		return -ENOSPC;
+
+	/* append tail */
+	pdofs = ml->data_len;
+	ml->data_len += tlen;
+	mb->pkt_len += tlen;
+	pt = rte_pktmbuf_mtod_offset(ml, typeof(pt), pdofs);
+
+	/* update pkt l2/l3 len */
+	mb->l2_len = sa->hdr_l3_off;
+	mb->l3_len = sa->hdr_len - sa->hdr_l3_off;
+
+	/* copy tunnel pkt header */
+	rte_memcpy(ph, sa->hdr, sa->hdr_len);
+
+	/* update original and new ip header fields */
+	if (sa->type & RTE_IPSEC_SATP_MODE_TUNLV4) {
+		struct ipv4_hdr *l3h;
+		l3h = (struct ipv4_hdr *)(ph + sa->hdr_l3_off);
+		l3h->packet_id = sqn_low16(sqc);
+		l3h->total_length = rte_cpu_to_be_16(mb->pkt_len -
+			sa->hdr_l3_off);
+	} else {
+		struct ipv6_hdr *l3h;
+		l3h = (struct ipv6_hdr *)(ph + sa->hdr_l3_off);
+		l3h->payload_len = rte_cpu_to_be_16(mb->pkt_len -
+			sa->hdr_l3_off - sizeof(*l3h));
+	}
+
+	/* update spi, seqn and iv */
+	esph = (struct esp_hdr *)(ph + sa->hdr_len);
+	iv = (uint64_t *)(esph + 1);
+	rte_memcpy(iv, ivp, sa->iv_len);
+
+	esph->spi = sa->spi;
+	esph->seq = sqn_low32(sqc);
+
+	/* offset for ICV */
+	pdofs += pdlen + sa->sqh_len;
+
+	/* pad length */
+	pdlen -= sizeof(*espt);
+
+	/* copy padding data */
+	rte_memcpy(pt, esp_pad_bytes, pdlen);
+
+	/* update esp trailer */
+	espt = (struct esp_tail *)(pt + pdlen);
+	espt->pad_len = pdlen;
+	espt->next_proto = sa->proto;
+
+	icv->va = rte_pktmbuf_mtod_offset(ml, void *, pdofs);
+	icv->pa = rte_pktmbuf_iova_offset(ml, pdofs);
+
+	return clen;
+}
+
+/*
+ * for pure cryptodev (lookaside none) depending on SA settings,
+ * we might have to write some extra data to the packet.
+ */
+static inline void
+outb_pkt_xprepare(const struct rte_ipsec_sa *sa, rte_be64_t sqc,
+	const union sym_op_data *icv)
+{
+	uint32_t *psqh;
+	struct aead_gcm_aad *aad;
+
+	/* insert SQN.hi between ESP trailer and ICV */
+	if (sa->sqh_len != 0) {
+		psqh = (uint32_t *)(icv->va - sa->sqh_len);
+		psqh[0] = sqn_hi32(sqc);
+	}
+
+	/*
+	 * fill IV and AAD fields, if any (aad fields are placed after icv),
+	 * right now we support only one AEAD algorithm: AES-GCM .
+	 */
+	if (sa->aad_len != 0) {
+		aad = (struct aead_gcm_aad *)(icv->va + sa->icv_len);
+		aead_gcm_aad_fill(aad, sa->spi, sqc, IS_ESN(sa));
+	}
+}
+
+static uint16_t
+outb_tun_prepare(const struct rte_ipsec_session *ss, struct rte_mbuf *mb[],
+	struct rte_crypto_op *cop[], uint16_t num)
+{
+	int32_t rc;
+	uint32_t i, k, n;
+	uint64_t sqn;
+	rte_be64_t sqc;
+	struct rte_ipsec_sa *sa;
+	union sym_op_data icv;
+	uint64_t iv[IPSEC_MAX_IV_QWORD];
+	struct rte_mbuf *dr[num];
+
+	sa = ss->sa;
+
+	n = num;
+	sqn = esn_outb_update_sqn(sa, &n);
+	if (n != num)
+		rte_errno = EOVERFLOW;
+
+	k = 0;
+	for (i = 0; i != n; i++) {
+
+		sqc = rte_cpu_to_be_64(sqn + i);
+		gen_iv(iv, sqc);
+
+		/* try to update the packet itself */
+		rc = esp_outb_tun_pkt_prepare(sa, sqc, iv, mb[i], &icv);
+
+		/* success, setup crypto op */
+		if (rc >= 0) {
+			mb[k] = mb[i];
+			outb_pkt_xprepare(sa, sqc, &icv);
+			esp_outb_cop_prepare(cop[k], sa, iv, &icv, 0, rc);
+			k++;
+		/* failure, put packet into the death-row */
+		} else {
+			dr[i - k] = mb[i];
+			rte_errno = -rc;
+		}
+	}
+
+	/* update cops */
+	lksd_none_cop_prepare(ss, mb, cop, k);
+
+	 /* copy not prepared mbufs beyond good ones */
+	if (k != num && k != 0)
+		mbuf_bulk_copy(mb + k, dr, num - k);
+
+	return k;
+}
+
+static inline int32_t
+esp_outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
+	const uint64_t ivp[IPSEC_MAX_IV_QWORD], struct rte_mbuf *mb,
+	uint32_t l2len, uint32_t l3len, union sym_op_data *icv)
+{
+	uint8_t np;
+	uint32_t clen, hlen, pdlen, pdofs, plen, tlen, uhlen;
+	struct rte_mbuf *ml;
+	struct esp_hdr *esph;
+	struct esp_tail *espt;
+	char *ph, *pt;
+	uint64_t *iv;
+
+	uhlen = l2len + l3len;
+	plen = mb->pkt_len - uhlen;
+
+	/* calculate extra header space required */
+	hlen = sa->iv_len + sizeof(*esph);
+
+	/* number of bytes to encrypt */
+	clen = plen + sizeof(*espt);
+	clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
+
+	/* pad length + esp tail */
+	pdlen = clen - plen;
+	tlen = pdlen + sa->icv_len;
+
+	/* do append and insert */
+	ml = rte_pktmbuf_lastseg(mb);
+	if (tlen + sa->sqh_len + sa->aad_len > rte_pktmbuf_tailroom(ml))
+		return -ENOSPC;
+
+	/* prepend space for ESP header */
+	ph = rte_pktmbuf_prepend(mb, hlen);
+	if (ph == NULL)
+		return -ENOSPC;
+
+	/* append tail */
+	pdofs = ml->data_len;
+	ml->data_len += tlen;
+	mb->pkt_len += tlen;
+	pt = rte_pktmbuf_mtod_offset(ml, typeof(pt), pdofs);
+
+	/* shift L2/L3 headers */
+	insert_esph(ph, ph + hlen, uhlen);
+
+	/* update ip  header fields */
+	np = update_trs_l3hdr(sa, ph + l2len, mb->pkt_len, l2len, l3len,
+			IPPROTO_ESP);
+
+	/* update spi, seqn and iv */
+	esph = (struct esp_hdr *)(ph + uhlen);
+	iv = (uint64_t *)(esph + 1);
+	rte_memcpy(iv, ivp, sa->iv_len);
+
+	esph->spi = sa->spi;
+	esph->seq = sqn_low32(sqc);
+
+	/* offset for ICV */
+	pdofs += pdlen + sa->sqh_len;
+
+	/* pad length */
+	pdlen -= sizeof(*espt);
+
+	/* copy padding data */
+	rte_memcpy(pt, esp_pad_bytes, pdlen);
+
+	/* update esp trailer */
+	espt = (struct esp_tail *)(pt + pdlen);
+	espt->pad_len = pdlen;
+	espt->next_proto = np;
+
+	icv->va = rte_pktmbuf_mtod_offset(ml, void *, pdofs);
+	icv->pa = rte_pktmbuf_iova_offset(ml, pdofs);
+
+	return clen;
+}
+
+static uint16_t
+outb_trs_prepare(const struct rte_ipsec_session *ss, struct rte_mbuf *mb[],
+	struct rte_crypto_op *cop[], uint16_t num)
+{
+	int32_t rc;
+	uint32_t i, k, n, l2, l3;
+	uint64_t sqn;
+	rte_be64_t sqc;
+	struct rte_ipsec_sa *sa;
+	union sym_op_data icv;
+	uint64_t iv[IPSEC_MAX_IV_QWORD];
+	struct rte_mbuf *dr[num];
+
+	sa = ss->sa;
+
+	n = num;
+	sqn = esn_outb_update_sqn(sa, &n);
+	if (n != num)
+		rte_errno = EOVERFLOW;
+
+	k = 0;
+	for (i = 0; i != n; i++) {
+
+		l2 = mb[i]->l2_len;
+		l3 = mb[i]->l3_len;
+
+		sqc = rte_cpu_to_be_64(sqn + i);
+		gen_iv(iv, sqc);
+
+		/* try to update the packet itself */
+		rc = esp_outb_trs_pkt_prepare(sa, sqc, iv, mb[i],
+				l2, l3, &icv);
+
+		/* success, setup crypto op */
+		if (rc >= 0) {
+			mb[k] = mb[i];
+			outb_pkt_xprepare(sa, sqc, &icv);
+			esp_outb_cop_prepare(cop[k], sa, iv, &icv, l2 + l3, rc);
+			k++;
+		/* failure, put packet into the death-row */
+		} else {
+			dr[i - k] = mb[i];
+			rte_errno = -rc;
+		}
+	}
+
+	/* update cops */
+	lksd_none_cop_prepare(ss, mb, cop, k);
+
+	/* copy not prepared mbufs beyond good ones */
+	if (k != num && k != 0)
+		mbuf_bulk_copy(mb + k, dr, num - k);
+
+	return k;
+}
+
+static inline int32_t
+esp_inb_tun_cop_prepare(struct rte_crypto_op *cop,
+	const struct rte_ipsec_sa *sa, struct rte_mbuf *mb,
+	const union sym_op_data *icv, uint32_t pofs, uint32_t plen)
+{
+	struct rte_crypto_sym_op *sop;
+	struct aead_gcm_iv *gcm;
+	uint64_t *ivc, *ivp;
+	uint32_t clen;
+
+	clen = plen - sa->ctp.cipher.length;
+	if ((int32_t)clen < 0 || (clen & (sa->pad_align - 1)) != 0)
+		return -EINVAL;
+
+	/* fill sym op fields */
+	sop = cop->sym;
+
+	/* AEAD (AES_GCM) case */
+	if (sa->aad_len != 0) {
+		sop->aead.data.offset = pofs + sa->ctp.cipher.offset;
+		sop->aead.data.length = clen;
+		sop->aead.digest.data = icv->va;
+		sop->aead.digest.phys_addr = icv->pa;
+		sop->aead.aad.data = icv->va + sa->icv_len;
+		sop->aead.aad.phys_addr = icv->pa + sa->icv_len;
+
+		/* fill AAD IV (located inside crypto op) */
+		gcm = rte_crypto_op_ctod_offset(cop, struct aead_gcm_iv *,
+			sa->iv_ofs);
+		ivp = rte_pktmbuf_mtod_offset(mb, uint64_t *,
+			pofs + sizeof(struct esp_hdr));
+		aead_gcm_iv_fill(gcm, ivp[0], sa->salt);
+	/* CRYPT+AUTH case */
+	} else {
+		sop->cipher.data.offset = pofs + sa->ctp.cipher.offset;
+		sop->cipher.data.length = clen;
+		sop->auth.data.offset = pofs + sa->ctp.auth.offset;
+		sop->auth.data.length = plen - sa->ctp.auth.length;
+		sop->auth.digest.data = icv->va;
+		sop->auth.digest.phys_addr = icv->pa;
+
+		/* copy iv from the input packet to the cop */
+		ivc = rte_crypto_op_ctod_offset(cop, uint64_t *, sa->iv_ofs);
+		ivp = rte_pktmbuf_mtod_offset(mb, uint64_t *,
+			pofs + sizeof(struct esp_hdr));
+		rte_memcpy(ivc, ivp, sa->iv_len);
+	}
+	return 0;
+}
+
+/*
+ * for pure cryptodev (lookaside none) depending on SA settings,
+ * we might have to write some extra data to the packet.
+ */
+static inline void
+inb_pkt_xprepare(const struct rte_ipsec_sa *sa, rte_be64_t sqc,
+	const union sym_op_data *icv)
+{
+	struct aead_gcm_aad *aad;
+
+	/* insert SQN.hi between ESP trailer and ICV */
+	if (sa->sqh_len != 0)
+		insert_sqh(sqn_hi32(sqc), icv->va, sa->icv_len);
+
+	/*
+	 * fill AAD fields, if any (aad fields are placed after icv),
+	 * right now we support only one AEAD algorithm: AES-GCM.
+	 */
+	if (sa->aad_len != 0) {
+		aad = (struct aead_gcm_aad *)(icv->va + sa->icv_len);
+		aead_gcm_aad_fill(aad, sa->spi, sqc, IS_ESN(sa));
+	}
+}
+
+static inline int32_t
+esp_inb_tun_pkt_prepare(const struct rte_ipsec_sa *sa,
+	const struct replay_sqn *rsn, struct rte_mbuf *mb,
+	uint32_t hlen, union sym_op_data *icv)
+{
+	int32_t rc;
+	uint64_t sqn;
+	uint32_t icv_ofs, plen;
+	struct rte_mbuf *ml;
+	struct esp_hdr *esph;
+
+	esph = rte_pktmbuf_mtod_offset(mb, struct esp_hdr *, hlen);
+
+	/*
+	 * retrieve and reconstruct SQN, then check it, then
+	 * convert it back into network byte order.
+	 */
+	sqn = rte_be_to_cpu_32(esph->seq);
+	if (IS_ESN(sa))
+		sqn = reconstruct_esn(rsn->sqn, sqn, sa->replay.win_sz);
+
+	rc = esn_inb_check_sqn(rsn, sa, sqn);
+	if (rc != 0)
+		return rc;
+
+	sqn = rte_cpu_to_be_64(sqn);
+
+	/* start packet manipulation */
+	plen = mb->pkt_len;
+	plen = plen - hlen;
+
+	ml = rte_pktmbuf_lastseg(mb);
+	icv_ofs = ml->data_len - sa->icv_len + sa->sqh_len;
+
+	/* we have to allocate space for AAD somewhere,
+	 * right now - just use free trailing space at the last segment.
+	 * Would probably be more convenient to reserve space for AAD
+	 * inside rte_crypto_op itself
+	 * (again for IV space is already reserved inside cop).
+	 */
+	if (sa->aad_len + sa->sqh_len > rte_pktmbuf_tailroom(ml))
+		return -ENOSPC;
+
+	icv->va = rte_pktmbuf_mtod_offset(ml, void *, icv_ofs);
+	icv->pa = rte_pktmbuf_iova_offset(ml, icv_ofs);
+
+	inb_pkt_xprepare(sa, sqn, icv);
+	return plen;
+}
+
+static uint16_t
+inb_pkt_prepare(const struct rte_ipsec_session *ss, struct rte_mbuf *mb[],
+	struct rte_crypto_op *cop[], uint16_t num)
+{
+	int32_t rc;
+	uint32_t i, k, hl;
+	struct rte_ipsec_sa *sa;
+	struct replay_sqn *rsn;
+	union sym_op_data icv;
+	struct rte_mbuf *dr[num];
+
+	sa = ss->sa;
+	rsn = sa->sqn.inb;
+
+	k = 0;
+	for (i = 0; i != num; i++) {
+
+		hl = mb[i]->l2_len + mb[i]->l3_len;
+		rc = esp_inb_tun_pkt_prepare(sa, rsn, mb[i], hl, &icv);
+		if (rc >= 0)
+			rc = esp_inb_tun_cop_prepare(cop[k], sa, mb[i], &icv,
+				hl, rc);
+
+		if (rc == 0)
+			mb[k++] = mb[i];
+		else {
+			dr[i - k] = mb[i];
+			rte_errno = -rc;
+		}
+	}
+
+	/* update cops */
+	lksd_none_cop_prepare(ss, mb, cop, k);
+
+	/* copy not prepared mbufs beyond good ones */
+	if (k != num && k != 0)
+		mbuf_bulk_copy(mb + k, dr, num - k);
+
+	return k;
+}
+
+static inline void
+lksd_proto_cop_prepare(const struct rte_ipsec_session *ss,
+	struct rte_mbuf *mb[], struct rte_crypto_op *cop[], uint16_t num)
+{
+	uint32_t i;
+	struct rte_crypto_sym_op *sop;
+
+	for (i = 0; i != num; i++) {
+		sop = cop[i]->sym;
+		cop[i]->type = RTE_CRYPTO_OP_TYPE_SYMMETRIC;
+		cop[i]->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
+		cop[i]->sess_type = RTE_CRYPTO_OP_SECURITY_SESSION;
+		sop->m_src = mb[i];
+		__rte_security_attach_session(sop, ss->security.ses);
+	}
+}
+
+static uint16_t
+lksd_proto_prepare(const struct rte_ipsec_session *ss,
+	struct rte_mbuf *mb[], struct rte_crypto_op *cop[], uint16_t num)
+{
+	lksd_proto_cop_prepare(ss, mb, cop, num);
+	return num;
+}
+
+static inline int
+esp_inb_tun_single_pkt_process(struct rte_ipsec_sa *sa, struct rte_mbuf *mb,
+	uint32_t *sqn)
+{
+	uint32_t hlen, icv_len, tlen;
+	struct esp_hdr *esph;
+	struct esp_tail *espt;
+	struct rte_mbuf *ml;
+	char *pd;
+
+	if (mb->ol_flags & PKT_RX_SEC_OFFLOAD_FAILED)
+		return -EBADMSG;
+
+	icv_len = sa->icv_len;
+
+	ml = rte_pktmbuf_lastseg(mb);
+	espt = rte_pktmbuf_mtod_offset(ml, struct esp_tail *,
+		ml->data_len - icv_len - sizeof(*espt));
+
+	/*
+	 * check padding and next proto.
+	 * return an error if something is wrong.
+	 */
+	pd = (char *)espt - espt->pad_len;
+	if (espt->next_proto != sa->proto ||
+			memcmp(pd, esp_pad_bytes, espt->pad_len))
+		return -EINVAL;
+
+	/* cut off ICV, ESP tail and padding bytes */
+	tlen = icv_len + sizeof(*espt) + espt->pad_len;
+	ml->data_len -= tlen;
+	mb->pkt_len -= tlen;
+
+	/* cut off L2/L3 headers, ESP header and IV */
+	hlen = mb->l2_len + mb->l3_len;
+	esph = rte_pktmbuf_mtod_offset(mb, struct esp_hdr *, hlen);
+	rte_pktmbuf_adj(mb, hlen + sa->ctp.cipher.offset);
+
+	/* retrieve SQN for later check */
+	*sqn = rte_be_to_cpu_32(esph->seq);
+
+	/* reset mbuf metadata: L2/L3 len, packet type */
+	mb->packet_type = RTE_PTYPE_UNKNOWN;
+	mb->l2_len = 0;
+	mb->l3_len = 0;
+
+	/* clear the PKT_RX_SEC_OFFLOAD flag if set */
+	mb->ol_flags &= ~(mb->ol_flags & PKT_RX_SEC_OFFLOAD);
+	return 0;
+}
+
+static inline int
+esp_inb_trs_single_pkt_process(struct rte_ipsec_sa *sa, struct rte_mbuf *mb,
+	uint32_t *sqn)
+{
+	uint32_t hlen, icv_len, l2len, l3len, tlen;
+	struct esp_hdr *esph;
+	struct esp_tail *espt;
+	struct rte_mbuf *ml;
+	char *np, *op, *pd;
+
+	if (mb->ol_flags & PKT_RX_SEC_OFFLOAD_FAILED)
+		return -EBADMSG;
+
+	icv_len = sa->icv_len;
+
+	ml = rte_pktmbuf_lastseg(mb);
+	espt = rte_pktmbuf_mtod_offset(ml, struct esp_tail *,
+		ml->data_len - icv_len - sizeof(*espt));
+
+	/* check padding, return an error if something is wrong. */
+	pd = (char *)espt - espt->pad_len;
+	if (memcmp(pd, esp_pad_bytes, espt->pad_len))
+		return -EINVAL;
+
+	/* cut off ICV, ESP tail and padding bytes */
+	tlen = icv_len + sizeof(*espt) + espt->pad_len;
+	ml->data_len -= tlen;
+	mb->pkt_len -= tlen;
+
+	/* retrieve SQN for later check */
+	l2len = mb->l2_len;
+	l3len = mb->l3_len;
+	hlen = l2len + l3len;
+	op = rte_pktmbuf_mtod(mb, char *);
+	esph = (struct esp_hdr *)(op + hlen);
+	*sqn = rte_be_to_cpu_32(esph->seq);
+
+	/* cut off ESP header and IV, update L3 header */
+	np = rte_pktmbuf_adj(mb, sa->ctp.cipher.offset);
+	remove_esph(np, op, hlen);
+	update_trs_l3hdr(sa, np + l2len, mb->pkt_len, l2len, l3len,
+			espt->next_proto);
+
+	/* reset mbuf packet type */
+	mb->packet_type &= (RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK);
+
+	/* clear the PKT_RX_SEC_OFFLOAD flag if set */
+	mb->ol_flags &= ~(mb->ol_flags & PKT_RX_SEC_OFFLOAD);
+	return 0;
+}
+
+static inline uint16_t
+esp_inb_rsn_update(struct rte_ipsec_sa *sa, const uint32_t sqn[],
+	struct rte_mbuf *mb[], struct rte_mbuf *dr[], uint16_t num)
+{
+	uint32_t i, k;
+	struct replay_sqn *rsn;
+
+	rsn = sa->sqn.inb;
+
+	k = 0;
+	for (i = 0; i != num; i++) {
+		if (esn_inb_update_sqn(rsn, sa, sqn[i]) == 0)
+			mb[k++] = mb[i];
+		else
+			dr[i - k] = mb[i];
+	}
+
+	return k;
+}
+
+static uint16_t
+inb_tun_pkt_process(const struct rte_ipsec_session *ss, struct rte_mbuf *mb[],
+	uint16_t num)
+{
+	uint32_t i, k;
+	struct rte_ipsec_sa *sa;
+	uint32_t sqn[num];
+	struct rte_mbuf *dr[num];
+
+	sa = ss->sa;
+
+	/* process packets, extract seq numbers */
+
+	k = 0;
+	for (i = 0; i != num; i++) {
+		/* good packet */
+		if (esp_inb_tun_single_pkt_process(sa, mb[i], sqn + k) == 0)
+			mb[k++] = mb[i];
+		/* bad packet, will be dropped from further processing */
+		else
+			dr[i - k] = mb[i];
+	}
+
+	/* update seq # and replay window */
+	k = esp_inb_rsn_update(sa, sqn, mb, dr + i - k, k);
+
+	/* handle unprocessed mbufs */
+	if (k != num) {
+		rte_errno = EBADMSG;
+		if (k != 0)
+			mbuf_bulk_copy(mb + k, dr, num - k);
+	}
+
+	return k;
+}
+
+static uint16_t
+inb_trs_pkt_process(const struct rte_ipsec_session *ss, struct rte_mbuf *mb[],
+	uint16_t num)
+{
+	uint32_t i, k;
+	uint32_t sqn[num];
+	struct rte_ipsec_sa *sa;
+	struct rte_mbuf *dr[num];
+
+	sa = ss->sa;
+
+	/* process packets, extract seq numbers */
+
+	k = 0;
+	for (i = 0; i != num; i++) {
+		/* good packet */
+		if (esp_inb_trs_single_pkt_process(sa, mb[i], sqn + k) == 0)
+			mb[k++] = mb[i];
+		/* bad packet, will be dropped from further processing */
+		else
+			dr[i - k] = mb[i];
+	}
+
+	/* update seq # and replay window */
+	k = esp_inb_rsn_update(sa, sqn, mb, dr + i - k, k);
+
+	/* handle unprocessed mbufs */
+	if (k != num) {
+		rte_errno = EBADMSG;
+		if (k != 0)
+			mbuf_bulk_copy(mb + k, dr, num - k);
+	}
+
+	return k;
+}
+
+/*
+ * process outbound packets for SA with ESN support,
+ * for algorithms that require SQN.hibits to be implicitly included
+ * into digest computation.
+ * In that case we have to move ICV bytes back to their proper place.
+ */
+static uint16_t
+outb_sqh_process(const struct rte_ipsec_session *ss, struct rte_mbuf *mb[],
+	uint16_t num)
+{
+	uint32_t i, k, icv_len, *icv;
+	struct rte_mbuf *ml;
+	struct rte_ipsec_sa *sa;
+	struct rte_mbuf *dr[num];
+
+	sa = ss->sa;
+
+	k = 0;
+	icv_len = sa->icv_len;
+
+	for (i = 0; i != num; i++) {
+		if ((mb[i]->ol_flags & PKT_RX_SEC_OFFLOAD_FAILED) == 0) {
+			ml = rte_pktmbuf_lastseg(mb[i]);
+			icv = rte_pktmbuf_mtod_offset(ml, void *,
+				ml->data_len - icv_len);
+			remove_sqh(icv, icv_len);
+			mb[k++] = mb[i];
+		} else
+			dr[i - k] = mb[i];
+	}
+
+	/* handle unprocessed mbufs */
+	if (k != num) {
+		rte_errno = EBADMSG;
+		if (k != 0)
+			mbuf_bulk_copy(mb + k, dr, num - k);
+	}
+
+	return k;
+}
+
+/*
+ * simplest pkt process routine:
+ * all actual processing is already done by HW/PMD,
+ * just check mbuf ol_flags.
+ * used for:
+ * - inbound for RTE_SECURITY_ACTION_TYPE_INLINE_PROTOCOL
+ * - inbound/outbound for RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL
+ * - outbound for RTE_SECURITY_ACTION_TYPE_NONE when ESN is disabled
+ */
+static uint16_t
+pkt_flag_process(const struct rte_ipsec_session *ss, struct rte_mbuf *mb[],
+	uint16_t num)
+{
+	uint32_t i, k;
+	struct rte_mbuf *dr[num];
+
+	RTE_SET_USED(ss);
+
+	k = 0;
+	for (i = 0; i != num; i++) {
+		if ((mb[i]->ol_flags & PKT_RX_SEC_OFFLOAD_FAILED) == 0)
+			mb[k++] = mb[i];
+		else
+			dr[i - k] = mb[i];
+	}
+
+	/* handle unprocessed mbufs */
+	if (k != num) {
+		rte_errno = EBADMSG;
+		if (k != 0)
+			mbuf_bulk_copy(mb + k, dr, num - k);
+	}
+
+	return k;
+}
+
+/*
+ * prepare packets for inline ipsec processing:
+ * set ol_flags and attach metadata.
+ */
+static inline void
+inline_outb_mbuf_prepare(const struct rte_ipsec_session *ss,
+	struct rte_mbuf *mb[], uint16_t num)
+{
+	uint32_t i, ol_flags;
+
+	ol_flags = ss->security.ol_flags & RTE_SECURITY_TX_OLOAD_NEED_MDATA;
+	for (i = 0; i != num; i++) {
+
+		mb[i]->ol_flags |= PKT_TX_SEC_OFFLOAD;
+		if (ol_flags != 0)
+			rte_security_set_pkt_metadata(ss->security.ctx,
+				ss->security.ses, mb[i], NULL);
+	}
+}
+
+static uint16_t
+inline_outb_tun_pkt_process(const struct rte_ipsec_session *ss,
+	struct rte_mbuf *mb[], uint16_t num)
+{
+	int32_t rc;
+	uint32_t i, k, n;
+	uint64_t sqn;
+	rte_be64_t sqc;
+	struct rte_ipsec_sa *sa;
+	union sym_op_data icv;
+	uint64_t iv[IPSEC_MAX_IV_QWORD];
+	struct rte_mbuf *dr[num];
+
+	sa = ss->sa;
+
+	n = num;
+	sqn = esn_outb_update_sqn(sa, &n);
+	if (n != num)
+		rte_errno = EOVERFLOW;
+
+	k = 0;
+	for (i = 0; i != n; i++) {
+
+		sqc = rte_cpu_to_be_64(sqn + i);
+		gen_iv(iv, sqc);
+
+		/* try to update the packet itself */
+		rc = esp_outb_tun_pkt_prepare(sa, sqc, iv, mb[i], &icv);
+
+		/* success, update mbuf fields */
+		if (rc >= 0)
+			mb[k++] = mb[i];
+		/* failure, put packet into the death-row */
+		else {
+			dr[i - k] = mb[i];
+			rte_errno = -rc;
+		}
+	}
+
+	inline_outb_mbuf_prepare(ss, mb, k);
+
+	/* copy not processed mbufs beyond good ones */
+	if (k != num && k != 0)
+		mbuf_bulk_copy(mb + k, dr, num - k);
+
+	return k;
+}
+
+static uint16_t
+inline_outb_trs_pkt_process(const struct rte_ipsec_session *ss,
+	struct rte_mbuf *mb[], uint16_t num)
+{
+	int32_t rc;
+	uint32_t i, k, n, l2, l3;
+	uint64_t sqn;
+	rte_be64_t sqc;
+	struct rte_ipsec_sa *sa;
+	union sym_op_data icv;
+	uint64_t iv[IPSEC_MAX_IV_QWORD];
+	struct rte_mbuf *dr[num];
+
+	sa = ss->sa;
+
+	n = num;
+	sqn = esn_outb_update_sqn(sa, &n);
+	if (n != num)
+		rte_errno = EOVERFLOW;
+
+	k = 0;
+	for (i = 0; i != n; i++) {
+
+		l2 = mb[i]->l2_len;
+		l3 = mb[i]->l3_len;
+
+		sqc = rte_cpu_to_be_64(sqn + i);
+		gen_iv(iv, sqc);
+
+		/* try to update the packet itself */
+		rc = esp_outb_trs_pkt_prepare(sa, sqc, iv, mb[i],
+				l2, l3, &icv);
+
+		/* success, update mbuf fields */
+		if (rc >= 0)
+			mb[k++] = mb[i];
+		/* failure, put packet into the death-row */
+		else {
+			dr[i - k] = mb[i];
+			rte_errno = -rc;
+		}
+	}
+
+	inline_outb_mbuf_prepare(ss, mb, k);
+
+	/* copy not processed mbufs beyond good ones */
+	if (k != num && k != 0)
+		mbuf_bulk_copy(mb + k, dr, num - k);
+
+	return k;
+}
+
+/*
+ * outbound for RTE_SECURITY_ACTION_TYPE_INLINE_PROTOCOL:
+ * actual processing is done by HW/PMD, just set flags and metadata.
+ */
+static uint16_t
+outb_inline_proto_process(const struct rte_ipsec_session *ss,
+		struct rte_mbuf *mb[], uint16_t num)
+{
+	inline_outb_mbuf_prepare(ss, mb, num);
+	return num;
+}
+
+static int
+lksd_none_pkt_func_select(const struct rte_ipsec_sa *sa,
+		struct rte_ipsec_sa_pkt_func *pf)
+{
+	int32_t rc;
+
+	static const uint64_t msk = RTE_IPSEC_SATP_DIR_MASK |
+			RTE_IPSEC_SATP_MODE_MASK;
+
+	rc = 0;
+	switch (sa->type & msk) {
+	case (RTE_IPSEC_SATP_DIR_IB | RTE_IPSEC_SATP_MODE_TUNLV4):
+	case (RTE_IPSEC_SATP_DIR_IB | RTE_IPSEC_SATP_MODE_TUNLV6):
+		pf->prepare = inb_pkt_prepare;
+		pf->process = inb_tun_pkt_process;
+		break;
+	case (RTE_IPSEC_SATP_DIR_IB | RTE_IPSEC_SATP_MODE_TRANS):
+		pf->prepare = inb_pkt_prepare;
+		pf->process = inb_trs_pkt_process;
+		break;
+	case (RTE_IPSEC_SATP_DIR_OB | RTE_IPSEC_SATP_MODE_TUNLV4):
+	case (RTE_IPSEC_SATP_DIR_OB | RTE_IPSEC_SATP_MODE_TUNLV6):
+		pf->prepare = outb_tun_prepare;
+		pf->process = (sa->sqh_len != 0) ?
+			outb_sqh_process : pkt_flag_process;
+		break;
+	case (RTE_IPSEC_SATP_DIR_OB | RTE_IPSEC_SATP_MODE_TRANS):
+		pf->prepare = outb_trs_prepare;
+		pf->process = (sa->sqh_len != 0) ?
+			outb_sqh_process : pkt_flag_process;
+		break;
+	default:
+		rc = -ENOTSUP;
+	}
+
+	return rc;
+}
+
+static int
+inline_crypto_pkt_func_select(const struct rte_ipsec_sa *sa,
+		struct rte_ipsec_sa_pkt_func *pf)
+{
+	int32_t rc;
+
+	static const uint64_t msk = RTE_IPSEC_SATP_DIR_MASK |
+			RTE_IPSEC_SATP_MODE_MASK;
+
+	rc = 0;
+	switch (sa->type & msk) {
+	case (RTE_IPSEC_SATP_DIR_IB | RTE_IPSEC_SATP_MODE_TUNLV4):
+	case (RTE_IPSEC_SATP_DIR_IB | RTE_IPSEC_SATP_MODE_TUNLV6):
+		pf->process = inb_tun_pkt_process;
+		break;
+	case (RTE_IPSEC_SATP_DIR_IB | RTE_IPSEC_SATP_MODE_TRANS):
+		pf->process = inb_trs_pkt_process;
+		break;
+	case (RTE_IPSEC_SATP_DIR_OB | RTE_IPSEC_SATP_MODE_TUNLV4):
+	case (RTE_IPSEC_SATP_DIR_OB | RTE_IPSEC_SATP_MODE_TUNLV6):
+		pf->process = inline_outb_tun_pkt_process;
+		break;
+	case (RTE_IPSEC_SATP_DIR_OB | RTE_IPSEC_SATP_MODE_TRANS):
+		pf->process = inline_outb_trs_pkt_process;
+		break;
+	default:
+		rc = -ENOTSUP;
+	}
+
+	return rc;
+}
+
 int
 ipsec_sa_pkt_func_select(const struct rte_ipsec_session *ss,
 	const struct rte_ipsec_sa *sa, struct rte_ipsec_sa_pkt_func *pf)
 {
 	int32_t rc;
 
-	RTE_SET_USED(sa);
-
 	rc = 0;
 	pf[0] = (struct rte_ipsec_sa_pkt_func) { 0 };
 
 	switch (ss->type) {
+	case RTE_SECURITY_ACTION_TYPE_NONE:
+		rc = lksd_none_pkt_func_select(sa, pf);
+		break;
+	case RTE_SECURITY_ACTION_TYPE_INLINE_CRYPTO:
+		rc = inline_crypto_pkt_func_select(sa, pf);
+		break;
+	case RTE_SECURITY_ACTION_TYPE_INLINE_PROTOCOL:
+		if ((sa->type & RTE_IPSEC_SATP_DIR_MASK) ==
+				RTE_IPSEC_SATP_DIR_IB)
+			pf->process = pkt_flag_process;
+		else
+			pf->process = outb_inline_proto_process;
+		break;
+	case RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL:
+		pf->prepare = lksd_proto_prepare;
+		pf->process = pkt_flag_process;
+		break;
 	default:
 		rc = -ENOTSUP;
 	}