[v5,03/10] examples/ipsec-secgw: fix crypto-op might never get dequeued

Message ID 1546011238-22318-4-git-send-email-konstantin.ananyev@intel.com (mailing list archive)
State Superseded, archived
Delegated to: akhil goyal
Headers
Series [v4,1/9] examples/ipsec-secgw: avoid to request unused TX offloads |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Ananyev, Konstantin Dec. 28, 2018, 3:33 p.m. UTC
  In some cases crypto-ops could never be dequeued from the crypto-device.
The easiest way to reproduce:
start ipsec-secgw with crypto-dev and send fewer than 32 packets to it.
No packets will be forwarded.
Reason for that is that the application does dequeue() from crypto-queues
only when new packets arrive.
This patch makes sure it calls dequeue() on a regular basis.

Fixes: c64278c0c18b ("examples/ipsec-secgw: rework processing loop")
Cc: stable@dpdk.org

Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
Acked-by: Radu Nicolau <radu.nicolau@intel.com>
---
 examples/ipsec-secgw/ipsec-secgw.c | 136 ++++++++++++++++++++++++-----
 examples/ipsec-secgw/ipsec.c       |  60 ++++++++-----
 examples/ipsec-secgw/ipsec.h       |  11 +++
 3 files changed, 165 insertions(+), 42 deletions(-)
  

Comments

Akhil Goyal Jan. 2, 2019, 11:44 a.m. UTC | #1
On 12/28/2018 9:03 PM, Konstantin Ananyev wrote:
> In some cases crypto-ops could never be dequeued from the crypto-device.
> The easiest way to reproduce:
> start ipsec-secgw with crypto-dev and send to it less then 32 packets.
> none packets will be forwarded.
> Reason for that is that the application does dequeue() from crypto-queues
> only when new packets arrive.
> This patch makes sure it calls dequeue() on a regular basis.
>
> Fixes: c64278c0c18b ("examples/ipsec-secgw: rework processing loop")
> Cc: stable@dpdk.org
>
> Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
> Acked-by: Radu Nicolau <radu.nicolau@intel.com>
> ---
>   examples/ipsec-secgw/ipsec-secgw.c | 136 ++++++++++++++++++++++++-----
>   examples/ipsec-secgw/ipsec.c       |  60 ++++++++-----
>   examples/ipsec-secgw/ipsec.h       |  11 +++
>   3 files changed, 165 insertions(+), 42 deletions(-)
[snip]
> +
>   /* main processing loop */
>   static int32_t
>   main_loop(__attribute__((unused)) void *dummy)
> @@ -866,7 +958,8 @@ main_loop(__attribute__((unused)) void *dummy)
>   		diff_tsc = cur_tsc - prev_tsc;
>   
>   		if (unlikely(diff_tsc > drain_tsc)) {
> -			drain_buffers(qconf);
> +			drain_tx_buffers(qconf);
> +			drain_crypto_buffers(qconf);
>   			prev_tsc = cur_tsc;
>   		}
>   
> @@ -880,6 +973,9 @@ main_loop(__attribute__((unused)) void *dummy)
>   			if (nb_rx > 0)
>   				process_pkts(qconf, pkts, nb_rx, portid);
>   		}
> +
> +		drain_inbound_crypto_queues(qconf, &qconf->inbound);
> +		drain_outbound_crypto_queues(qconf, &qconf->outbound);

drain_inbound_crypto_queues and drain_outbound_crypto_queues should be called based on diff_tsc.
Moving these two lines up, right after drain_crypto_buffers, will recover the performance drop caused by this patch.
  
Ananyev, Konstantin Jan. 2, 2019, 1:43 p.m. UTC | #2
> 
> On 12/28/2018 9:03 PM, Konstantin Ananyev wrote:
> > In some cases crypto-ops could never be dequeued from the crypto-device.
> > The easiest way to reproduce:
> > start ipsec-secgw with crypto-dev and send to it less then 32 packets.
> > none packets will be forwarded.
> > Reason for that is that the application does dequeue() from crypto-queues
> > only when new packets arrive.
> > This patch makes sure it calls dequeue() on a regular basis.
> >
> > Fixes: c64278c0c18b ("examples/ipsec-secgw: rework processing loop")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
> > Acked-by: Radu Nicolau <radu.nicolau@intel.com>
> > ---
> >   examples/ipsec-secgw/ipsec-secgw.c | 136 ++++++++++++++++++++++++-----
> >   examples/ipsec-secgw/ipsec.c       |  60 ++++++++-----
> >   examples/ipsec-secgw/ipsec.h       |  11 +++
> >   3 files changed, 165 insertions(+), 42 deletions(-)
> [snip]
> > +
> >   /* main processing loop */
> >   static int32_t
> >   main_loop(__attribute__((unused)) void *dummy)
> > @@ -866,7 +958,8 @@ main_loop(__attribute__((unused)) void *dummy)
> >   		diff_tsc = cur_tsc - prev_tsc;
> >
> >   		if (unlikely(diff_tsc > drain_tsc)) {
> > -			drain_buffers(qconf);
> > +			drain_tx_buffers(qconf);
> > +			drain_crypto_buffers(qconf);
> >   			prev_tsc = cur_tsc;
> >   		}
> >
> > @@ -880,6 +973,9 @@ main_loop(__attribute__((unused)) void *dummy)
> >   			if (nb_rx > 0)
> >   				process_pkts(qconf, pkts, nb_rx, portid);
> >   		}
> > +
> > +		drain_inbound_crypto_queues(qconf, &qconf->inbound);
> > +		drain_outbound_crypto_queues(qconf, &qconf->outbound);
> 
> drain_inbound_crypto_queues and drain_outbound_crypto_queues should be called based on diff_tsc.
> moving these two lines above after  drain_crypto_buffers will improve the performance drop caused due to this patch.

Thanks, good to know.
To make what you suggest above work properly with non-legacy mode ('-l'), extra changes
would be needed...
Do you have an idea what exactly is causing the slowdown?
Is it just the extra function calls (drain_inbound_crypto_queues/drain_outbound_crypto_queues)?
Or is it because we do dequeue() from the crypto PMD more often than before?
Konstantin
  
Akhil Goyal Jan. 2, 2019, 1:50 p.m. UTC | #3
On 1/2/2019 7:13 PM, Ananyev, Konstantin wrote:
>
>> On 12/28/2018 9:03 PM, Konstantin Ananyev wrote:
>>> In some cases crypto-ops could never be dequeued from the crypto-device.
>>> The easiest way to reproduce:
>>> start ipsec-secgw with crypto-dev and send to it less then 32 packets.
>>> none packets will be forwarded.
>>> Reason for that is that the application does dequeue() from crypto-queues
>>> only when new packets arrive.
>>> This patch makes sure it calls dequeue() on a regular basis.
>>>
>>> Fixes: c64278c0c18b ("examples/ipsec-secgw: rework processing loop")
>>> Cc: stable@dpdk.org
>>>
>>> Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
>>> Acked-by: Radu Nicolau <radu.nicolau@intel.com>
>>> ---
>>>    examples/ipsec-secgw/ipsec-secgw.c | 136 ++++++++++++++++++++++++-----
>>>    examples/ipsec-secgw/ipsec.c       |  60 ++++++++-----
>>>    examples/ipsec-secgw/ipsec.h       |  11 +++
>>>    3 files changed, 165 insertions(+), 42 deletions(-)
>> [snip]
>>> +
>>>    /* main processing loop */
>>>    static int32_t
>>>    main_loop(__attribute__((unused)) void *dummy)
>>> @@ -866,7 +958,8 @@ main_loop(__attribute__((unused)) void *dummy)
>>>    		diff_tsc = cur_tsc - prev_tsc;
>>>
>>>    		if (unlikely(diff_tsc > drain_tsc)) {
>>> -			drain_buffers(qconf);
>>> +			drain_tx_buffers(qconf);
>>> +			drain_crypto_buffers(qconf);
>>>    			prev_tsc = cur_tsc;
>>>    		}
>>>
>>> @@ -880,6 +973,9 @@ main_loop(__attribute__((unused)) void *dummy)
>>>    			if (nb_rx > 0)
>>>    				process_pkts(qconf, pkts, nb_rx, portid);
>>>    		}
>>> +
>>> +		drain_inbound_crypto_queues(qconf, &qconf->inbound);
>>> +		drain_outbound_crypto_queues(qconf, &qconf->outbound);
>> drain_inbound_crypto_queues and drain_outbound_crypto_queues should be called based on diff_tsc.
>> moving these two lines above after  drain_crypto_buffers will improve the performance drop caused due to this patch.
> Thanks, good to know.
> To make what you suggest above to work properly with non-legacy mode ('-l') extra changes
> would be needed...
What changes do you see?
> Do you have an idea - what exactly causing a slowdown?
> Just an extra function calls (drain_inbound_crypto_queues/ drain_outbound_crypto_queues)?
> Or is that because we do dequeue() from crypto PMD more often then before?
I have not profiled it, but it should be because of more dequeues. On a
single call to dequeue, a burst of packets gets dequeued, but now there
will be a lot more dequeues that return fewer packets than the burst
size, which will degrade performance because dequeue cycles are
wasted.

This patch is causing around 5% drop out of the 10% that I mentioned in 
the other mail.
With the change that I suggested, I am almost able to get back those 5%.
> Konstantin
>   
>
  
Ananyev, Konstantin Jan. 2, 2019, 3:06 p.m. UTC | #4
> -----Original Message-----
> From: Akhil Goyal [mailto:akhil.goyal@nxp.com]
> Sent: Wednesday, January 2, 2019 1:51 PM
> To: Ananyev, Konstantin <konstantin.ananyev@intel.com>; dev@dpdk.org
> Cc: stable@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v5 03/10] examples/ipsec-secgw: fix crypto-op might never get dequeued
> 
> 
> 
> On 1/2/2019 7:13 PM, Ananyev, Konstantin wrote:
> >
> >> On 12/28/2018 9:03 PM, Konstantin Ananyev wrote:
> >>> In some cases crypto-ops could never be dequeued from the crypto-device.
> >>> The easiest way to reproduce:
> >>> start ipsec-secgw with crypto-dev and send to it less then 32 packets.
> >>> none packets will be forwarded.
> >>> Reason for that is that the application does dequeue() from crypto-queues
> >>> only when new packets arrive.
> >>> This patch makes sure it calls dequeue() on a regular basis.
> >>>
> >>> Fixes: c64278c0c18b ("examples/ipsec-secgw: rework processing loop")
> >>> Cc: stable@dpdk.org
> >>>
> >>> Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
> >>> Acked-by: Radu Nicolau <radu.nicolau@intel.com>
> >>> ---
> >>>    examples/ipsec-secgw/ipsec-secgw.c | 136 ++++++++++++++++++++++++-----
> >>>    examples/ipsec-secgw/ipsec.c       |  60 ++++++++-----
> >>>    examples/ipsec-secgw/ipsec.h       |  11 +++
> >>>    3 files changed, 165 insertions(+), 42 deletions(-)
> >> [snip]
> >>> +
> >>>    /* main processing loop */
> >>>    static int32_t
> >>>    main_loop(__attribute__((unused)) void *dummy)
> >>> @@ -866,7 +958,8 @@ main_loop(__attribute__((unused)) void *dummy)
> >>>    		diff_tsc = cur_tsc - prev_tsc;
> >>>
> >>>    		if (unlikely(diff_tsc > drain_tsc)) {
> >>> -			drain_buffers(qconf);
> >>> +			drain_tx_buffers(qconf);
> >>> +			drain_crypto_buffers(qconf);
> >>>    			prev_tsc = cur_tsc;
> >>>    		}
> >>>
> >>> @@ -880,6 +973,9 @@ main_loop(__attribute__((unused)) void *dummy)
> >>>    			if (nb_rx > 0)
> >>>    				process_pkts(qconf, pkts, nb_rx, portid);
> >>>    		}
> >>> +
> >>> +		drain_inbound_crypto_queues(qconf, &qconf->inbound);
> >>> +		drain_outbound_crypto_queues(qconf, &qconf->outbound);
> >> drain_inbound_crypto_queues and drain_outbound_crypto_queues should be called based on diff_tsc.
> >> moving these two lines above after  drain_crypto_buffers will improve the performance drop caused due to this patch.
> > Thanks, good to know.
> > To make what you suggest above to work properly with non-legacy mode ('-l') extra changes
> > would be needed...
> What changes do you see?

Non-legacy mode relies on drain_crypto_queues() to dequeue crypto-ops.
It doesn't do that as part of process_pkts().
It is doable, but it means I have to rework my patches a bit.

> > Do you have an idea - what exactly causing a slowdown?
> > Just an extra function calls (drain_inbound_crypto_queues/ drain_outbound_crypto_queues)?
> > Or is that because we do dequeue() from crypto PMD more often then before?
> I have not profiled it, but it should be because of more dequeues. On a
> single call to dequeue, a burst of packets get dequeued. but now there
> will be a lot more dequeues which have lesser packets than the burst
> size which will deteriorate the performance as it would be wasting the
> dequeue cycles.
> 
> This patch is causing around 5% drop out of the 10% that I mentioned in
> the other mail.
> With the change that I suggested, I am almost able to get back those 5%.

Great, any idea what is causing the other 5%?
Konstantin
  
Ananyev, Konstantin Jan. 3, 2019, 8:36 p.m. UTC | #5
Hi Akhil,

> >>> In some cases crypto-ops could never be dequeued from the crypto-device.
> >>> The easiest way to reproduce:
> >>> start ipsec-secgw with crypto-dev and send to it less then 32 packets.
> >>> none packets will be forwarded.
> >>> Reason for that is that the application does dequeue() from crypto-queues
> >>> only when new packets arrive.
> >>> This patch makes sure it calls dequeue() on a regular basis.
> >>>
> >>> Fixes: c64278c0c18b ("examples/ipsec-secgw: rework processing loop")
> >>> Cc: stable@dpdk.org
> >>>
> >>> Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
> >>> Acked-by: Radu Nicolau <radu.nicolau@intel.com>
> >>> ---
> >>>    examples/ipsec-secgw/ipsec-secgw.c | 136 ++++++++++++++++++++++++-----
> >>>    examples/ipsec-secgw/ipsec.c       |  60 ++++++++-----
> >>>    examples/ipsec-secgw/ipsec.h       |  11 +++
> >>>    3 files changed, 165 insertions(+), 42 deletions(-)
> >> [snip]
> >>> +
> >>>    /* main processing loop */
> >>>    static int32_t
> >>>    main_loop(__attribute__((unused)) void *dummy)
> >>> @@ -866,7 +958,8 @@ main_loop(__attribute__((unused)) void *dummy)
> >>>    		diff_tsc = cur_tsc - prev_tsc;
> >>>
> >>>    		if (unlikely(diff_tsc > drain_tsc)) {
> >>> -			drain_buffers(qconf);
> >>> +			drain_tx_buffers(qconf);
> >>> +			drain_crypto_buffers(qconf);
> >>>    			prev_tsc = cur_tsc;
> >>>    		}
> >>>
> >>> @@ -880,6 +973,9 @@ main_loop(__attribute__((unused)) void *dummy)
> >>>    			if (nb_rx > 0)
> >>>    				process_pkts(qconf, pkts, nb_rx, portid);
> >>>    		}
> >>> +
> >>> +		drain_inbound_crypto_queues(qconf, &qconf->inbound);
> >>> +		drain_outbound_crypto_queues(qconf, &qconf->outbound);
> >> drain_inbound_crypto_queues and drain_outbound_crypto_queues should be called based on diff_tsc.
> >> moving these two lines above after  drain_crypto_buffers will improve the performance drop caused due to this patch.
> > Thanks, good to know.
> > To make what you suggest above to work properly with non-legacy mode ('-l') extra changes
> > would be needed...
> What changes do you see?
> > Do you have an idea - what exactly causing a slowdown?
> > Just an extra function calls (drain_inbound_crypto_queues/ drain_outbound_crypto_queues)?
> > Or is that because we do dequeue() from crypto PMD more often then before?
> I have not profiled it, but it should be because of more dequeues. On a
> single call to dequeue, a burst of packets get dequeued. but now there
> will be a lot more dequeues which have lesser packets than the burst
> size which will deteriorate the performance as it would be wasting the
> dequeue cycles.
> 
> This patch is causing around 5% drop out of the 10% that I mentioned in
> the other mail.
> With the change that I suggested, I am almost able to get back those 5%.

After another thought, I suppose we have 2 options here:
1. Move drain_crypto_queues into the timeout section
    (as you suggested above).
2. Make process_pkts() not dequeue packets from the crypto-dev
    (leave that to drain_crypto_queues).

For V6 I chose option #2 - it seems a bit cleaner and easier to me,
plus on my box it is ~1-1.5% faster than option #1.
V6 also contains a few other fixes for the issues you reported.
Please give it a try; if you still find it slower than #1 on your HW,
it is no problem to use option #1.
Konstantin
  

Patch

diff --git a/examples/ipsec-secgw/ipsec-secgw.c b/examples/ipsec-secgw/ipsec-secgw.c
index 274a49cbb..797bd6435 100644
--- a/examples/ipsec-secgw/ipsec-secgw.c
+++ b/examples/ipsec-secgw/ipsec-secgw.c
@@ -469,38 +469,55 @@  inbound_sp_sa(struct sp_ctx *sp, struct sa_ctx *sa, struct traffic_type *ip,
 	ip->num = j;
 }
 
-static inline void
-process_pkts_inbound(struct ipsec_ctx *ipsec_ctx,
-		struct ipsec_traffic *traffic)
+static void
+split46_traffic(struct ipsec_traffic *trf, struct rte_mbuf *mb[], uint32_t num)
 {
+	uint32_t i, n4, n6;
+	struct ip *ip;
 	struct rte_mbuf *m;
-	uint16_t idx, nb_pkts_in, i, n_ip4, n_ip6;
 
-	nb_pkts_in = ipsec_inbound(ipsec_ctx, traffic->ipsec.pkts,
-			traffic->ipsec.num, MAX_PKT_BURST);
+	n4 = trf->ip4.num;
+	n6 = trf->ip6.num;
 
-	n_ip4 = traffic->ip4.num;
-	n_ip6 = traffic->ip6.num;
+	for (i = 0; i < num; i++) {
+
+		m = mb[i];
+		ip = rte_pktmbuf_mtod(m, struct ip *);
 
-	/* SP/ACL Inbound check ipsec and ip4 */
-	for (i = 0; i < nb_pkts_in; i++) {
-		m = traffic->ipsec.pkts[i];
-		struct ip *ip = rte_pktmbuf_mtod(m, struct ip *);
 		if (ip->ip_v == IPVERSION) {
-			idx = traffic->ip4.num++;
-			traffic->ip4.pkts[idx] = m;
-			traffic->ip4.data[idx] = rte_pktmbuf_mtod_offset(m,
+			trf->ip4.pkts[n4] = m;
+			trf->ip4.data[n4] = rte_pktmbuf_mtod_offset(m,
 					uint8_t *, offsetof(struct ip, ip_p));
+			n4++;
 		} else if (ip->ip_v == IP6_VERSION) {
-			idx = traffic->ip6.num++;
-			traffic->ip6.pkts[idx] = m;
-			traffic->ip6.data[idx] = rte_pktmbuf_mtod_offset(m,
+			trf->ip6.pkts[n6] = m;
+			trf->ip6.data[n6] = rte_pktmbuf_mtod_offset(m,
 					uint8_t *,
 					offsetof(struct ip6_hdr, ip6_nxt));
+			n6++;
 		} else
 			rte_pktmbuf_free(m);
 	}
 
+	trf->ip4.num = n4;
+	trf->ip6.num = n6;
+}
+
+
+static inline void
+process_pkts_inbound(struct ipsec_ctx *ipsec_ctx,
+		struct ipsec_traffic *traffic)
+{
+	uint16_t nb_pkts_in, n_ip4, n_ip6;
+
+	n_ip4 = traffic->ip4.num;
+	n_ip6 = traffic->ip6.num;
+
+	nb_pkts_in = ipsec_inbound(ipsec_ctx, traffic->ipsec.pkts,
+			traffic->ipsec.num, MAX_PKT_BURST);
+
+	split46_traffic(traffic, traffic->ipsec.pkts, nb_pkts_in);
+
 	inbound_sp_sa(ipsec_ctx->sp4_ctx, ipsec_ctx->sa_ctx, &traffic->ip4,
 			n_ip4);
 
@@ -795,7 +812,7 @@  process_pkts(struct lcore_conf *qconf, struct rte_mbuf **pkts,
 }
 
 static inline void
-drain_buffers(struct lcore_conf *qconf)
+drain_tx_buffers(struct lcore_conf *qconf)
 {
 	struct buffer *buf;
 	uint32_t portid;
@@ -809,6 +826,81 @@  drain_buffers(struct lcore_conf *qconf)
 	}
 }
 
+static inline void
+drain_crypto_buffers(struct lcore_conf *qconf)
+{
+	uint32_t i;
+	struct ipsec_ctx *ctx;
+
+	/* drain inbound buffers*/
+	ctx = &qconf->inbound;
+	for (i = 0; i != ctx->nb_qps; i++) {
+		if (ctx->tbl[i].len != 0)
+			enqueue_cop_burst(ctx->tbl  + i);
+	}
+
+	/* drain outbound buffers*/
+	ctx = &qconf->outbound;
+	for (i = 0; i != ctx->nb_qps; i++) {
+		if (ctx->tbl[i].len != 0)
+			enqueue_cop_burst(ctx->tbl  + i);
+	}
+}
+
+static void
+drain_inbound_crypto_queues(const struct lcore_conf *qconf,
+		struct ipsec_ctx *ctx)
+{
+	uint32_t n;
+	struct ipsec_traffic trf;
+
+	/* dequeue packets from crypto-queue */
+	n = ipsec_inbound_cqp_dequeue(ctx, trf.ipsec.pkts,
+			RTE_DIM(trf.ipsec.pkts));
+	if (n == 0)
+		return;
+
+	trf.ip4.num = 0;
+	trf.ip6.num = 0;
+
+	/* split traffic by ipv4-ipv6 */
+	split46_traffic(&trf, trf.ipsec.pkts, n);
+
+	/* process ipv4 packets */
+	inbound_sp_sa(ctx->sp4_ctx, ctx->sa_ctx, &trf.ip4, 0);
+	route4_pkts(qconf->rt4_ctx, trf.ip4.pkts, trf.ip4.num);
+
+	/* process ipv6 packets */
+	inbound_sp_sa(ctx->sp6_ctx, ctx->sa_ctx, &trf.ip6, 0);
+	route6_pkts(qconf->rt6_ctx, trf.ip6.pkts, trf.ip6.num);
+}
+
+static void
+drain_outbound_crypto_queues(const struct lcore_conf *qconf,
+		struct ipsec_ctx *ctx)
+{
+	uint32_t n;
+	struct ipsec_traffic trf;
+
+	/* dequeue packets from crypto-queue */
+	n = ipsec_outbound_cqp_dequeue(ctx, trf.ipsec.pkts,
+			RTE_DIM(trf.ipsec.pkts));
+	if (n == 0)
+		return;
+
+	trf.ip4.num = 0;
+	trf.ip6.num = 0;
+
+	/* split traffic by ipv4-ipv6 */
+	split46_traffic(&trf, trf.ipsec.pkts, n);
+
+	/* process ipv4 packets */
+	route4_pkts(qconf->rt4_ctx, trf.ip4.pkts, trf.ip4.num);
+
+	/* process ipv6 packets */
+	route6_pkts(qconf->rt6_ctx, trf.ip6.pkts, trf.ip6.num);
+}
+
 /* main processing loop */
 static int32_t
 main_loop(__attribute__((unused)) void *dummy)
@@ -866,7 +958,8 @@  main_loop(__attribute__((unused)) void *dummy)
 		diff_tsc = cur_tsc - prev_tsc;
 
 		if (unlikely(diff_tsc > drain_tsc)) {
-			drain_buffers(qconf);
+			drain_tx_buffers(qconf);
+			drain_crypto_buffers(qconf);
 			prev_tsc = cur_tsc;
 		}
 
@@ -880,6 +973,9 @@  main_loop(__attribute__((unused)) void *dummy)
 			if (nb_rx > 0)
 				process_pkts(qconf, pkts, nb_rx, portid);
 		}
+
+		drain_inbound_crypto_queues(qconf, &qconf->inbound);
+		drain_outbound_crypto_queues(qconf, &qconf->outbound);
 	}
 }
 
diff --git a/examples/ipsec-secgw/ipsec.c b/examples/ipsec-secgw/ipsec.c
index 3d415f1af..8bf928a23 100644
--- a/examples/ipsec-secgw/ipsec.c
+++ b/examples/ipsec-secgw/ipsec.c
@@ -333,33 +333,35 @@  create_session(struct ipsec_ctx *ipsec_ctx, struct ipsec_sa *sa)
 	return 0;
 }
 
+/*
+ * queue crypto-ops into PMD queue.
+ */
+void
+enqueue_cop_burst(struct cdev_qp *cqp)
+{
+	uint32_t i, len, ret;
+
+	len = cqp->len;
+	ret = rte_cryptodev_enqueue_burst(cqp->id, cqp->qp, cqp->buf, len);
+	if (ret < len) {
+		RTE_LOG_DP(DEBUG, IPSEC, "Cryptodev %u queue %u:"
+			" enqueued %u crypto ops out of %u\n",
+			cqp->id, cqp->qp, ret, len);
+			/* drop packets that we fail to enqueue */
+			for (i = ret; i < len; i++)
+				rte_pktmbuf_free(cqp->buf[i]->sym->m_src);
+	}
+	cqp->in_flight += ret;
+	cqp->len = 0;
+}
+
 static inline void
 enqueue_cop(struct cdev_qp *cqp, struct rte_crypto_op *cop)
 {
-	int32_t ret = 0, i;
-
 	cqp->buf[cqp->len++] = cop;
 
-	if (cqp->len == MAX_PKT_BURST) {
-		int enq_size = cqp->len;
-		if ((cqp->in_flight + enq_size) > MAX_INFLIGHT)
-			enq_size -=
-			    (int)((cqp->in_flight + enq_size) - MAX_INFLIGHT);
-
-		if (enq_size > 0)
-			ret = rte_cryptodev_enqueue_burst(cqp->id, cqp->qp,
-					cqp->buf, enq_size);
-		if (ret < cqp->len) {
-			RTE_LOG_DP(DEBUG, IPSEC, "Cryptodev %u queue %u:"
-					" enqueued %u crypto ops out of %u\n",
-					 cqp->id, cqp->qp,
-					 ret, cqp->len);
-			for (i = ret; i < cqp->len; i++)
-				rte_pktmbuf_free(cqp->buf[i]->sym->m_src);
-		}
-		cqp->in_flight += ret;
-		cqp->len = 0;
-	}
+	if (cqp->len == MAX_PKT_BURST)
+		enqueue_cop_burst(cqp);
 }
 
 static inline void
@@ -548,6 +550,13 @@  ipsec_inbound(struct ipsec_ctx *ctx, struct rte_mbuf *pkts[],
 	return ipsec_dequeue(esp_inbound_post, ctx, pkts, len);
 }
 
+uint16_t
+ipsec_inbound_cqp_dequeue(struct ipsec_ctx *ctx, struct rte_mbuf *pkts[],
+		uint16_t len)
+{
+	return ipsec_dequeue(esp_inbound_post, ctx, pkts, len);
+}
+
 uint16_t
 ipsec_outbound(struct ipsec_ctx *ctx, struct rte_mbuf *pkts[],
 		uint32_t sa_idx[], uint16_t nb_pkts, uint16_t len)
@@ -560,3 +569,10 @@  ipsec_outbound(struct ipsec_ctx *ctx, struct rte_mbuf *pkts[],
 
 	return ipsec_dequeue(esp_outbound_post, ctx, pkts, len);
 }
+
+uint16_t
+ipsec_outbound_cqp_dequeue(struct ipsec_ctx *ctx, struct rte_mbuf *pkts[],
+		uint16_t len)
+{
+	return ipsec_dequeue(esp_outbound_post, ctx, pkts, len);
+}
diff --git a/examples/ipsec-secgw/ipsec.h b/examples/ipsec-secgw/ipsec.h
index 580f7876b..2f04b7d68 100644
--- a/examples/ipsec-secgw/ipsec.h
+++ b/examples/ipsec-secgw/ipsec.h
@@ -184,6 +184,14 @@  uint16_t
 ipsec_outbound(struct ipsec_ctx *ctx, struct rte_mbuf *pkts[],
 		uint32_t sa_idx[], uint16_t nb_pkts, uint16_t len);
 
+uint16_t
+ipsec_inbound_cqp_dequeue(struct ipsec_ctx *ctx, struct rte_mbuf *pkts[],
+		uint16_t len);
+
+uint16_t
+ipsec_outbound_cqp_dequeue(struct ipsec_ctx *ctx, struct rte_mbuf *pkts[],
+		uint16_t len);
+
 static inline uint16_t
 ipsec_metadata_size(void)
 {
@@ -248,4 +256,7 @@  sa_check_offloads(uint16_t port_id, uint64_t *rx_offloads,
 int
 add_dst_ethaddr(uint16_t port, const struct ether_addr *addr);
 
+void
+enqueue_cop_burst(struct cdev_qp *cqp);
+
 #endif /* __IPSEC_H__ */