[dpdk-dev,1/2] LACP control packet filtering offload

Message ID 1495884464-3548-2-git-send-email-tomaszx.kulasek@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation success Compilation OK

Commit Message

Tomasz Kulasek May 27, 2017, 11:27 a.m. UTC
  New API functions implemented:

   rte_eth_bond_8023ad_slow_queue_enable(uint8_t port_id);
   rte_eth_bond_8023ad_slow_queue_disable(uint8_t port_id);

rte_eth_bond_8023ad_slow_queue_enable should be called before bonding port
start to enable new path.

When this option is enabled all slaves must support flow director's
filtering by ethernet type and support one additional queue on slaves
tx/rx.

Signed-off-by: Tomasz Kulasek <tomaszx.kulasek@intel.com>
---
 drivers/net/bonding/rte_eth_bond_8023ad.c         | 141 +++++++--
 drivers/net/bonding/rte_eth_bond_8023ad.h         |   6 +
 drivers/net/bonding/rte_eth_bond_8023ad_private.h |  15 +
 drivers/net/bonding/rte_eth_bond_pmd.c            | 345 +++++++++++++++++++++-
 drivers/net/bonding/rte_eth_bond_version.map      |   9 +
 5 files changed, 481 insertions(+), 35 deletions(-)
  

Comments

Adrien Mazarguil May 29, 2017, 8:10 a.m. UTC | #1
Hi Tomasz,

On Sat, May 27, 2017 at 01:27:43PM +0200, Tomasz Kulasek wrote:
> New API funtions implemented:
> 
>    rte_eth_bond_8023ad_slow_queue_enable(uint8_t port_id);
>    rte_eth_bond_8023ad_slow_queue_disable(uint8_t port_id);
> 
> rte_eth_bond_8023ad_slow_queue_enable should be called before bonding port
> start to enable new path.
> 
> When this option is enabled all slaves must support flow director's
> filtering by ethernet type and support one additional queue on slaves
> tx/rx.
> 
> Signed-off-by: Tomasz Kulasek <tomaszx.kulasek@intel.com>
[...]
> diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c
> index 82959ab..558682c 100644
> --- a/drivers/net/bonding/rte_eth_bond_pmd.c
> +++ b/drivers/net/bonding/rte_eth_bond_pmd.c
> @@ -59,6 +59,12 @@
>  /* Table for statistics in mode 5 TLB */
>  static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
>  
> +#if  __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
> +#define _htons(x) ((uint16_t)((((x) & 0x00ffU) << 8) | (((x) & 0xff00U) >> 8)))
> +#else
> +#define _htons(x) (x)
> +#endif
> +
[...]
>  static inline size_t
>  get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
>  {
> @@ -133,6 +139,215 @@
>  		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
>  }
>  
> +/*****************************************************************************
> + * Flow director's setup for mode 4 optimization
> + */
> +
> +static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
> +	.dst.addr_bytes = { 0 },
> +	.src.addr_bytes = { 0 },
> +	.type = _htons(ETHER_TYPE_SLOW),
> +};

Might I interest you in a more generic alternative [1]?

[1] http://dpdk.org/ml/archives/dev/2017-May/066097.html
  
Doherty, Declan June 29, 2017, 9:18 a.m. UTC | #2
On 27/05/17 12:27, Tomasz Kulasek wrote:
> New API funtions implemented:
> 
>     rte_eth_bond_8023ad_slow_queue_enable(uint8_t port_id);
>     rte_eth_bond_8023ad_slow_queue_disable(uint8_t port_id);
> 
> rte_eth_bond_8023ad_slow_queue_enable should be called before bonding port
> start to enable new path.
> 
> When this option is enabled all slaves must support flow director's
> filtering by ethernet type and support one additional queue on slaves
> tx/rx.
> 
> Signed-off-by: Tomasz Kulasek <tomaszx.kulasek@intel.com>
> ---
>   drivers/net/bonding/rte_eth_bond_8023ad.c         | 141 +++++++--
>   drivers/net/bonding/rte_eth_bond_8023ad.h         |   6 +
>   drivers/net/bonding/rte_eth_bond_8023ad_private.h |  15 +
>   drivers/net/bonding/rte_eth_bond_pmd.c            | 345 +++++++++++++++++++++-
>   drivers/net/bonding/rte_eth_bond_version.map      |   9 +
>   5 files changed, 481 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/net/bonding/rte_eth_bond_8023ad.c b/drivers/net/bonding/rte_eth_bond_8023ad.c
> index 7b863d6..125eb45 100644
> --- a/drivers/net/bonding/rte_eth_bond_8023ad.c
> +++ b/drivers/net/bonding/rte_eth_bond_8023ad.c
> @@ -632,12 +632,20 @@
>   	lacpdu->tlv_type_terminator = TLV_TYPE_TERMINATOR_INFORMATION;
>   	lacpdu->terminator_length = 0;
>   
> -	if (rte_ring_enqueue(port->tx_ring, lacp_pkt) == -ENOBUFS) {
> -		/* If TX ring full, drop packet and free message. Retransmission
> -		 * will happen in next function call. */
> -		rte_pktmbuf_free(lacp_pkt);
> -		set_warning_flags(port, WRN_TX_QUEUE_FULL);
> -		return;
> +	if (internals->mode4.slow_rx_queue == 0) {

I think we should have an explicit flag set for if hw filtering of slow 
packets is enabled instead of checking the rx/tx queue id like above.

> +		if (rte_ring_enqueue(port->tx_ring, lacp_pkt) == -ENOBUFS) {
> +			/* If TX ring full, drop packet and free message. Retransmission
> +			 * will happen in next function call. */
> +			rte_pktmbuf_free(lacp_pkt);
> +			set_warning_flags(port, WRN_TX_QUEUE_FULL);
> +			return;
> +		}
> +	} else {
> +		if (rte_eth_tx_burst(slave_id, internals->mode4.slow_tx_queue, &lacp_pkt, 1) == 0) {
> +			rte_pktmbuf_free(lacp_pkt);
> +			set_warning_flags(port, WRN_TX_QUEUE_FULL);
> +			return;
> +		}
>   	}
>   
>   	MODE4_DEBUG("sending LACP frame\n");
> @@ -741,6 +749,25 @@
>   }
>   
>   static void
> +rx_machine_update(struct bond_dev_private *internals, uint8_t slave_id,
> +		struct rte_mbuf *lacp_pkt) {
> +
> +	/* Find LACP packet to this port. Do not check subtype, it is done in
> +	 * function that queued packet */
> +	if (lacp_pkt != NULL) {
> +		struct lacpdu_header *lacp;
> +
> +		lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
> +		RTE_ASSERT(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP);
> +
> +		/* This is LACP frame so pass it to rx_machine */
> +		rx_machine(internals, slave_id, &lacp->lacpdu);
> +		rte_pktmbuf_free(lacp_pkt);
> +	} else
> +		rx_machine(internals, slave_id, NULL);
> +}
> +
> +static void
>   bond_mode_8023ad_periodic_cb(void *arg)
>   {
>   	struct rte_eth_dev *bond_dev = arg;
> @@ -809,20 +836,21 @@
>   
>   		SM_FLAG_SET(port, LACP_ENABLED);
>   
> -		/* Find LACP packet to this port. Do not check subtype, it is done in
> -		 * function that queued packet */
> -		if (rte_ring_dequeue(port->rx_ring, &pkt) == 0) {
> -			struct rte_mbuf *lacp_pkt = pkt;
> -			struct lacpdu_header *lacp;
> +		struct rte_mbuf *lacp_pkt = NULL;
>   
> -			lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
> -			RTE_ASSERT(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP);
> +		if (internals->mode4.slow_rx_queue == 0) {
 >

As above, instead of checking the rx queue id, an explicit enable/disable 
flag would be clearer.

> +			/* Find LACP packet to this port. Do not check subtype, it is done in
> +			 * function that queued packet */
> +			if (rte_ring_dequeue(port->rx_ring, &pkt) == 0)
> +				lacp_pkt = pkt;
>   
> -			/* This is LACP frame so pass it to rx_machine */
> -			rx_machine(internals, slave_id, &lacp->lacpdu);
> -			rte_pktmbuf_free(lacp_pkt);
> -		} else
> -			rx_machine(internals, slave_id, NULL);
> +			rx_machine_update(internals, slave_id, lacp_pkt);
> +		} else {
> +			if (rte_eth_rx_burst(slave_id, internals->mode4.slow_rx_queue, &lacp_pkt, 1) == 1)
> +				bond_mode_8023ad_handle_slow_pkt(internals, slave_id, lacp_pkt);
> +			else
> +				rx_machine_update(internals, slave_id, NULL);
> +		}


If possible it would be good if the hw filtered path and the using the 
sw queue followed the same code path here. We are now calling 
bond_mode_8023ad_handle_slow_pkt from both the 
bond_mode_8023ad_periodic_cb and bond_ethdev_tx_burst_8023ad, it would 
be clearer if both follow the same processing path and 
bond_mode_8023ad_handle_slow_pkt wasn't called within 
bond_ethdev_tx_burst_8023ad.

>   
>   		periodic_machine(internals, slave_id);
>   		mux_machine(internals, slave_id);
> @@ -1188,18 +1216,36 @@
>   		m_hdr->marker.tlv_type_marker = MARKER_TLV_TYPE_RESP;
>   		rte_eth_macaddr_get(slave_id, &m_hdr->eth_hdr.s_addr);
>   
> -		if (unlikely(rte_ring_enqueue(port->tx_ring, pkt) == -ENOBUFS)) {
> -			/* reset timer */
> -			port->rx_marker_timer = 0;
> -			wrn = WRN_TX_QUEUE_FULL;
> -			goto free_out;
> +		if (internals->mode4.slow_tx_queue == 0) {
> +			if (unlikely(rte_ring_enqueue(port->tx_ring, pkt) ==
> +					-ENOBUFS)) {
> +				/* reset timer */
> +				port->rx_marker_timer = 0;
> +				wrn = WRN_TX_QUEUE_FULL;
> +				goto free_out;
> +			}
> +		} else {
> +			/* Send packet directly to the slow queue */
> +			if (unlikely(rte_eth_tx_burst(slave_id,
> +					internals->mode4.slow_tx_queue,
> +					&pkt, 1) == 0)) {
> +				/* reset timer */
> +				port->rx_marker_timer = 0;
> +				wrn = WRN_TX_QUEUE_FULL;
> +				goto free_out;
> +			}
>   		}
>   	} else if (likely(subtype == SLOW_SUBTYPE_LACP)) {
> -		if (unlikely(rte_ring_enqueue(port->rx_ring, pkt) == -ENOBUFS)) {
> -			/* If RX fing full free lacpdu message and drop packet */
> -			wrn = WRN_RX_QUEUE_FULL;
> -			goto free_out;
> -		}
> +
> +		if (internals->mode4.slow_rx_queue == 0) {
> +			if (unlikely(rte_ring_enqueue(port->rx_ring, pkt) == -ENOBUFS)) {
> +				/* If RX fing full free lacpdu message and drop packet */
> +				wrn = WRN_RX_QUEUE_FULL;
> +				goto free_out;
> +			}
> +		} else
> +			rx_machine_update(internals, slave_id, pkt);
> +
>   	} else {
>   		wrn = WRN_UNKNOWN_SLOW_TYPE;
>   		goto free_out;
> @@ -1504,3 +1550,42 @@
>   	rte_eal_alarm_set(internals->mode4.update_timeout_us,
>   			bond_mode_8023ad_ext_periodic_cb, arg);
>   }
> +
> +#define MBUF_CACHE_SIZE 250
> +#define NUM_MBUFS 8191
> +
> +int
> +rte_eth_bond_8023ad_slow_queue_enable(uint8_t port)
> +{
> +	int retval = 0;
> +	struct rte_eth_dev *dev = &rte_eth_devices[port];
> +	struct bond_dev_private *internals = (struct bond_dev_private *)
> +		dev->data->dev_private;
> +
> +	if (check_for_bonded_ethdev(dev) != 0)
> +		return -1;
> +
> +	internals->mode4.slow_rx_queue = dev->data->nb_rx_queues;
> +	internals->mode4.slow_tx_queue = dev->data->nb_tx_queues;
> +

We shouldn't be setting the slow queues here as they won't necessarily 
be the right values; as mentioned above, just an enable flag would be 
sufficient.

Also we should really be testing whether all the slaves of the bond can 
support applying the filtering rule required here and then fail 
enablement if they don't.

> +	bond_ethdev_mode_set(dev, internals->mode);
> +	return retval;
> +}
> +
> +int
> +rte_eth_bond_8023ad_slow_queue_disable(uint8_t port)
> +{
> +	int retval = 0;
> +	struct rte_eth_dev *dev = &rte_eth_devices[port];
> +	struct bond_dev_private *internals = (struct bond_dev_private *)
> +		dev->data->dev_private;
> +
> +	if (check_for_bonded_ethdev(dev) != 0)
> +		return -1;
> +
> +	internals->mode4.slow_rx_queue = 0;
> +	internals->mode4.slow_tx_queue = 0;
> +


As above, in regards to the enable flag

> +	bond_ethdev_mode_set(dev, internals->mode);
> +	return retval;
> +}
> diff --git a/drivers/net/bonding/rte_eth_bond_8023ad.h b/drivers/net/bonding/rte_eth_bond_8023ad.h
> index 6b8ff57..8d21c7a 100644
> --- a/drivers/net/bonding/rte_eth_bond_8023ad.h
> +++ b/drivers/net/bonding/rte_eth_bond_8023ad.h
> @@ -302,4 +302,10 @@ struct rte_eth_bond_8023ad_slave_info {
>   rte_eth_bond_8023ad_ext_slowtx(uint8_t port_id, uint8_t slave_id,
>   		struct rte_mbuf *lacp_pkt);
>   
> +int
> +rte_eth_bond_8023ad_slow_queue_enable(uint8_t port_id);
> 
> +int
> +rte_eth_bond_8023ad_slow_queue_disable(uint8_t port_id);
> +


We need to include the doxygen here, with some details on what is being 
enabled here, i.e. details that dedicated rx/tx queues on slaves are 
being created for filtering the lacp control plane traffic from data 
path traffic so filtering in the data path is not required.

Also, I think that these functions' purpose would be clearer if they 
were called rte_eth_bond_8023ad_slow_pkt_hw_filter_enable/disable

>   #endif /* RTE_ETH_BOND_8023AD_H_ */
> diff --git a/drivers/net/bonding/rte_eth_bond_8023ad_private.h b/drivers/net/bonding/rte_eth_bond_8023ad_private.h
> index ca8858b..3963714 100644
....
> 

One thing missing is the reporting to the application that there is a 
reduced number of tx/rx queues available when hw filtering is enabled. 
Looking at bond_ethdev_info() it doesn't look like this is getting 
reported correctly at the moment anyway, but it should be the smallest 
value of the max number of queues of the slave devices minus one. So if 
we had 3 slaves, one which supports 8 rx queues and the other 2 
supporting 16, then we should report 7 (8-1) as the maximum number of rx 
queues for the bonded device.


Finally, we are missing some updated documentation about this new 
feature. The information in the cover note should be added to the 
bonding documentation at a minimum.
  

Patch

diff --git a/drivers/net/bonding/rte_eth_bond_8023ad.c b/drivers/net/bonding/rte_eth_bond_8023ad.c
index 7b863d6..125eb45 100644
--- a/drivers/net/bonding/rte_eth_bond_8023ad.c
+++ b/drivers/net/bonding/rte_eth_bond_8023ad.c
@@ -632,12 +632,20 @@ 
 	lacpdu->tlv_type_terminator = TLV_TYPE_TERMINATOR_INFORMATION;
 	lacpdu->terminator_length = 0;
 
-	if (rte_ring_enqueue(port->tx_ring, lacp_pkt) == -ENOBUFS) {
-		/* If TX ring full, drop packet and free message. Retransmission
-		 * will happen in next function call. */
-		rte_pktmbuf_free(lacp_pkt);
-		set_warning_flags(port, WRN_TX_QUEUE_FULL);
-		return;
+	if (internals->mode4.slow_rx_queue == 0) {
+		if (rte_ring_enqueue(port->tx_ring, lacp_pkt) == -ENOBUFS) {
+			/* If TX ring full, drop packet and free message. Retransmission
+			 * will happen in next function call. */
+			rte_pktmbuf_free(lacp_pkt);
+			set_warning_flags(port, WRN_TX_QUEUE_FULL);
+			return;
+		}
+	} else {
+		if (rte_eth_tx_burst(slave_id, internals->mode4.slow_tx_queue, &lacp_pkt, 1) == 0) {
+			rte_pktmbuf_free(lacp_pkt);
+			set_warning_flags(port, WRN_TX_QUEUE_FULL);
+			return;
+		}
 	}
 
 	MODE4_DEBUG("sending LACP frame\n");
@@ -741,6 +749,25 @@ 
 }
 
 static void
+rx_machine_update(struct bond_dev_private *internals, uint8_t slave_id,
+		struct rte_mbuf *lacp_pkt) {
+
+	/* Find LACP packet to this port. Do not check subtype, it is done in
+	 * function that queued packet */
+	if (lacp_pkt != NULL) {
+		struct lacpdu_header *lacp;
+
+		lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
+		RTE_ASSERT(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP);
+
+		/* This is LACP frame so pass it to rx_machine */
+		rx_machine(internals, slave_id, &lacp->lacpdu);
+		rte_pktmbuf_free(lacp_pkt);
+	} else
+		rx_machine(internals, slave_id, NULL);
+}
+
+static void
 bond_mode_8023ad_periodic_cb(void *arg)
 {
 	struct rte_eth_dev *bond_dev = arg;
@@ -809,20 +836,21 @@ 
 
 		SM_FLAG_SET(port, LACP_ENABLED);
 
-		/* Find LACP packet to this port. Do not check subtype, it is done in
-		 * function that queued packet */
-		if (rte_ring_dequeue(port->rx_ring, &pkt) == 0) {
-			struct rte_mbuf *lacp_pkt = pkt;
-			struct lacpdu_header *lacp;
+		struct rte_mbuf *lacp_pkt = NULL;
 
-			lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
-			RTE_ASSERT(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP);
+		if (internals->mode4.slow_rx_queue == 0) {
+			/* Find LACP packet to this port. Do not check subtype, it is done in
+			 * function that queued packet */
+			if (rte_ring_dequeue(port->rx_ring, &pkt) == 0)
+				lacp_pkt = pkt;
 
-			/* This is LACP frame so pass it to rx_machine */
-			rx_machine(internals, slave_id, &lacp->lacpdu);
-			rte_pktmbuf_free(lacp_pkt);
-		} else
-			rx_machine(internals, slave_id, NULL);
+			rx_machine_update(internals, slave_id, lacp_pkt);
+		} else {
+			if (rte_eth_rx_burst(slave_id, internals->mode4.slow_rx_queue, &lacp_pkt, 1) == 1)
+				bond_mode_8023ad_handle_slow_pkt(internals, slave_id, lacp_pkt);
+			else
+				rx_machine_update(internals, slave_id, NULL);
+		}
 
 		periodic_machine(internals, slave_id);
 		mux_machine(internals, slave_id);
@@ -1188,18 +1216,36 @@ 
 		m_hdr->marker.tlv_type_marker = MARKER_TLV_TYPE_RESP;
 		rte_eth_macaddr_get(slave_id, &m_hdr->eth_hdr.s_addr);
 
-		if (unlikely(rte_ring_enqueue(port->tx_ring, pkt) == -ENOBUFS)) {
-			/* reset timer */
-			port->rx_marker_timer = 0;
-			wrn = WRN_TX_QUEUE_FULL;
-			goto free_out;
+		if (internals->mode4.slow_tx_queue == 0) {
+			if (unlikely(rte_ring_enqueue(port->tx_ring, pkt) ==
+					-ENOBUFS)) {
+				/* reset timer */
+				port->rx_marker_timer = 0;
+				wrn = WRN_TX_QUEUE_FULL;
+				goto free_out;
+			}
+		} else {
+			/* Send packet directly to the slow queue */
+			if (unlikely(rte_eth_tx_burst(slave_id,
+					internals->mode4.slow_tx_queue,
+					&pkt, 1) == 0)) {
+				/* reset timer */
+				port->rx_marker_timer = 0;
+				wrn = WRN_TX_QUEUE_FULL;
+				goto free_out;
+			}
 		}
 	} else if (likely(subtype == SLOW_SUBTYPE_LACP)) {
-		if (unlikely(rte_ring_enqueue(port->rx_ring, pkt) == -ENOBUFS)) {
-			/* If RX fing full free lacpdu message and drop packet */
-			wrn = WRN_RX_QUEUE_FULL;
-			goto free_out;
-		}
+
+		if (internals->mode4.slow_rx_queue == 0) {
+			if (unlikely(rte_ring_enqueue(port->rx_ring, pkt) == -ENOBUFS)) {
+				/* If RX fing full free lacpdu message and drop packet */
+				wrn = WRN_RX_QUEUE_FULL;
+				goto free_out;
+			}
+		} else
+			rx_machine_update(internals, slave_id, pkt);
+
 	} else {
 		wrn = WRN_UNKNOWN_SLOW_TYPE;
 		goto free_out;
@@ -1504,3 +1550,42 @@ 
 	rte_eal_alarm_set(internals->mode4.update_timeout_us,
 			bond_mode_8023ad_ext_periodic_cb, arg);
 }
+
+#define MBUF_CACHE_SIZE 250
+#define NUM_MBUFS 8191
+
+int
+rte_eth_bond_8023ad_slow_queue_enable(uint8_t port)
+{
+	int retval = 0;
+	struct rte_eth_dev *dev = &rte_eth_devices[port];
+	struct bond_dev_private *internals = (struct bond_dev_private *)
+		dev->data->dev_private;
+
+	if (check_for_bonded_ethdev(dev) != 0)
+		return -1;
+
+	internals->mode4.slow_rx_queue = dev->data->nb_rx_queues;
+	internals->mode4.slow_tx_queue = dev->data->nb_tx_queues;
+
+	bond_ethdev_mode_set(dev, internals->mode);
+	return retval;
+}
+
+int
+rte_eth_bond_8023ad_slow_queue_disable(uint8_t port)
+{
+	int retval = 0;
+	struct rte_eth_dev *dev = &rte_eth_devices[port];
+	struct bond_dev_private *internals = (struct bond_dev_private *)
+		dev->data->dev_private;
+
+	if (check_for_bonded_ethdev(dev) != 0)
+		return -1;
+
+	internals->mode4.slow_rx_queue = 0;
+	internals->mode4.slow_tx_queue = 0;
+
+	bond_ethdev_mode_set(dev, internals->mode);
+	return retval;
+}
diff --git a/drivers/net/bonding/rte_eth_bond_8023ad.h b/drivers/net/bonding/rte_eth_bond_8023ad.h
index 6b8ff57..8d21c7a 100644
--- a/drivers/net/bonding/rte_eth_bond_8023ad.h
+++ b/drivers/net/bonding/rte_eth_bond_8023ad.h
@@ -302,4 +302,10 @@  struct rte_eth_bond_8023ad_slave_info {
 rte_eth_bond_8023ad_ext_slowtx(uint8_t port_id, uint8_t slave_id,
 		struct rte_mbuf *lacp_pkt);
 
+int
+rte_eth_bond_8023ad_slow_queue_enable(uint8_t port_id);
+
+int
+rte_eth_bond_8023ad_slow_queue_disable(uint8_t port_id);
+
 #endif /* RTE_ETH_BOND_8023AD_H_ */
diff --git a/drivers/net/bonding/rte_eth_bond_8023ad_private.h b/drivers/net/bonding/rte_eth_bond_8023ad_private.h
index ca8858b..3963714 100644
--- a/drivers/net/bonding/rte_eth_bond_8023ad_private.h
+++ b/drivers/net/bonding/rte_eth_bond_8023ad_private.h
@@ -39,6 +39,7 @@ 
 #include <rte_ether.h>
 #include <rte_byteorder.h>
 #include <rte_atomic.h>
+#include <rte_flow.h>
 
 #include "rte_eth_bond_8023ad.h"
 
@@ -162,6 +163,9 @@  struct port {
 
 	uint64_t warning_timer;
 	volatile uint16_t warnings_to_show;
+
+	/** Memory pool used to allocate slow queues */
+	struct rte_mempool *slow_pool;
 };
 
 struct mode8023ad_private {
@@ -175,6 +179,10 @@  struct mode8023ad_private {
 	uint64_t update_timeout_us;
 	rte_eth_bond_8023ad_ext_slowrx_fn slowrx_cb;
 	uint8_t external_sm;
+
+	uint8_t slow_rx_queue; /**< Queue no for slow packets, or 0 if no accel */
+	uint8_t slow_tx_queue;
+	struct rte_flow *slow_flow[RTE_MAX_ETHPORTS];
 };
 
 /**
@@ -295,4 +303,11 @@  struct mode8023ad_private {
 void
 bond_mode_8023ad_mac_address_update(struct rte_eth_dev *bond_dev);
 
+int
+bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
+		uint8_t slave_port);
+
+int
+bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint8_t slave_port);
+
 #endif /* RTE_ETH_BOND_8023AD_H_ */
diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c
index 82959ab..558682c 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -59,6 +59,12 @@ 
 /* Table for statistics in mode 5 TLB */
 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
 
+#if  __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define _htons(x) ((uint16_t)((((x) & 0x00ffU) << 8) | (((x) & 0xff00U) >> 8)))
+#else
+#define _htons(x) (x)
+#endif
+
 static inline size_t
 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
 {
@@ -133,6 +139,215 @@ 
 		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
 }
 
+/*****************************************************************************
+ * Flow director's setup for mode 4 optimization
+ */
+
+static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
+	.dst.addr_bytes = { 0 },
+	.src.addr_bytes = { 0 },
+	.type = _htons(ETHER_TYPE_SLOW),
+};
+
+static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
+	.dst.addr_bytes = { 0 },
+	.src.addr_bytes = { 0 },
+	.type = 0xFFFF,
+};
+
+static struct rte_flow_item flow_item_8023ad[] = {
+	{
+		.type = RTE_FLOW_ITEM_TYPE_ETH,
+		.spec = &flow_item_eth_type_8023ad,
+		.last = NULL,
+		.mask = &flow_item_eth_mask_type_8023ad,
+	},
+	{
+		.type = RTE_FLOW_ITEM_TYPE_END,
+		.spec = NULL,
+		.last = NULL,
+		.mask = NULL,
+	}
+};
+
+const struct rte_flow_attr flow_attr_8023ad = {
+	.group = 0,
+	.priority = 0,
+	.ingress = 1,
+	.egress = 0,
+	.reserved = 0,
+};
+
+int
+bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
+		uint8_t slave_port) {
+
+	struct rte_flow_error error;
+	struct bond_dev_private *internals = (struct bond_dev_private *)
+			(bond_dev->data->dev_private);
+
+	struct rte_flow_action_queue lacp_queue_conf = {
+		.index = internals->mode4.slow_rx_queue,
+	};
+
+	const struct rte_flow_action actions[] = {
+		{
+			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
+			.conf = &lacp_queue_conf
+		},
+		{
+			.type = RTE_FLOW_ACTION_TYPE_END,
+		}
+	};
+
+	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
+			flow_item_8023ad, actions, &error);
+	if (ret < 0)
+		return -1;
+
+	return 0;
+}
+
+int
+bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint8_t slave_port) {
+
+	struct rte_flow_error error;
+	struct bond_dev_private *internals = (struct bond_dev_private *)
+			(bond_dev->data->dev_private);
+
+	struct rte_flow_action_queue lacp_queue_conf = {
+		.index = internals->mode4.slow_rx_queue,
+	};
+
+	const struct rte_flow_action actions[] = {
+		{
+			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
+			.conf = &lacp_queue_conf
+		},
+		{
+			.type = RTE_FLOW_ACTION_TYPE_END,
+		}
+	};
+
+	internals->mode4.slow_flow[slave_port] = rte_flow_create(slave_port,
+			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
+	if (internals->mode4.slow_flow[slave_port] == NULL) {
+		RTE_BOND_LOG(ERR,
+			"bond_ethdev_8023ad_flow_set: %s (slave_port=%d queue_id=%d)",
+			error.message, slave_port, internals->mode4.slow_rx_queue);
+		return -1;
+	}
+
+	return 0;
+}
+
+static uint16_t
+bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
+		uint16_t nb_pkts)
+{
+	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
+	struct bond_dev_private *internals = bd_rx_q->dev_private;
+	uint16_t num_rx_total = 0;	/* Total number of received packets */
+	uint8_t slaves[RTE_MAX_ETHPORTS];
+	uint8_t slave_count;
+
+	uint8_t i;
+
+	/* Copy slave list to protect against slave up/down changes during tx
+	 * bursting */
+	slave_count = internals->active_slave_count;
+	memcpy(slaves, internals->active_slaves,
+			sizeof(internals->active_slaves[0]) * slave_count);
+
+	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
+		/* Read packets from this slave */
+		num_rx_total += rte_eth_rx_burst(slaves[i], bd_rx_q->queue_id,
+				&bufs[num_rx_total], nb_pkts - num_rx_total);
+	}
+
+	return num_rx_total;
+}
+
+static uint16_t
+bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
+		uint16_t nb_pkts)
+{
+	struct bond_dev_private *internals;
+	struct bond_tx_queue *bd_tx_q;
+
+	uint8_t num_of_slaves;
+	uint8_t slaves[RTE_MAX_ETHPORTS];
+	 /* positions in slaves, not ID */
+	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
+	uint8_t distributing_count;
+
+	uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
+	uint16_t i, op_slave_idx;
+
+	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
+
+	/* Total amount of packets in slave_bufs */
+	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
+	/* Slow packets placed in each slave */
+
+	if (unlikely(nb_pkts == 0))
+		return 0;
+
+	bd_tx_q = (struct bond_tx_queue *)queue;
+	internals = bd_tx_q->dev_private;
+
+	/* Copy slave list to protect against slave up/down changes during tx
+	 * bursting */
+	num_of_slaves = internals->active_slave_count;
+	if (num_of_slaves < 1)
+		return num_tx_total;
+
+	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) *
+			num_of_slaves);
+
+	distributing_count = 0;
+	for (i = 0; i < num_of_slaves; i++) {
+		struct port *port = &mode_8023ad_ports[slaves[i]];
+		if (ACTOR_STATE(port, DISTRIBUTING))
+			distributing_offsets[distributing_count++] = i;
+	}
+
+	if (likely(distributing_count > 0)) {
+		/* Populate slaves mbuf with the packets which are to be sent on it */
+		for (i = 0; i < nb_pkts; i++) {
+			/* Select output slave using hash based on xmit policy */
+			op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);
+
+			/* Populate slave mbuf arrays with mbufs for that slave. Use only
+			 * slaves that are currently distributing. */
+			uint8_t slave_offset = distributing_offsets[op_slave_idx];
+			slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
+			slave_nb_pkts[slave_offset]++;
+		}
+	}
+
+	/* Send packet burst on each slave device */
+	for (i = 0; i < num_of_slaves; i++) {
+		if (slave_nb_pkts[i] == 0)
+			continue;
+
+		num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
+				slave_bufs[i], slave_nb_pkts[i]);
+
+		num_tx_total += num_tx_slave;
+		num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
+
+		/* If tx burst fails move packets to end of bufs */
+		if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
+			uint16_t j = nb_pkts - num_tx_fail_total;
+			for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
+				bufs[j] = slave_bufs[i][num_tx_slave];
+		}
+	}
+
+	return num_tx_total;
+}
+
 static uint16_t
 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
 		uint16_t nb_pkts)
@@ -180,6 +395,13 @@ 
 
 		/* Handle slow protocol packets. */
 		while (j < num_rx_total) {
+
+			/* if packet is not pure L2 and is known, skip it */
+			if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
+				j++;
+				continue;
+			}
+
 			if (j + 3 < num_rx_total)
 				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
 
@@ -1295,11 +1517,19 @@  struct bwg_slave {
 		if (bond_mode_8023ad_enable(eth_dev) != 0)
 			return -1;
 
-		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
-		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
-		RTE_LOG(WARNING, PMD,
-				"Using mode 4, it is necessary to do TX burst and RX burst "
-				"at least every 100ms.\n");
+		if (internals->mode4.slow_rx_queue == 0) {
+			eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
+			eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
+			RTE_LOG(WARNING, PMD,
+				"Using mode 4, it is necessary to do TX burst "
+				"and RX burst at least every 100ms.\n");
+		} else {
+			/* Use flow director's optimization */
+			eth_dev->rx_pkt_burst =
+					bond_ethdev_rx_burst_8023ad_fast_queue;
+			eth_dev->tx_pkt_burst =
+					bond_ethdev_tx_burst_8023ad_fast_queue;
+		}
 		break;
 	case BONDING_MODE_TLB:
 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
@@ -1321,6 +1551,72 @@  struct bwg_slave {
 	return 0;
 }
 
+static int
+slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
+		struct rte_eth_dev *slave_eth_dev)
+{
+	int errval = 0;
+	struct bond_dev_private *internals = (struct bond_dev_private *)
+		bonded_eth_dev->data->dev_private;
+	struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
+
+	if ((internals->mode != BONDING_MODE_8023AD) ||
+			(internals->mode4.slow_rx_queue == 0) ||
+			(internals->mode4.slow_tx_queue == 0))
+		return 0;
+
+	if (port->slow_pool == NULL) {
+		char mem_name[256];
+		int slave_id = slave_eth_dev->data->port_id;
+
+		snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
+				slave_id);
+		port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
+			250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
+			slave_eth_dev->data->numa_node);
+
+		/* Any memory allocation failure in initialization is critical because
+		 * resources can't be free, so reinitialization is impossible. */
+		if (port->slow_pool == NULL) {
+			rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
+				slave_id, mem_name, rte_strerror(rte_errno));
+		}
+	}
+
+	if (internals->mode4.slow_rx_queue > 0) {
+		/* Configure slow Rx queue */
+
+		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
+				internals->mode4.slow_rx_queue, 128,
+				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
+				NULL, port->slow_pool);
+		if (errval != 0) {
+			RTE_BOND_LOG(ERR,
+					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
+					slave_eth_dev->data->port_id,
+					internals->mode4.slow_rx_queue,
+					errval);
+			return errval;
+		}
+	}
+
+	if (internals->mode4.slow_tx_queue > 0) {
+		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
+				internals->mode4.slow_tx_queue, 512,
+				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
+				NULL);
+		if (errval != 0) {
+			RTE_BOND_LOG(ERR,
+				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
+				slave_eth_dev->data->port_id,
+				internals->mode4.slow_tx_queue,
+				errval);
+			return errval;
+		}
+	}
+	return 0;
+}
+
 int
 slave_configure(struct rte_eth_dev *bonded_eth_dev,
 		struct rte_eth_dev *slave_eth_dev)
@@ -1330,6 +1626,10 @@  struct bwg_slave {
 
 	int errval;
 	uint16_t q_id;
+	struct rte_flow_error flow_error;
+
+	struct bond_dev_private *internals = (struct bond_dev_private *)
+		bonded_eth_dev->data->dev_private;
 
 	/* Stop slave */
 	rte_eth_dev_stop(slave_eth_dev->data->port_id);
@@ -1359,10 +1659,19 @@  struct bwg_slave {
 	slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
 			bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;
 
+	uint16_t nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
+	uint16_t nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
+
+	if (internals->mode == BONDING_MODE_8023AD) {
+		if (internals->mode4.slow_rx_queue > 0)
+			nb_rx_queues++;
+		if (internals->mode4.slow_tx_queue > 0)
+			nb_tx_queues++;
+	}
+
 	/* Configure device */
 	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
-			bonded_eth_dev->data->nb_rx_queues,
-			bonded_eth_dev->data->nb_tx_queues,
+			nb_rx_queues, nb_tx_queues,
 			&(slave_eth_dev->data->dev_conf));
 	if (errval != 0) {
 		RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
@@ -1402,6 +1711,28 @@  struct bwg_slave {
 		}
 	}
 
+	slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
+
+	if ((internals->mode == BONDING_MODE_8023AD) &&
+			(internals->mode4.slow_rx_queue > 0)) {
+
+		if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
+				slave_eth_dev->data->port_id) != 0) {
+			RTE_BOND_LOG(ERR,
+					"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
+					slave_eth_dev->data->port_id, q_id, errval);
+			return -1;
+		}
+
+		if (internals->mode4.slow_flow[slave_eth_dev->data->port_id] != NULL)
+			rte_flow_destroy(slave_eth_dev->data->port_id,
+					internals->mode4.slow_flow[slave_eth_dev->data->port_id],
+					&flow_error);
+
+		bond_ethdev_8023ad_flow_set(bonded_eth_dev,
+				slave_eth_dev->data->port_id);
+	}
+
 	/* Start device */
 	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
 	if (errval != 0) {
diff --git a/drivers/net/bonding/rte_eth_bond_version.map b/drivers/net/bonding/rte_eth_bond_version.map
index 2de0a7d..6f1f13a 100644
--- a/drivers/net/bonding/rte_eth_bond_version.map
+++ b/drivers/net/bonding/rte_eth_bond_version.map
@@ -43,3 +43,12 @@  DPDK_16.07 {
 	rte_eth_bond_8023ad_setup;
 
 } DPDK_16.04;
+
+DPDK_17.08 {
+	global:
+
+	rte_eth_bond_8023ad_slow_queue_enable;
+	rte_eth_bond_8023ad_slow_queue_disable;
+
+	local: *;
+} DPDK_16.07;