[dpdk-dev] [PATCH] ixgbe: Discard SRIOV transparent vlan packet headers.

Ananyev, Konstantin konstantin.ananyev at intel.com
Mon Dec 14 20:12:26 CET 2015



> From: Stephen Hemminger [mailto:stephen at networkplumber.org]
> Sent: Friday, December 11, 2015 4:59 PM
> To: Zhang, Helin; Ananyev, Konstantin
> Cc: dev at dpdk.org; Tom Kiely; Stephen Hemminger
> Subject: [PATCH] ixgbe: Discard SRIOV transparent vlan packet headers.
> 
> From: Tom Kiely <tkiely at brocade.com>
> 
> SRIOV VFs support "transparent" vlans. Traffic from/to a VM
> associated with a VF is tagged/untagged with the specified
> vlan in a manner intended to be totally transparent to the VM.
> 
> The vlan is specified by "ip link set <device> vf <n> vlan <v>".
> The VM is not configured for any vlan on the VF and the VM
> should never see these transparent vlan headers for that reason.
> 
> However, in practice these vlan headers are being received by
> the VM which discards the packets as that vlan is unknown to it.
> The Linux kernel explicitly discards such vlan headers but DPDK
> does not.
> This patch mirrors the kernel behaviour for SRIOV VFs only


I have a few concerns about that approach:

1. I don't think vlan_tci info should *always* be stripped by the VF RX routine.
There could be configurations when that information might be needed by upper layer.
Let's say a VF can be a member of 2 or more VLANs, and the upper layer would like to have that information
for further processing.
Or a special mirror VF that does traffic snooping, or something else.
2. Proposed implementation would introduce a slowdown for all VF RX routines.
3. From the description it seems like the aim is to clear VLAN information for the RX packet.
Though the patch actually clears VLAN info only for the RX packet whose VLAN tag is not present inside SW copy of VFTA table.
Which doesn't make much sense to me:
If VLAN is not present in HW VFTA table, then packet with that VLAN tag will be discarded by HW anyway.
If it is present inside the VFTA table (both SW & HW), then VLAN information would be preserved both with and without the patch.

If you need to clear VLAN information, why not do it at the upper layer - inside your application itself?
Either create some sort of wrapper around rx_burst(), or set up an RX callback for your VF device.

Konstantin


> 
> Signed-off-by: Tom Kiely <tkiely at brocade.com>
> Signed-off-by: Stephen Hemminger <stephen at networkplumber.org>
> ---
>  drivers/net/ixgbe/ixgbe_ethdev.c   | 10 ++++----
>  drivers/net/ixgbe/ixgbe_ethdev.h   | 36 +++++++++++++++++++++++++++
>  drivers/net/ixgbe/ixgbe_rxtx.c     | 50 ++++++++++++++++++++++++++++++++++----
>  drivers/net/ixgbe/ixgbe_rxtx.h     | 27 ++++++++++++++++++++
>  drivers/net/ixgbe/ixgbe_rxtx_vec.c | 10 ++++++++
>  5 files changed, 123 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
> index 1b6cd8e..0987bf9 100644
> --- a/drivers/net/ixgbe/ixgbe_ethdev.c
> +++ b/drivers/net/ixgbe/ixgbe_ethdev.c
> @@ -516,7 +516,7 @@ static const struct eth_dev_ops ixgbevf_eth_dev_ops = {
>  	.vlan_filter_set      = ixgbevf_vlan_filter_set,
>  	.vlan_strip_queue_set = ixgbevf_vlan_strip_queue_set,
>  	.vlan_offload_set     = ixgbevf_vlan_offload_set,
> -	.rx_queue_setup       = ixgbe_dev_rx_queue_setup,
> +	.rx_queue_setup       = ixgbevf_dev_rx_queue_setup,
>  	.rx_queue_release     = ixgbe_dev_rx_queue_release,
>  	.rx_descriptor_done   = ixgbe_dev_rx_descriptor_done,
>  	.tx_queue_setup       = ixgbe_dev_tx_queue_setup,
> @@ -1492,8 +1492,8 @@ ixgbe_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
>  	uint32_t vid_idx;
>  	uint32_t vid_bit;
> 
> -	vid_idx = (uint32_t) ((vlan_id >> 5) & 0x7F);
> -	vid_bit = (uint32_t) (1 << (vlan_id & 0x1F));
> +	vid_idx = ixgbe_vfta_index(vlan_id);
> +	vid_bit = ixgbe_vfta_bit(vlan_id);
>  	vfta = IXGBE_READ_REG(hw, IXGBE_VFTA(vid_idx));
>  	if (on)
>  		vfta |= vid_bit;
> @@ -3965,8 +3965,8 @@ ixgbevf_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
>  		PMD_INIT_LOG(ERR, "Unable to set VF vlan");
>  		return ret;
>  	}
> -	vid_idx = (uint32_t) ((vlan_id >> 5) & 0x7F);
> -	vid_bit = (uint32_t) (1 << (vlan_id & 0x1F));
> +	vid_idx = ixgbe_vfta_index(vlan_id);
> +	vid_bit = ixgbe_vfta_bit(vlan_id);
> 
>  	/* Save what we set and retore it after device reset */
>  	if (on)
> diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
> index d26771a..44411e4 100644
> --- a/drivers/net/ixgbe/ixgbe_ethdev.h
> +++ b/drivers/net/ixgbe/ixgbe_ethdev.h
> @@ -340,6 +340,11 @@ int  ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
>  		const struct rte_eth_rxconf *rx_conf,
>  		struct rte_mempool *mb_pool);
> 
> +int  ixgbevf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
> +				uint16_t nb_rx_desc, unsigned int socket_id,
> +				const struct rte_eth_rxconf *rx_conf,
> +				struct rte_mempool *mb_pool);
> +
>  int  ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
>  		uint16_t nb_tx_desc, unsigned int socket_id,
>  		const struct rte_eth_txconf *tx_conf);
> @@ -436,4 +441,35 @@ uint32_t ixgbe_convert_vm_rx_mask_to_val(uint16_t rx_mask, uint32_t orig_val);
> 
>  int ixgbe_fdir_ctrl_func(struct rte_eth_dev *dev,
>  			enum rte_filter_op filter_op, void *arg);
> +
> +/*
> + * Calculate index in vfta array of the 32 bit value enclosing
> + * a given vlan id
> + */
> +static inline uint32_t
> +ixgbe_vfta_index(uint16_t vlan)
> +{
> +	return (vlan >> 5) & 0x7f;
> +}
> +
> +/*
> + * Calculate vfta array entry bitmask for vlan id within the
> + * enclosing 32 bit entry.
> + */
> +static inline uint32_t
> +ixgbe_vfta_bit(uint16_t vlan)
> +{
> +	return 1 << (vlan & 0x1f);
> +}
> +
> +/*
> + * Check in the vfta bit array if the bit corresponding to
> + * the given vlan is set.
> + */
> +static inline bool
> +ixgbe_vfta_is_vlan_set(const struct ixgbe_vfta *vfta, uint16_t vlan)
> +{
> +	return (vfta->vfta[ixgbe_vfta_index(vlan)] & ixgbe_vfta_bit(vlan)) != 0;
> +}
> +
>  #endif /* _IXGBE_ETHDEV_H_ */
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
> index 52a263c..5ab029d 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> @@ -1171,14 +1171,21 @@ ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>  			 uint16_t nb_pkts)
>  {
>  	struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
> +	const struct rte_eth_dev *dev;
> +	const struct ixgbe_vfta *vfta;
>  	int i;
> 
> +	dev = &rte_eth_devices[rxq->port_id];
> +	vfta = IXGBE_DEV_PRIVATE_TO_VFTA(dev->data->dev_private);
> +
>  	/* how many packets are ready to return? */
>  	nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
> 
>  	/* copy mbuf pointers to the application's packet list */
> -	for (i = 0; i < nb_pkts; ++i)
> +	for (i = 0; i < nb_pkts; ++i) {
>  		rx_pkts[i] = stage[i];
> +		ixgbe_unknown_vlan_sw_filter_hdr(rx_pkts[i], vfta, rxq);
> +	}
> 
>  	/* update internal queue state */
>  	rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
> @@ -1188,10 +1195,9 @@ ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>  }
> 
>  static inline uint16_t
> -rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
> +rx_recv_pkts(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>  	     uint16_t nb_pkts)
>  {
> -	struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
>  	uint16_t nb_rx = 0;
> 
>  	/* Any previously recv'd pkts will be returned from the Rx stage */
> @@ -1252,19 +1258,20 @@ ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
>  			   uint16_t nb_pkts)
>  {
>  	uint16_t nb_rx;
> +	struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
> 
>  	if (unlikely(nb_pkts == 0))
>  		return 0;
> 
>  	if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
> -		return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
> +		return rx_recv_pkts(rxq, rx_pkts, nb_pkts);
> 
>  	/* request is relatively large, chunk it up */
>  	nb_rx = 0;
>  	while (nb_pkts) {
>  		uint16_t ret, n;
>  		n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
> -		ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
> +		ret = rx_recv_pkts(rxq, &rx_pkts[nb_rx], n);
>  		nb_rx = (uint16_t)(nb_rx + ret);
>  		nb_pkts = (uint16_t)(nb_pkts - ret);
>  		if (ret < n)
> @@ -1294,6 +1301,8 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
>  	uint16_t nb_rx;
>  	uint16_t nb_hold;
>  	uint64_t pkt_flags;
> +	const struct rte_eth_dev *dev;
> +	const struct ixgbe_vfta *vfta;
> 
>  	nb_rx = 0;
>  	nb_hold = 0;
> @@ -1301,6 +1310,9 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
>  	rx_id = rxq->rx_tail;
>  	rx_ring = rxq->rx_ring;
>  	sw_ring = rxq->sw_ring;
> +	dev = &rte_eth_devices[rxq->port_id];
> +	vfta = IXGBE_DEV_PRIVATE_TO_VFTA(dev->data->dev_private);
> +
>  	while (nb_rx < nb_pkts) {
>  		/*
>  		 * The order of operations here is important as the DD status
> @@ -1418,6 +1430,8 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
>  		rxm->ol_flags = pkt_flags;
>  		rxm->packet_type = ixgbe_rxd_pkt_info_to_pkt_type(pkt_info);
> 
> +		ixgbe_unknown_vlan_sw_filter_hdr(rxm, vfta, rxq);
> +
>  		if (likely(pkt_flags & PKT_RX_RSS_HASH))
>  			rxm->hash.rss = rte_le_to_cpu_32(
>  						rxd.wb.lower.hi_dword.rss);
> @@ -1557,6 +1571,11 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
>  	uint16_t nb_rx = 0;
>  	uint16_t nb_hold = rxq->nb_rx_hold;
>  	uint16_t prev_id = rxq->rx_tail;
> +	const struct rte_eth_dev *dev;
> +	const struct ixgbe_vfta *vfta;
> +
> +	dev = &rte_eth_devices[rxq->port_id];
> +	vfta = IXGBE_DEV_PRIVATE_TO_VFTA(dev->data->dev_private);
> 
>  	while (nb_rx < nb_pkts) {
>  		bool eop;
> @@ -1779,6 +1798,8 @@ next_desc:
>  		rte_packet_prefetch((char *)first_seg->buf_addr +
>  			first_seg->data_off);
> 
> +		ixgbe_unknown_vlan_sw_filter_hdr(first_seg, vfta, rxq);
> +
>  		/*
>  		 * Store the mbuf address into the next entry of the array
>  		 * of returned packets.
> @@ -2480,6 +2501,25 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
>  	return 0;
>  }
> 
> +int __attribute__((cold))
> +ixgbevf_dev_rx_queue_setup(struct rte_eth_dev *dev,
> +			   uint16_t queue_idx,
> +			   uint16_t nb_desc,
> +			   unsigned int socket_id,
> +			   const struct rte_eth_rxconf *rx_conf,
> +			   struct rte_mempool *mp)
> +{
> +	struct ixgbe_rx_queue *rxq;
> +
> +	ixgbe_dev_rx_queue_setup(dev, queue_idx, nb_desc, socket_id,
> +				 rx_conf, mp);
> +
> +	rxq = dev->data->rx_queues[queue_idx];
> +	rxq->filter_unknown_vlan_hdrs = true;
> +
> +	return 0;
> +}
> +
>  uint32_t
>  ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
>  {
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
> index 475a800..3bfceda 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.h
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.h
> @@ -146,6 +146,7 @@ struct ixgbe_rx_queue {
>  	uint8_t             crc_len;  /**< 0 if CRC stripped, 4 otherwise. */
>  	uint8_t             drop_en;  /**< If not 0, set SRRCTL.Drop_En. */
>  	uint8_t             rx_deferred_start; /**< not in global dev start. */
> +	uint8_t             filter_unknown_vlan_hdrs;
>  	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
>  	struct rte_mbuf fake_mbuf;
>  	/** hold packets to return to application */
> @@ -307,5 +308,31 @@ uint16_t ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
>  		uint16_t nb_pkts);
>  int ixgbe_txq_vec_setup(struct ixgbe_tx_queue *txq);
> 
> +#define VLAN_VID_MASK 0x0fff
> +
> +/*
> + * Filter out vlan headers if no vlan configured.
> + *
> + * One use case for this is SRIOV VFs with transparent
> + * vlans. These vlan headers are currently seen by the DPDK
> + * client and may cause affected packets to be dropped as
> + * that vlan is not configured.
> + */
> +static inline void
> +ixgbe_unknown_vlan_sw_filter_hdr(struct rte_mbuf *m,
> +				 const struct ixgbe_vfta *vfta,
> +				 struct ixgbe_rx_queue *rxq)
> +{
> +	uint16_t vlan;
> +
> +	if (rxq->filter_unknown_vlan_hdrs && (m->ol_flags & PKT_RX_VLAN_PKT)) {
> +		vlan = m->vlan_tci & VLAN_VID_MASK;
> +		if (!ixgbe_vfta_is_vlan_set(vfta, vlan)) {
> +			m->vlan_tci = 0;
> +			m->ol_flags &= ~PKT_RX_VLAN_PKT;
> +		}
> +	}
> +}
> +
>  #endif /* RTE_IXGBE_INC_VECTOR */
>  #endif /* _IXGBE_RXTX_H_ */
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec.c b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
> index ccd93c7..a710af1 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx_vec.c
> +++ b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
> @@ -206,6 +206,10 @@ static inline uint16_t
>  _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>  		uint16_t nb_pkts, uint8_t *split_packet)
>  {
> +	const struct rte_eth_dev *dev
> +		= &rte_eth_devices[rxq->port_id];
> +	const struct ixgbe_vfta *vfta
> +		= IXGBE_DEV_PRIVATE_TO_VFTA(dev->data->dev_private);
>  	volatile union ixgbe_adv_rx_desc *rxdp;
>  	struct ixgbe_rx_entry *sw_ring;
>  	uint16_t nb_pkts_recd;
> @@ -350,8 +354,11 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>  		/* D.3 copy final 3,4 data to rx_pkts */
>  		_mm_storeu_si128((void *)&rx_pkts[pos+3]->rx_descriptor_fields1,
>  				pkt_mb4);
> +		ixgbe_unknown_vlan_sw_filter_hdr(rx_pkts[pos + 3], vfta, rxq);
> +
>  		_mm_storeu_si128((void *)&rx_pkts[pos+2]->rx_descriptor_fields1,
>  				pkt_mb3);
> +		ixgbe_unknown_vlan_sw_filter_hdr(rx_pkts[pos + 2], vfta, rxq);
> 
>  		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
>  		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
> @@ -391,8 +398,11 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>  		/* D.3 copy final 1,2 data to rx_pkts */
>  		_mm_storeu_si128((void *)&rx_pkts[pos+1]->rx_descriptor_fields1,
>  				pkt_mb2);
> +		ixgbe_unknown_vlan_sw_filter_hdr(rx_pkts[pos + 1], vfta, rxq);
> +
>  		_mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
>  				pkt_mb1);
> +		ixgbe_unknown_vlan_sw_filter_hdr(rx_pkts[pos], vfta, rxq);
> 
>  		/* C.4 calc avaialbe number of desc */
>  		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
> --
> 2.1.4



More information about the dev mailing list