[dpdk-dev,v2] net/mlx4: support CRC strip toggling

Message ID 1521477410-8936-1-git-send-email-ophirmu@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Shahaf Shuler
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Ophir Munk March 19, 2018, 4:36 p.m. UTC
  Previous to this commit mlx4 CRC stripping was executed by default and
there was no verbs API to disable it.

Current support in MLNX_OFED_4.3-1.5.0.0 and above

Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
---
v1: initial version
v2: following internal reviews

 drivers/net/mlx4/mlx4.c      |  4 ++++
 drivers/net/mlx4/mlx4.h      |  1 +
 drivers/net/mlx4/mlx4_rxq.c  | 33 +++++++++++++++++++++++++++++++--
 drivers/net/mlx4/mlx4_rxtx.c |  5 ++++-
 drivers/net/mlx4/mlx4_rxtx.h |  1 +
 5 files changed, 41 insertions(+), 3 deletions(-)
  

Comments

Adrien Mazarguil March 23, 2018, 2:53 p.m. UTC | #1
Hi Ophir,

On Mon, Mar 19, 2018 at 04:36:50PM +0000, Ophir Munk wrote:
> Previous to this commit mlx4 CRC stripping was executed by default and
> there was no verbs API to disable it.
> 
> Current support in MLNX_OFED_4.3-1.5.0.0 and above

I suggest to drop this line as it's not exclusive to MLNX_OFED; the
documented minimum requirements are already correct and rdma-core v15 also
supports it.

> Signed-off-by: Ophir Munk <ophirmu@mellanox.com>

A few more comments below.

> ---
> v1: initial version
> v2: following internal reviews
> 
>  drivers/net/mlx4/mlx4.c      |  4 ++++
>  drivers/net/mlx4/mlx4.h      |  1 +
>  drivers/net/mlx4/mlx4_rxq.c  | 33 +++++++++++++++++++++++++++++++--
>  drivers/net/mlx4/mlx4_rxtx.c |  5 ++++-
>  drivers/net/mlx4/mlx4_rxtx.h |  1 +
>  5 files changed, 41 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
> index ee93daf..d4f4323 100644
> --- a/drivers/net/mlx4/mlx4.c
> +++ b/drivers/net/mlx4/mlx4.c
> @@ -578,6 +578,10 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
>  		}
>  		DEBUG("supported RSS hash fields mask: %016" PRIx64,
>  		      priv->hw_rss_sup);
> +		priv->hw_fcs_strip = !!(device_attr_ex.raw_packet_caps &
> +					 IBV_RAW_PACKET_CAP_SCATTER_FCS);

Minor nit, indentation contains one extra space.

> +		DEBUG("FCS stripping toggling is %ssupported",
> +		      (priv->hw_fcs_strip ? "" : "not "));

Another minor nit, mlx5 prints "configuration" instead of "toggling",
wouldn't it make sense for both PMDs to print the same information?

Also the extra set of parentheses around the conditional expression could be
removed.

>  		/* Configure the first MAC address by default. */
>  		if (mlx4_get_mac(priv, &mac.addr_bytes)) {
>  			ERROR("cannot get MAC address, is mlx4_en loaded?"
> diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
> index 19c8a22..3ae3ce6 100644
> --- a/drivers/net/mlx4/mlx4.h
> +++ b/drivers/net/mlx4/mlx4.h
> @@ -105,6 +105,7 @@ struct priv {
>  	uint32_t isolated:1; /**< Toggle isolated mode. */
>  	uint32_t hw_csum:1; /**< Checksum offload is supported. */
>  	uint32_t hw_csum_l2tun:1; /**< Checksum support for L2 tunnels. */
> +	uint32_t hw_fcs_strip:1; /**< FCS stripping toggling is supported. */
>  	uint64_t hw_rss_sup; /**< Supported RSS hash fields (Verbs format). */
>  	struct rte_intr_handle intr_handle; /**< Port interrupt handle. */
>  	struct mlx4_drop *drop; /**< Shared resources for drop flow rules. */
> diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c
> index 7a036ed..6748355 100644
> --- a/drivers/net/mlx4/mlx4_rxq.c
> +++ b/drivers/net/mlx4/mlx4_rxq.c
> @@ -491,6 +491,8 @@ mlx4_rxq_attach(struct rxq *rxq)
>  	const char *msg;
>  	struct ibv_cq *cq = NULL;
>  	struct ibv_wq *wq = NULL;
> +	unsigned int create_flags = 0;
> +	unsigned int comp_mask = 0;

I suggest using uint32_t to align these with Verb's definitions for struct
ibv_wq_init_attr.

>  	volatile struct mlx4_wqe_data_seg (*wqes)[];
>  	unsigned int i;
>  	int ret;
> @@ -503,6 +505,11 @@ mlx4_rxq_attach(struct rxq *rxq)
>  		msg = "CQ creation failure";
>  		goto error;
>  	}
> +	/* By default, FCS (CRC) is stripped by hardware. */
> +	if (rxq->crc_present) {
> +		create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
> +		comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
> +	}
>  	wq = mlx4_glue->create_wq
>  		(priv->ctx,
>  		 &(struct ibv_wq_init_attr){
> @@ -511,6 +518,8 @@ mlx4_rxq_attach(struct rxq *rxq)
>  			.max_sge = sges_n,
>  			.pd = priv->pd,
>  			.cq = cq,
> +			.comp_mask = comp_mask,
> +			.create_flags = create_flags,
>  		 });
>  	if (!wq) {
>  		ret = errno ? errno : EINVAL;
> @@ -649,9 +658,10 @@ mlx4_rxq_detach(struct rxq *rxq)
>  uint64_t
>  mlx4_get_rx_queue_offloads(struct priv *priv)
>  {
> -	uint64_t offloads = DEV_RX_OFFLOAD_SCATTER |
> -			    DEV_RX_OFFLOAD_CRC_STRIP;
> +	uint64_t offloads = DEV_RX_OFFLOAD_SCATTER;
>  
> +	if (priv->hw_fcs_strip)
> +		offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
>  	if (priv->hw_csum)
>  		offloads |= DEV_RX_OFFLOAD_CHECKSUM;
>  	return offloads;
> @@ -781,6 +791,24 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
>  		      (void *)dev, idx);
>  		return -rte_errno;
>  	}
> +	/* By default, FCS (CRC) is stripped by hardware. */
> +	unsigned char crc_present;

This variable must be grouped with others at the beginning of the block and
use the same type as its counterpart in struct rxq for consistency,
uint32_t.

> +	if (conf->offloads & DEV_RX_OFFLOAD_CRC_STRIP) {
> +		crc_present = 0;
> +	} else if (priv->hw_fcs_strip) {
> +		crc_present = 1;
> +	} else {
> +		WARN("%p: CRC stripping has been disabled but will still"
> +		     " be performed by hardware, make sure MLNX_OFED and"
> +		     " firmware are up to date",
> +		     (void *)dev);
> +		crc_present = 0;
> +	}
> +	DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
> +	      " incoming frames to hide it",
> +	      (void *)dev,
> +	      crc_present ? "disabled" : "enabled",
> +	      crc_present << 2);

The above block must appear prior to the mlx4_zmallocv_socket() call where
other configuration checks are performed.

>  	*rxq = (struct rxq){
>  		.priv = priv,
>  		.mp = mp,
> @@ -794,6 +822,7 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
>  		.csum_l2tun = priv->hw_csum_l2tun &&
>  			      (conf->offloads & DEV_RX_OFFLOAD_CHECKSUM),
>  		.l2tun_offload = priv->hw_csum_l2tun,
> +		.crc_present = crc_present,

One more nit: this line should appear before the l2tun_offload assignment to
match the order of definitions in struct rxq.

>  		.stats = {
>  			.idx = idx,
>  		},
> diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
> index 8ca8b77..84a7bf1 100644
> --- a/drivers/net/mlx4/mlx4_rxtx.c
> +++ b/drivers/net/mlx4/mlx4_rxtx.c
> @@ -934,12 +934,12 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
>  				goto skip;
>  			}
>  			pkt = seg;
> +			assert(len >= (rxq->crc_present << 2));
>  			/* Update packet information. */
>  			pkt->packet_type =
>  				rxq_cq_to_pkt_type(cqe, rxq->l2tun_offload);
>  			pkt->ol_flags = PKT_RX_RSS_HASH;
>  			pkt->hash.rss = cqe->immed_rss_invalid;
> -			pkt->pkt_len = len;
>  			if (rxq->csum | rxq->csum_l2tun) {
>  				uint32_t flags =
>  					mlx4_cqe_flags(cqe,
> @@ -951,6 +951,9 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
>  							   rxq->csum,
>  							   rxq->csum_l2tun);
>  			}
> +			if (rxq->crc_present)
> +				len -= ETHER_CRC_LEN;
> +			pkt->pkt_len = len;

I suggest to move this hunk above, where the pkt->pkt_len assignment was
previously made for a shorter diff.

>  		}
>  		rep->nb_segs = 1;
>  		rep->port = rxq->port_id;
> diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
> index c12bd39..a0633bf 100644
> --- a/drivers/net/mlx4/mlx4_rxtx.h
> +++ b/drivers/net/mlx4/mlx4_rxtx.h
> @@ -52,6 +52,7 @@ struct rxq {
>  	volatile uint32_t *rq_db; /**< RQ doorbell record. */
>  	uint32_t csum:1; /**< Enable checksum offloading. */
>  	uint32_t csum_l2tun:1; /**< Same for L2 tunnels. */
> +	uint32_t crc_present:1; /**< CRC must be subtracted. */
>  	uint32_t l2tun_offload:1; /**< L2 tunnel offload is enabled. */
>  	struct mlx4_cq mcq;  /**< Info for directly manipulating the CQ. */
>  	struct mlx4_rxq_stats stats; /**< Rx queue counters. */
> -- 
> 2.7.4
>
  
Ophir Munk March 25, 2018, 4:17 p.m. UTC | #2
Hi Adrien,
v3 is ready and will be sent soon.
Please see inline more comments.

> -----Original Message-----
> From: Adrien Mazarguil [mailto:adrien.mazarguil@6wind.com]
> Sent: Friday, March 23, 2018 5:53 PM
> To: Ophir Munk <ophirmu@mellanox.com>
> Cc: dev@dpdk.org; Thomas Monjalon <thomas@monjalon.net>; Olga Shern
> <olgas@mellanox.com>; Shahaf Shuler <shahafs@mellanox.com>
> Subject: Re: [PATCH v2] net/mlx4: support CRC strip toggling
> 
> Hi Ophir,
> 
> On Mon, Mar 19, 2018 at 04:36:50PM +0000, Ophir Munk wrote:
> > Previous to this commit mlx4 CRC stripping was executed by default and
> > there was no verbs API to disable it.
> >
> > Current support in MLNX_OFED_4.3-1.5.0.0 and above
> 
> I suggest to drop this line as it's not exclusive to MLNX_OFED; the
> documented minimum requirements are already correct and rdma-core v15
> also supports it.
> 

Actually rdma-core v15 does not expose CRC capability in mlx4. It is probably expected in rdma-core v18.
The OFED support for CRC stripping only starts with in MLNX_OFED_4.3-1.5.0.0.
Regardless - for v3 I have dropped the versioning information (as requested) since I think versioning information 
can be present in documentation rather than in commit logs.

> > Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> 
> A few more comments below.
> 
> > ---
> > v1: initial version
> > v2: following internal reviews
> >
> >  drivers/net/mlx4/mlx4.c      |  4 ++++
> >  drivers/net/mlx4/mlx4.h      |  1 +
> >  drivers/net/mlx4/mlx4_rxq.c  | 33 +++++++++++++++++++++++++++++++--
> > drivers/net/mlx4/mlx4_rxtx.c |  5 ++++-  drivers/net/mlx4/mlx4_rxtx.h
> > |  1 +
> >  5 files changed, 41 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c index
> > ee93daf..d4f4323 100644
> > --- a/drivers/net/mlx4/mlx4.c
> > +++ b/drivers/net/mlx4/mlx4.c
> > @@ -578,6 +578,10 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv,
> struct rte_pci_device *pci_dev)
> >  		}
> >  		DEBUG("supported RSS hash fields mask: %016" PRIx64,
> >  		      priv->hw_rss_sup);
> > +		priv->hw_fcs_strip = !!(device_attr_ex.raw_packet_caps &
> > +
> IBV_RAW_PACKET_CAP_SCATTER_FCS);
> 
> Minor nit, indentation contains one extra space.

The full block is already with an extra space. So removing the extra space in just this line will appear miss-aligned with
the remaining block. I suggest leaving this line as is for v3.

> 
> > +		DEBUG("FCS stripping toggling is %ssupported",
> > +		      (priv->hw_fcs_strip ? "" : "not "));
> 

> Another minor nit, mlx5 prints "configuration" instead of "toggling", wouldn't
> it make sense for both PMDs to print the same information?
> 

I have followed your review and changed "toggling" to "configuration" in v3. However please note that
the commit title is "support CRC strip toggling". So using the same wording "toggling" is clearer in my view.
(it was also an input I got from internal review prior to ML submission).

> Also the extra set of parentheses around the conditional expression could be
> removed.
> 

Done for v3. Please note that another line had an extra set of parentheses (not included in this patch) which was
updated as well in v3.

> >  		/* Configure the first MAC address by default. */
> >  		if (mlx4_get_mac(priv, &mac.addr_bytes)) {
> >  			ERROR("cannot get MAC address, is mlx4_en
> loaded?"
> > diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index
> > 19c8a22..3ae3ce6 100644
> > --- a/drivers/net/mlx4/mlx4.h
> > +++ b/drivers/net/mlx4/mlx4.h
> > @@ -105,6 +105,7 @@ struct priv {
> >  	uint32_t isolated:1; /**< Toggle isolated mode. */
> >  	uint32_t hw_csum:1; /**< Checksum offload is supported. */
> >  	uint32_t hw_csum_l2tun:1; /**< Checksum support for L2 tunnels. */
> > +	uint32_t hw_fcs_strip:1; /**< FCS stripping toggling is supported.
> > +*/
> >  	uint64_t hw_rss_sup; /**< Supported RSS hash fields (Verbs format).
> */
> >  	struct rte_intr_handle intr_handle; /**< Port interrupt handle. */
> >  	struct mlx4_drop *drop; /**< Shared resources for drop flow rules.
> > */ diff --git a/drivers/net/mlx4/mlx4_rxq.c
> > b/drivers/net/mlx4/mlx4_rxq.c index 7a036ed..6748355 100644
> > --- a/drivers/net/mlx4/mlx4_rxq.c
> > +++ b/drivers/net/mlx4/mlx4_rxq.c
> > @@ -491,6 +491,8 @@ mlx4_rxq_attach(struct rxq *rxq)
> >  	const char *msg;
> >  	struct ibv_cq *cq = NULL;
> >  	struct ibv_wq *wq = NULL;
> > +	unsigned int create_flags = 0;
> > +	unsigned int comp_mask = 0;
> 
> I suggest using uint32_t to align these with Verb's definitions for struct
> ibv_wq_init_attr.
> 

Done in v3

> >  	volatile struct mlx4_wqe_data_seg (*wqes)[];
> >  	unsigned int i;
> >  	int ret;
> > @@ -503,6 +505,11 @@ mlx4_rxq_attach(struct rxq *rxq)
> >  		msg = "CQ creation failure";
> >  		goto error;
> >  	}
> > +	/* By default, FCS (CRC) is stripped by hardware. */
> > +	if (rxq->crc_present) {
> > +		create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
> > +		comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
> > +	}
> >  	wq = mlx4_glue->create_wq
> >  		(priv->ctx,
> >  		 &(struct ibv_wq_init_attr){
> > @@ -511,6 +518,8 @@ mlx4_rxq_attach(struct rxq *rxq)
> >  			.max_sge = sges_n,
> >  			.pd = priv->pd,
> >  			.cq = cq,
> > +			.comp_mask = comp_mask,
> > +			.create_flags = create_flags,
> >  		 });
> >  	if (!wq) {
> >  		ret = errno ? errno : EINVAL;
> > @@ -649,9 +658,10 @@ mlx4_rxq_detach(struct rxq *rxq)  uint64_t
> > mlx4_get_rx_queue_offloads(struct priv *priv)  {
> > -	uint64_t offloads = DEV_RX_OFFLOAD_SCATTER |
> > -			    DEV_RX_OFFLOAD_CRC_STRIP;
> > +	uint64_t offloads = DEV_RX_OFFLOAD_SCATTER;
> >
> > +	if (priv->hw_fcs_strip)
> > +		offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
> >  	if (priv->hw_csum)
> >  		offloads |= DEV_RX_OFFLOAD_CHECKSUM;
> >  	return offloads;
> > @@ -781,6 +791,24 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev,
> uint16_t idx, uint16_t desc,
> >  		      (void *)dev, idx);
> >  		return -rte_errno;
> >  	}
> > +	/* By default, FCS (CRC) is stripped by hardware. */
> > +	unsigned char crc_present;
> 
> This variable must be grouped with others at the beginning of the block and
> use the same type as its counterpart in struct rxq for consistency, uint32_t.
> 

Done in v3

> > +	if (conf->offloads & DEV_RX_OFFLOAD_CRC_STRIP) {
> > +		crc_present = 0;
> > +	} else if (priv->hw_fcs_strip) {
> > +		crc_present = 1;
> > +	} else {
> > +		WARN("%p: CRC stripping has been disabled but will still"
> > +		     " be performed by hardware, make sure MLNX_OFED and"
> > +		     " firmware are up to date",
> > +		     (void *)dev);
> > +		crc_present = 0;
> > +	}
> > +	DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
> > +	      " incoming frames to hide it",
> > +	      (void *)dev,
> > +	      crc_present ? "disabled" : "enabled",
> > +	      crc_present << 2);
> 
> The above block must appear prior to the mlx4_zmallocv_socket() call where
> other configuration checks are performed.

Done in v3

> 
> >  	*rxq = (struct rxq){
> >  		.priv = priv,
> >  		.mp = mp,
> > @@ -794,6 +822,7 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev,
> uint16_t idx, uint16_t desc,
> >  		.csum_l2tun = priv->hw_csum_l2tun &&
> >  			      (conf->offloads &
> DEV_RX_OFFLOAD_CHECKSUM),
> >  		.l2tun_offload = priv->hw_csum_l2tun,
> > +		.crc_present = crc_present,
> 
> One more nit: this line should appear before the l2tun_offload assignment to
> match the order of definitions in struct rxq.
> 

Done in v3

> >  		.stats = {
> >  			.idx = idx,
> >  		},
> > diff --git a/drivers/net/mlx4/mlx4_rxtx.c
> > b/drivers/net/mlx4/mlx4_rxtx.c index 8ca8b77..84a7bf1 100644
> > --- a/drivers/net/mlx4/mlx4_rxtx.c
> > +++ b/drivers/net/mlx4/mlx4_rxtx.c
> > @@ -934,12 +934,12 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf
> **pkts, uint16_t pkts_n)
> >  				goto skip;
> >  			}
> >  			pkt = seg;
> > +			assert(len >= (rxq->crc_present << 2));
> >  			/* Update packet information. */
> >  			pkt->packet_type =
> >  				rxq_cq_to_pkt_type(cqe, rxq-
> >l2tun_offload);
> >  			pkt->ol_flags = PKT_RX_RSS_HASH;
> >  			pkt->hash.rss = cqe->immed_rss_invalid;
> > -			pkt->pkt_len = len;
> >  			if (rxq->csum | rxq->csum_l2tun) {
> >  				uint32_t flags =
> >  					mlx4_cqe_flags(cqe,
> > @@ -951,6 +951,9 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf
> **pkts, uint16_t pkts_n)
> >  							   rxq->csum,
> >  							   rxq->csum_l2tun);
> >  			}
> > +			if (rxq->crc_present)
> > +				len -= ETHER_CRC_LEN;
> > +			pkt->pkt_len = len;
> 
> I suggest to move this hunk above, where the pkt->pkt_len assignment was
> previously made for a shorter diff.

Done in v3

> 
> >  		}
> >  		rep->nb_segs = 1;
> >  		rep->port = rxq->port_id;
> > diff --git a/drivers/net/mlx4/mlx4_rxtx.h
> > b/drivers/net/mlx4/mlx4_rxtx.h index c12bd39..a0633bf 100644
> > --- a/drivers/net/mlx4/mlx4_rxtx.h
> > +++ b/drivers/net/mlx4/mlx4_rxtx.h
> > @@ -52,6 +52,7 @@ struct rxq {
> >  	volatile uint32_t *rq_db; /**< RQ doorbell record. */
> >  	uint32_t csum:1; /**< Enable checksum offloading. */
> >  	uint32_t csum_l2tun:1; /**< Same for L2 tunnels. */
> > +	uint32_t crc_present:1; /**< CRC must be subtracted. */
> >  	uint32_t l2tun_offload:1; /**< L2 tunnel offload is enabled. */
> >  	struct mlx4_cq mcq;  /**< Info for directly manipulating the CQ. */
> >  	struct mlx4_rxq_stats stats; /**< Rx queue counters. */
> > --
> > 2.7.4
> >
> 
> --
> Adrien Mazarguil
> 6WIND
  

Patch

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index ee93daf..d4f4323 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -578,6 +578,10 @@  mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 		}
 		DEBUG("supported RSS hash fields mask: %016" PRIx64,
 		      priv->hw_rss_sup);
+		priv->hw_fcs_strip = !!(device_attr_ex.raw_packet_caps &
+					 IBV_RAW_PACKET_CAP_SCATTER_FCS);
+		DEBUG("FCS stripping toggling is %ssupported",
+		      (priv->hw_fcs_strip ? "" : "not "));
 		/* Configure the first MAC address by default. */
 		if (mlx4_get_mac(priv, &mac.addr_bytes)) {
 			ERROR("cannot get MAC address, is mlx4_en loaded?"
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 19c8a22..3ae3ce6 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -105,6 +105,7 @@  struct priv {
 	uint32_t isolated:1; /**< Toggle isolated mode. */
 	uint32_t hw_csum:1; /**< Checksum offload is supported. */
 	uint32_t hw_csum_l2tun:1; /**< Checksum support for L2 tunnels. */
+	uint32_t hw_fcs_strip:1; /**< FCS stripping toggling is supported. */
 	uint64_t hw_rss_sup; /**< Supported RSS hash fields (Verbs format). */
 	struct rte_intr_handle intr_handle; /**< Port interrupt handle. */
 	struct mlx4_drop *drop; /**< Shared resources for drop flow rules. */
diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c
index 7a036ed..6748355 100644
--- a/drivers/net/mlx4/mlx4_rxq.c
+++ b/drivers/net/mlx4/mlx4_rxq.c
@@ -491,6 +491,8 @@  mlx4_rxq_attach(struct rxq *rxq)
 	const char *msg;
 	struct ibv_cq *cq = NULL;
 	struct ibv_wq *wq = NULL;
+	unsigned int create_flags = 0;
+	unsigned int comp_mask = 0;
 	volatile struct mlx4_wqe_data_seg (*wqes)[];
 	unsigned int i;
 	int ret;
@@ -503,6 +505,11 @@  mlx4_rxq_attach(struct rxq *rxq)
 		msg = "CQ creation failure";
 		goto error;
 	}
+	/* By default, FCS (CRC) is stripped by hardware. */
+	if (rxq->crc_present) {
+		create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
+		comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
+	}
 	wq = mlx4_glue->create_wq
 		(priv->ctx,
 		 &(struct ibv_wq_init_attr){
@@ -511,6 +518,8 @@  mlx4_rxq_attach(struct rxq *rxq)
 			.max_sge = sges_n,
 			.pd = priv->pd,
 			.cq = cq,
+			.comp_mask = comp_mask,
+			.create_flags = create_flags,
 		 });
 	if (!wq) {
 		ret = errno ? errno : EINVAL;
@@ -649,9 +658,10 @@  mlx4_rxq_detach(struct rxq *rxq)
 uint64_t
 mlx4_get_rx_queue_offloads(struct priv *priv)
 {
-	uint64_t offloads = DEV_RX_OFFLOAD_SCATTER |
-			    DEV_RX_OFFLOAD_CRC_STRIP;
+	uint64_t offloads = DEV_RX_OFFLOAD_SCATTER;
 
+	if (priv->hw_fcs_strip)
+		offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
 	if (priv->hw_csum)
 		offloads |= DEV_RX_OFFLOAD_CHECKSUM;
 	return offloads;
@@ -781,6 +791,24 @@  mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		      (void *)dev, idx);
 		return -rte_errno;
 	}
+	/* By default, FCS (CRC) is stripped by hardware. */
+	unsigned char crc_present;
+	if (conf->offloads & DEV_RX_OFFLOAD_CRC_STRIP) {
+		crc_present = 0;
+	} else if (priv->hw_fcs_strip) {
+		crc_present = 1;
+	} else {
+		WARN("%p: CRC stripping has been disabled but will still"
+		     " be performed by hardware, make sure MLNX_OFED and"
+		     " firmware are up to date",
+		     (void *)dev);
+		crc_present = 0;
+	}
+	DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
+	      " incoming frames to hide it",
+	      (void *)dev,
+	      crc_present ? "disabled" : "enabled",
+	      crc_present << 2);
 	*rxq = (struct rxq){
 		.priv = priv,
 		.mp = mp,
@@ -794,6 +822,7 @@  mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		.csum_l2tun = priv->hw_csum_l2tun &&
 			      (conf->offloads & DEV_RX_OFFLOAD_CHECKSUM),
 		.l2tun_offload = priv->hw_csum_l2tun,
+		.crc_present = crc_present,
 		.stats = {
 			.idx = idx,
 		},
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index 8ca8b77..84a7bf1 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -934,12 +934,12 @@  mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 				goto skip;
 			}
 			pkt = seg;
+			assert(len >= (rxq->crc_present << 2));
 			/* Update packet information. */
 			pkt->packet_type =
 				rxq_cq_to_pkt_type(cqe, rxq->l2tun_offload);
 			pkt->ol_flags = PKT_RX_RSS_HASH;
 			pkt->hash.rss = cqe->immed_rss_invalid;
-			pkt->pkt_len = len;
 			if (rxq->csum | rxq->csum_l2tun) {
 				uint32_t flags =
 					mlx4_cqe_flags(cqe,
@@ -951,6 +951,9 @@  mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 							   rxq->csum,
 							   rxq->csum_l2tun);
 			}
+			if (rxq->crc_present)
+				len -= ETHER_CRC_LEN;
+			pkt->pkt_len = len;
 		}
 		rep->nb_segs = 1;
 		rep->port = rxq->port_id;
diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
index c12bd39..a0633bf 100644
--- a/drivers/net/mlx4/mlx4_rxtx.h
+++ b/drivers/net/mlx4/mlx4_rxtx.h
@@ -52,6 +52,7 @@  struct rxq {
 	volatile uint32_t *rq_db; /**< RQ doorbell record. */
 	uint32_t csum:1; /**< Enable checksum offloading. */
 	uint32_t csum_l2tun:1; /**< Same for L2 tunnels. */
+	uint32_t crc_present:1; /**< CRC must be subtracted. */
 	uint32_t l2tun_offload:1; /**< L2 tunnel offload is enabled. */
 	struct mlx4_cq mcq;  /**< Info for directly manipulating the CQ. */
 	struct mlx4_rxq_stats stats; /**< Rx queue counters. */