[dpdk-dev,v8,1/2] librte_net: add crc compute APIs

Message ID 1490873422-13734-2-git-send-email-jasvinder.singh@intel.com (mailing list archive)
State Superseded, archived
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail Compilation issues

Commit Message

Jasvinder Singh March 30, 2017, 11:30 a.m. UTC
  APIs for selecting the architecture-specific implementation and computing
the crc (16-bit and 32-bit CRCs) are added. For CRCs calculation, scalar
as well as x86 intrinsic(sse4.2) versions are implemented.

The scalar version is based on generic Look-Up Table(LUT) algorithm,
while x86 intrinsic version uses carry-less multiplication for
fast CRC computation.

Signed-off-by: Jasvinder Singh <jasvinder.singh@intel.com>
Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 lib/Makefile                       |   2 +-
 lib/librte_net/Makefile            |   3 +
 lib/librte_net/rte_net_crc.c       | 199 ++++++++++++++++++++
 lib/librte_net/rte_net_crc.h       | 104 +++++++++++
 lib/librte_net/rte_net_crc_sse.h   | 361 +++++++++++++++++++++++++++++++++++++
 lib/librte_net/rte_net_version.map |   8 +
 6 files changed, 676 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_net/rte_net_crc.c
 create mode 100644 lib/librte_net/rte_net_crc.h
 create mode 100644 lib/librte_net/rte_net_crc_sse.h
  

Comments

Ananyev, Konstantin March 30, 2017, 11:31 a.m. UTC | #1
Hi Jasvinder,

> diff --git a/lib/librte_net/rte_net_crc.h b/lib/librte_net/rte_net_crc.h
> new file mode 100644
> index 0000000..dd6c110
> --- /dev/null
> +++ b/lib/librte_net/rte_net_crc.h
> @@ -0,0 +1,104 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2017 Intel Corporation.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifndef _RTE_NET_CRC_H_
> +#define _RTE_NET_CRC_H_
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include <stdint.h>
> +
> +#include <rte_mbuf.h>

As a nit: you probably don't need that include.
Konstantin

> +
> +/** CRC polynomials */
> +#define CRC32_ETH_POLYNOMIAL 0x04c11db7UL
> +#define CRC16_CCITT_POLYNOMIAL 0x1021U
> +
> +#define CRC_LUT_SIZE 256
> +
> +/** CRC types */
> +enum rte_net_crc_type {
> +	RTE_NET_CRC16_CCITT = 0,
> +	RTE_NET_CRC32_ETH,
> +	RTE_NET_CRC_REQS
> +};
> +
> +/** CRC compute algorithm */
> +enum rte_net_crc_alg {
> +	RTE_NET_CRC_SCALAR = 0,
> +	RTE_NET_CRC_SSE42,
> +};
> +
> +/**
> + * This API set the CRC computation algorithm (i.e. scalar version,
> + * x86 64-bit sse4.2 intrinsic version, etc.) and internal data
> + * structure.
> + *
> + * @param alg
> + *   This parameter is used to select the CRC implementation version.
> + *   - RTE_NET_CRC_SCALAR
> + *   - RTE_NET_CRC_SSE42 (Use 64-bit SSE4.2 intrinsic)
> + */
> +void
> +rte_net_crc_set_alg(enum rte_net_crc_alg alg);
> +
> +/**
> + * CRC compute API
> + *
> + * @param data
> + *   Pointer to the packet data for CRC computation
> + * @param data_len
> + *   Data length for CRC computation
> + * @param type
> + *   CRC type (enum rte_net_crc_type)
> + *
> + * @return
> + *   CRC value
> + */
> +uint32_t
> +rte_net_crc_calc(const void *data,
> +	uint32_t data_len,
> +	enum rte_net_crc_type type);
> +
> +#if defined(RTE_ARCH_X86_64) && defined(RTE_MACHINE_CPUFLAG_SSE4_2)
> +#include <rte_net_crc_sse.h>
> +#endif
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +
  
Jasvinder Singh March 30, 2017, 12:06 p.m. UTC | #2
> -----Original Message-----
> From: Ananyev, Konstantin
> Sent: Thursday, March 30, 2017 12:32 PM
> To: Singh, Jasvinder <jasvinder.singh@intel.com>; dev@dpdk.org
> Cc: olivier.matz@6wind.com; Doherty, Declan <declan.doherty@intel.com>;
> De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
> Subject: RE: [dpdk-dev] [PATCH v8 1/2] librte_net: add crc compute APIs
> 
> Hi Jasvinder,
> 
> > diff --git a/lib/librte_net/rte_net_crc.h
> > b/lib/librte_net/rte_net_crc.h new file mode 100644 index
> > 0000000..dd6c110
> > --- /dev/null
> > +++ b/lib/librte_net/rte_net_crc.h
> > @@ -0,0 +1,104 @@
> > +/*-
> > + *   BSD LICENSE
> > + *
> > + *   Copyright(c) 2017 Intel Corporation.
> > + *   All rights reserved.
> > + *
> > + *   Redistribution and use in source and binary forms, with or without
> > + *   modification, are permitted provided that the following conditions
> > + *   are met:
> > + *
> > + *     * Redistributions of source code must retain the above copyright
> > + *       notice, this list of conditions and the following disclaimer.
> > + *     * Redistributions in binary form must reproduce the above copyright
> > + *       notice, this list of conditions and the following disclaimer in
> > + *       the documentation and/or other materials provided with the
> > + *       distribution.
> > + *     * Neither the name of Intel Corporation nor the names of its
> > + *       contributors may be used to endorse or promote products derived
> > + *       from this software without specific prior written permission.
> > + *
> > + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> CONTRIBUTORS
> > + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
> NOT
> > + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> FITNESS FOR
> > + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT
> > + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> > + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
> BUT NOT
> > + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
> LOSS OF USE,
> > + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
> AND ON ANY
> > + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
> TORT
> > + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
> OF THE USE
> > + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> > + */
> > +
> > +#ifndef _RTE_NET_CRC_H_
> > +#define _RTE_NET_CRC_H_
> > +
> > +#ifdef __cplusplus
> > +extern "C" {
> > +#endif
> > +
> > +#include <stdint.h>
> > +
> > +#include <rte_mbuf.h>
> 
> As a nit: you probably don't need that include.
> Konstantin
> 

Oh, forgot to remove this, will send another version. Thanks Konstantin.
  
Olivier Matz March 30, 2017, 2:40 p.m. UTC | #3
Hi Jasvinder,

On Thu, 30 Mar 2017 11:31:54 +0000, "Ananyev, Konstantin" <konstantin.ananyev@intel.com> wrote:
> Hi Jasvinder,
> 
> > diff --git a/lib/librte_net/rte_net_crc.h b/lib/librte_net/rte_net_crc.h
> > new file mode 100644
> > index 0000000..dd6c110
> > --- /dev/null
> > +++ b/lib/librte_net/rte_net_crc.h
> > @@ -0,0 +1,104 @@
> > +/*-
> > + *   BSD LICENSE
> > + *
> > + *   Copyright(c) 2017 Intel Corporation.
> > + *   All rights reserved.
> > + *
> > + *   Redistribution and use in source and binary forms, with or without
> > + *   modification, are permitted provided that the following conditions
> > + *   are met:
> > + *
> > + *     * Redistributions of source code must retain the above copyright
> > + *       notice, this list of conditions and the following disclaimer.
> > + *     * Redistributions in binary form must reproduce the above copyright
> > + *       notice, this list of conditions and the following disclaimer in
> > + *       the documentation and/or other materials provided with the
> > + *       distribution.
> > + *     * Neither the name of Intel Corporation nor the names of its
> > + *       contributors may be used to endorse or promote products derived
> > + *       from this software without specific prior written permission.
> > + *
> > + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> > + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> > + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> > + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> > + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> > + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > + */
> > +
> > +#ifndef _RTE_NET_CRC_H_
> > +#define _RTE_NET_CRC_H_
> > +
> > +#ifdef __cplusplus
> > +extern "C" {
> > +#endif
> > +
> > +#include <stdint.h>
> > +
> > +#include <rte_mbuf.h>  
> 
> As a nit: you probably don't need that include.
> Konstantin
> 
> > +
> > +/** CRC polynomials */
> > +#define CRC32_ETH_POLYNOMIAL 0x04c11db7UL
> > +#define CRC16_CCITT_POLYNOMIAL 0x1021U
> > +
> > +#define CRC_LUT_SIZE 256
> > +
> > +/** CRC types */
> > +enum rte_net_crc_type {
> > +	RTE_NET_CRC16_CCITT = 0,
> > +	RTE_NET_CRC32_ETH,
> > +	RTE_NET_CRC_REQS
> > +};
> > +
> > +/** CRC compute algorithm */
> > +enum rte_net_crc_alg {
> > +	RTE_NET_CRC_SCALAR = 0,
> > +	RTE_NET_CRC_SSE42,
> > +};
> > +
> > +/**
> > + * This API set the CRC computation algorithm (i.e. scalar version,
> > + * x86 64-bit sse4.2 intrinsic version, etc.) and internal data
> > + * structure.
> > + *
> > + * @param alg
> > + *   This parameter is used to select the CRC implementation version.
> > + *   - RTE_NET_CRC_SCALAR
> > + *   - RTE_NET_CRC_SSE42 (Use 64-bit SSE4.2 intrinsic)
> > + */
> > +void
> > +rte_net_crc_set_alg(enum rte_net_crc_alg alg);
> > +
> > +/**
> > + * CRC compute API
> > + *
> > + * @param data
> > + *   Pointer to the packet data for CRC computation
> > + * @param data_len
> > + *   Data length for CRC computation
> > + * @param type
> > + *   CRC type (enum rte_net_crc_type)
> > + *
> > + * @return
> > + *   CRC value
> > + */
> > +uint32_t
> > +rte_net_crc_calc(const void *data,
> > +	uint32_t data_len,
> > +	enum rte_net_crc_type type);
> > +
> > +#if defined(RTE_ARCH_X86_64) && defined(RTE_MACHINE_CPUFLAG_SSE4_2)
> > +#include <rte_net_crc_sse.h>
> > +#endif

I think this include should not be included from rte_net_crc.h.

From what I see, the API is the same for sse and non-sse, so this include
could be private, included only from the .c file. If you also remove
the include to rte_mbuf.h as suggested by Konstantin, it will require the
following includes in rte_net_crc.c:

 #include <stddef.h>
 #include <string.h>
 
 #include <rte_common.h>
 #include <rte_cpuflags.h>
 #include <rte_branch_prediction.h>
 #include <rte_vect.h>
 #include <rte_net_crc.h>
 #if defined(RTE_ARCH_X86_64) && defined(RTE_MACHINE_CPUFLAG_SSE4_2)
 #include <rte_net_crc_sse.h>
 #endif

If the sse file is only used in the .c, this line could also be
removed in the Makefile:

SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_net_crc_sse.h


I'm not very familiar with crc and sse code. Could you add yourself
as maintainer for these files in MAINTAINERS?


Thanks
Olivier
  
Jasvinder Singh March 30, 2017, 3:14 p.m. UTC | #4
Hi Olivier,

> -----Original Message-----
> From: Olivier Matz [mailto:olivier.matz@6wind.com]
> Sent: Thursday, March 30, 2017 3:41 PM
> To: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> Cc: Singh, Jasvinder <jasvinder.singh@intel.com>; dev@dpdk.org; Doherty,
> Declan <declan.doherty@intel.com>; De Lara Guarch, Pablo
> <pablo.de.lara.guarch@intel.com>
> Subject: Re: [dpdk-dev] [PATCH v8 1/2] librte_net: add crc compute APIs


<snip>

> I think this include should not be included from rte_net_crc.h.
> 
> From what I see, the API is the same for sse and non-sse, so this include
> could be private, included only from the .c file. If you also remove the include
> to rte_mbuf.h as suggested by Konstantin, it will require the following
> includes in rte_net_crc.c:
> 
>  #include <stddef.h>
>  #include <string.h>
> 
>  #include <rte_common.h>
>  #include <rte_cpuflags.h>
>  #include <rte_branch_prediction.h>
>  #include <rte_vect.h>
>  #include <rte_net_crc.h>
>  #if defined(RTE_ARCH_X86_64) &&
> defined(RTE_MACHINE_CPUFLAG_SSE4_2)
>  #include <rte_net_crc_sse.h>
>  #endif
> 
> If the sse file is only used in the .c, this line could also be removed in the
> Makefile:
> 
> SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_net_crc_sse.h
> 
> 
> I'm not very familiar with crc and sse code. Could you add yourself as
> maintainer for these files in MAINTAINERS?
> 
> 
> Thanks
> Olivier

Thank you for the review. I will make above suggested changes in the next version. 

Jasvinder
  
Thomas Monjalon April 4, 2017, 8:02 p.m. UTC | #5
2017-03-30 17:15, Jasvinder Singh:
> In some applications, CRC (Cyclic Redundancy Check) needs to be computed
> or updated during packet processing operations. This patchset adds
> software implementation of some common standard CRCs (32-bit Ethernet
> CRC as per Ethernet/[ISO/IEC 8802-3] and 16-bit CCITT-CRC [ITU-T X.25]).
> Two versions of each 32-bit and 16-bit CRC calculation are proposed.
> 
> The first version presents a fast and efficient CRC generation on IA
> processors by using the carry-less multiplication instruction - PCLMULQDQ
> (i.e. SSE4.2 intrinsics). In this implementation, a parallelized folding
> approach has been used to first reduce an arbitrary length buffer to a small
> fixed size length buffer (16 bytes) with the help of precomputed constants.
> The resultant single 16-bytes chunk is further reduced by Barrett reduction
> method to generate final CRC value. For more details on the implementation,
> see reference [1].
> 
> The second version presents the fallback solution to support the CRC
> generation without needing any specific support from CPU (for examples-
> SSE4.2 intrinsics). It is based on generic Look-Up Table(LUT) algorithm
> that uses precomputed 256 element table as explained in reference[2].
> 
> During initialisation, all the data structures required for CRC computation
> are initialised. Also, x86 specific crc implementation (if supported by
> the platform) or scalar version is enabled.

As you can see in patchwork, it does not compile on FreeBSD:
	http://dpdk.org/ml/archives/test-report/2017-April/016943.html
  
Jasvinder Singh April 5, 2017, 8:34 a.m. UTC | #6
Hi Thomas,


> -----Original Message-----

> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]

> Sent: Tuesday, April 4, 2017 9:02 PM

> To: Singh, Jasvinder <jasvinder.singh@intel.com>

> Cc: dev@dpdk.org; olivier.matz@6wind.com; Doherty, Declan

> <declan.doherty@intel.com>; De Lara Guarch, Pablo

> <pablo.de.lara.guarch@intel.com>

> Subject: Re: [dpdk-dev] [PATCH v9 0/3] librte_net: add crc computation

> support

> 

> 2017-03-30 17:15, Jasvinder Singh:

> > In some applications, CRC (Cyclic Redundancy Check) needs to be

> > computed or updated during packet processing operations. This patchset

> > adds software implementation of some common standard CRCs (32-bit

> > Ethernet CRC as per Ethernet/[ISO/IEC 8802-3] and 16-bit CCITT-CRC [ITU-T

> X.25]).

> > Two versions of each 32-bit and 16-bit CRC calculation are proposed.

> >

> > The first version presents a fast and efficient CRC generation on IA

> > processors by using the carry-less multiplication instruction

> > PCLMULQDQ (i.e SSE4.2 instrinsics). In this implementation, a

> > parallelized folding approach has been used to first reduce an

> > arbitrary length buffer to a small fixed size length buffer (16 bytes) with the

> help of precomputed constants.

> > The resultant single 16-bytes chunk is further reduced by Barrett

> > reduction method to generate final CRC value. For more details on the

> > implementation, see reference [1].

> >

> > The second version presents the fallback solution to support the CRC

> > generation without needing any specific support from CPU (for

> > examples-

> > SSE4.2 intrinsics). It is based on generic Look-Up Table(LUT)

> > algorithm that uses precomputed 256 element table as explained in

> reference[2].

> >

> > During intialisation, all the data structures required for CRC

> > computation are initialised. Also, x86 specific crc implementation (if

> > supported by the platform) or scalar version is enabled.

> 

> As you can see in patchwork, it does not compile on FreeBSD:

> 	http://dpdk.org/ml/archives/test-report/2017-April/016943.html


As I stated in the cover letter notes, the patchset build fails on clang versions earlier than 3.7.0 due to
missing intrinsics, and this issue is listed in the DPDK known issues section. The FreeBSD build with the gcc target should work fine.

Jasvinder
  
Thomas Monjalon April 5, 2017, 9:01 a.m. UTC | #7
2017-04-05 08:34, Singh, Jasvinder:
> Hi Thomas,
> 
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > 2017-03-30 17:15, Jasvinder Singh:
> > > In some applications, CRC (Cyclic Redundancy Check) needs to be
> > > computed or updated during packet processing operations. This patchset
> > > adds software implementation of some common standard CRCs (32-bit
> > > Ethernet CRC as per Ethernet/[ISO/IEC 8802-3] and 16-bit CCITT-CRC [ITU-T
> > X.25]).
> > > Two versions of each 32-bit and 16-bit CRC calculation are proposed.
> > >
> > > The first version presents a fast and efficient CRC generation on IA
> > > processors by using the carry-less multiplication instruction
> > > PCLMULQDQ (i.e SSE4.2 instrinsics). In this implementation, a
> > > parallelized folding approach has been used to first reduce an
> > > arbitrary length buffer to a small fixed size length buffer (16 bytes) with the
> > help of precomputed constants.
> > > The resultant single 16-bytes chunk is further reduced by Barrett
> > > reduction method to generate final CRC value. For more details on the
> > > implementation, see reference [1].
> > >
> > > The second version presents the fallback solution to support the CRC
> > > generation without needing any specific support from CPU (for
> > > examples-
> > > SSE4.2 intrinsics). It is based on generic Look-Up Table(LUT)
> > > algorithm that uses precomputed 256 element table as explained in
> > reference[2].
> > >
> > > During intialisation, all the data structures required for CRC
> > > computation are initialised. Also, x86 specific crc implementation (if
> > > supported by the platform) or scalar version is enabled.
> > 
> > As you can see in patchwork, it does not compile on FreeBSD:
> > 	http://dpdk.org/ml/archives/test-report/2017-April/016943.html
> 
> As I stated in the cover letter  notes as well that The patchset build fails on clang version earlier than 3.7.0 due to
> missing intrinsics and this issue is listed in DPDK known issue section. FreeBSD build on gcc  target should work fine.

Ah, I have not seen this explanation.

However, we cannot let the build fail.
It is a blocker for patch admission.

Can you, at least, disable the code for some compiler versions?
  
Bruce Richardson April 5, 2017, 9:37 a.m. UTC | #8
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas Monjalon
> Sent: Wednesday, April 5, 2017 10:01 AM
> To: Singh, Jasvinder <jasvinder.singh@intel.com>
> Cc: dev@dpdk.org; olivier.matz@6wind.com; Doherty, Declan
> <declan.doherty@intel.com>; De Lara Guarch, Pablo
> <pablo.de.lara.guarch@intel.com>
> Subject: Re: [dpdk-dev] [PATCH v9 0/3] librte_net: add crc computation
> support
> 
> 2017-04-05 08:34, Singh, Jasvinder:
> > Hi Thomas,
> >
> > From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > > 2017-03-30 17:15, Jasvinder Singh:
> > > > In some applications, CRC (Cyclic Redundancy Check) needs to be
> > > > computed or updated during packet processing operations. This
> > > > patchset adds software implementation of some common standard CRCs
> > > > (32-bit Ethernet CRC as per Ethernet/[ISO/IEC 8802-3] and 16-bit
> > > > CCITT-CRC [ITU-T
> > > X.25]).
> > > > Two versions of each 32-bit and 16-bit CRC calculation are proposed.
> > > >
> > > > The first version presents a fast and efficient CRC generation on
> > > > IA processors by using the carry-less multiplication instruction
> > > > PCLMULQDQ (i.e SSE4.2 instrinsics). In this implementation, a
> > > > parallelized folding approach has been used to first reduce an
> > > > arbitrary length buffer to a small fixed size length buffer (16
> > > > bytes) with the
> > > help of precomputed constants.
> > > > The resultant single 16-bytes chunk is further reduced by Barrett
> > > > reduction method to generate final CRC value. For more details on
> > > > the implementation, see reference [1].
> > > >
> > > > The second version presents the fallback solution to support the
> > > > CRC generation without needing any specific support from CPU (for
> > > > examples-
> > > > SSE4.2 intrinsics). It is based on generic Look-Up Table(LUT)
> > > > algorithm that uses precomputed 256 element table as explained in
> > > reference[2].
> > > >
> > > > During intialisation, all the data structures required for CRC
> > > > computation are initialised. Also, x86 specific crc implementation
> > > > (if supported by the platform) or scalar version is enabled.
> > >
> > > As you can see in patchwork, it does not compile on FreeBSD:
> > > 	http://dpdk.org/ml/archives/test-report/2017-April/016943.html
> >
> > As I stated in the cover letter  notes as well that The patchset build
> > fails on clang version earlier than 3.7.0 due to missing intrinsics and
> this issue is listed in DPDK known issue section. FreeBSD build on gcc
> target should work fine.
> 
> Ah, I have not seen this explanation.
> 
> However, we cannot let the build fails.
> It is a blocker for patch admission.
> 
> Can you, at least, disable the code for some compiler versions?

Hi Jasvinder,

Any chance of a work-around for this issue? The default compiler on BSD is clang, and the BSD 10 series of releases uses v3.4. This means this functionality will be unavailable for anyone using DPDK from BSD ports on BSD 10.

/Bruce
  
Jasvinder Singh April 5, 2017, 12:52 p.m. UTC | #9
Hi Bruce,

> -----Original Message-----
> From: Richardson, Bruce
> Sent: Wednesday, April 5, 2017 10:37 AM
> To: Thomas Monjalon <thomas.monjalon@6wind.com>; Singh, Jasvinder
> <jasvinder.singh@intel.com>
> Cc: dev@dpdk.org; olivier.matz@6wind.com; Doherty, Declan
> <declan.doherty@intel.com>; De Lara Guarch, Pablo
> <pablo.de.lara.guarch@intel.com>
> Subject: RE: [dpdk-dev] [PATCH v9 0/3] librte_net: add crc computation
> support
> 
> 
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas Monjalon
> > Sent: Wednesday, April 5, 2017 10:01 AM
> > To: Singh, Jasvinder <jasvinder.singh@intel.com>
> > Cc: dev@dpdk.org; olivier.matz@6wind.com; Doherty, Declan
> > <declan.doherty@intel.com>; De Lara Guarch, Pablo
> > <pablo.de.lara.guarch@intel.com>
> > Subject: Re: [dpdk-dev] [PATCH v9 0/3] librte_net: add crc computation
> > support
> >
> > 2017-04-05 08:34, Singh, Jasvinder:
> > > Hi Thomas,
> > >
> > > From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > > > 2017-03-30 17:15, Jasvinder Singh:
> > > > > In some applications, CRC (Cyclic Redundancy Check) needs to be
> > > > > computed or updated during packet processing operations. This
> > > > > patchset adds software implementation of some common standard
> > > > > CRCs (32-bit Ethernet CRC as per Ethernet/[ISO/IEC 8802-3] and
> > > > > 16-bit CCITT-CRC [ITU-T
> > > > X.25]).
> > > > > Two versions of each 32-bit and 16-bit CRC calculation are proposed.
> > > > >
> > > > > The first version presents a fast and efficient CRC generation
> > > > > on IA processors by using the carry-less multiplication
> > > > > instruction PCLMULQDQ (i.e SSE4.2 instrinsics). In this
> > > > > implementation, a parallelized folding approach has been used to
> > > > > first reduce an arbitrary length buffer to a small fixed size
> > > > > length buffer (16
> > > > > bytes) with the
> > > > help of precomputed constants.
> > > > > The resultant single 16-bytes chunk is further reduced by
> > > > > Barrett reduction method to generate final CRC value. For more
> > > > > details on the implementation, see reference [1].
> > > > >
> > > > > The second version presents the fallback solution to support the
> > > > > CRC generation without needing any specific support from CPU
> > > > > (for
> > > > > examples-
> > > > > SSE4.2 intrinsics). It is based on generic Look-Up Table(LUT)
> > > > > algorithm that uses precomputed 256 element table as explained
> > > > > in
> > > > reference[2].
> > > > >
> > > > > During intialisation, all the data structures required for CRC
> > > > > computation are initialised. Also, x86 specific crc
> > > > > implementation (if supported by the platform) or scalar version is
> enabled.
> > > >
> > > > As you can see in patchwork, it does not compile on FreeBSD:
> > > > 	http://dpdk.org/ml/archives/test-report/2017-April/016943.html
> > >
> > > As I stated in the cover letter  notes as well that The patchset
> > > build fails on clang version earlier than 3.7.0 due to missing
> > > intrinsics and
> > this issue is listed in DPDK known issue section. FreeBSD build on gcc
> > target should work fine.
> >
> > Ah, I have not seen this explanation.
> >
> > However, we cannot let the build fails.
> > It is a blocker for patch admission.
> >
> > Can you, at least, disable the code for some compiler versions?
> 
> Hi Jasvinder,
> 
> Any chance a work-around for this issue. The default compiler on BSD is
> clang, and the BSD 10 series of releases uses v3.4. This means this
> functionality will be unavailable for anyone using DPDK from BSD ports on
> BSD 10.
> 
> /Bruce


I will have a look at this and send another version with a fix.

Jasvinder
  

Patch

diff --git a/lib/Makefile b/lib/Makefile
index 5ad3c7c..456eb38 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -62,7 +62,7 @@  DEPDIRS-librte_lpm := librte_eal
 DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl
 DEPDIRS-librte_acl := librte_eal
 DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net
-DEPDIRS-librte_net := librte_mbuf
+DEPDIRS-librte_net := librte_mbuf librte_eal
 DIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += librte_ip_frag
 DEPDIRS-librte_ip_frag := librte_eal librte_mempool librte_mbuf librte_ether
 DEPDIRS-librte_ip_frag += librte_hash
diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile
index abd5c46..757f3bc 100644
--- a/lib/librte_net/Makefile
+++ b/lib/librte_net/Makefile
@@ -39,10 +39,13 @@  EXPORT_MAP := rte_net_version.map
 LIBABIVER := 1
 
 SRCS-$(CONFIG_RTE_LIBRTE_NET) := rte_net.c
+SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_net_crc.c
 
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_sctp.h rte_icmp.h rte_arp.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_ether.h rte_gre.h rte_net.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_net_crc.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_net_crc_sse.h
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c
new file mode 100644
index 0000000..0f58f07
--- /dev/null
+++ b/lib/librte_net/rte_net_crc.c
@@ -0,0 +1,199 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stddef.h>
+
+#include <rte_net_crc.h>
+
+/** crc tables */
+static uint32_t crc32_eth_lut[CRC_LUT_SIZE];
+static uint32_t crc16_ccitt_lut[CRC_LUT_SIZE];
+
+static uint32_t
+rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len);
+
+static uint32_t
+rte_crc32_eth_handler(const uint8_t *data, uint32_t data_len);
+
+typedef uint32_t
+(*rte_net_crc_handler)(const uint8_t *data, uint32_t data_len);
+
+static rte_net_crc_handler *handlers;
+
+static rte_net_crc_handler handlers_scalar[] = {
+	[RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_handler,
+	[RTE_NET_CRC32_ETH] = rte_crc32_eth_handler,
+};
+
+#if defined(RTE_ARCH_X86_64) && defined(RTE_MACHINE_CPUFLAG_SSE4_2)
+static rte_net_crc_handler handlers_sse42[] = {
+	[RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_sse42_handler,
+	[RTE_NET_CRC32_ETH] = rte_crc32_eth_sse42_handler,
+};
+#endif
+
+/**
+ * Reflect the bits about the middle
+ *
+ * @param val
+ *   value to be reflected
+ *
+ * @return
+ *   reflected value
+ */
+static uint32_t
+reflect_32bits(uint32_t val)
+{
+	uint32_t i, res = 0;
+
+	for (i = 0; i < 32; i++)
+		if ((val & (1 << i)) != 0)
+			res |= (uint32_t)(1 << (31 - i));
+
+	return res;
+}
+
+static void
+crc32_eth_init_lut(uint32_t poly,
+	uint32_t *lut)
+{
+	uint32_t i, j;
+
+	for (i = 0; i < CRC_LUT_SIZE; i++) {
+		uint32_t crc = reflect_32bits(i);
+
+		for (j = 0; j < 8; j++) {
+			if (crc & 0x80000000L)
+				crc = (crc << 1) ^ poly;
+			else
+				crc <<= 1;
+		}
+		lut[i] = reflect_32bits(crc);
+	}
+}
+
+static inline __attribute__((always_inline)) uint32_t
+crc32_eth_calc_lut(const uint8_t *data,
+	uint32_t data_len,
+	uint32_t crc,
+	const uint32_t *lut)
+{
+	while (data_len--)
+		crc = lut[(crc ^ *data++) & 0xffL] ^ (crc >> 8);
+
+	return crc;
+}
+
+static void
+rte_net_crc_scalar_init(void)
+{
+	/** 32-bit crc init */
+	crc32_eth_init_lut(CRC32_ETH_POLYNOMIAL, crc32_eth_lut);
+
+	/** 16-bit CRC init */
+	crc32_eth_init_lut(CRC16_CCITT_POLYNOMIAL << 16, crc16_ccitt_lut);
+}
+
+static inline uint32_t
+rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len)
+{
+	/** return 16-bit CRC value */
+	return (uint16_t)~crc32_eth_calc_lut(data,
+		data_len,
+		0xffff,
+		crc16_ccitt_lut);
+}
+
+static inline uint32_t
+rte_crc32_eth_handler(const uint8_t *data, uint32_t data_len)
+{
+	/** return 32-bit CRC value */
+	return ~crc32_eth_calc_lut(data,
+		data_len,
+		0xffffffffUL,
+		crc32_eth_lut);
+}
+
+void
+rte_net_crc_set_alg(enum rte_net_crc_alg alg)
+{
+	switch (alg) {
+	case RTE_NET_CRC_SSE42:
+#ifdef RTE_ARCH_X86_64
+		if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2))
+			alg = RTE_NET_CRC_SCALAR;
+		else {
+			handlers = handlers_sse42;
+			break;
+		}
+#endif
+	case RTE_NET_CRC_SCALAR:
+	default:
+		handlers = handlers_scalar;
+		break;
+	}
+}
+
+uint32_t
+rte_net_crc_calc(const void *data,
+	uint32_t data_len,
+	enum rte_net_crc_type type)
+{
+	uint32_t ret;
+	rte_net_crc_handler f_handle;
+
+	f_handle = handlers[type];
+	ret = f_handle((const uint8_t *) data, data_len);
+
+	return ret;
+}
+
+/*
+ * Select highest available crc algorithm as default one.
+ */
+static inline void __attribute__((constructor))
+rte_net_crc_init(void)
+{
+	enum rte_net_crc_alg alg = RTE_NET_CRC_SCALAR;
+
+	rte_net_crc_scalar_init();
+
+#ifdef RTE_ARCH_X86_64
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2)) {
+		alg = RTE_NET_CRC_SSE42;
+		rte_net_crc_sse42_init();
+	}
+#endif
+
+	rte_net_crc_set_alg(alg);
+}
diff --git a/lib/librte_net/rte_net_crc.h b/lib/librte_net/rte_net_crc.h
new file mode 100644
index 0000000..dd6c110
--- /dev/null
+++ b/lib/librte_net/rte_net_crc.h
@@ -0,0 +1,104 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_NET_CRC_H_
+#define _RTE_NET_CRC_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#include <rte_mbuf.h>
+
+/** CRC polynomials */
+#define CRC32_ETH_POLYNOMIAL 0x04c11db7UL
+#define CRC16_CCITT_POLYNOMIAL 0x1021U
+
+#define CRC_LUT_SIZE 256
+
+/** CRC types; rte_net_crc_calc() uses the value as a handler table index */
+enum rte_net_crc_type {
+	RTE_NET_CRC16_CCITT = 0, /**< 16-bit CCITT CRC */
+	RTE_NET_CRC32_ETH = 1,   /**< 32-bit Ethernet CRC */
+	RTE_NET_CRC_REQS = 2     /**< Number of CRC types listed above */
+};
+
+/** CRC compute algorithm, as accepted by rte_net_crc_set_alg() */
+enum rte_net_crc_alg {
+	RTE_NET_CRC_SCALAR = 0, /**< Look-up table implementation */
+	RTE_NET_CRC_SSE42 = 1,  /**< x86 64-bit SSE4.2 intrinsic implementation */
+};
+
+/**
+ * Set the CRC computation algorithm (i.e. scalar version,
+ * x86 64-bit sse4.2 intrinsic version, etc.) used by subsequent
+ * rte_net_crc_calc() calls.
+ *
+ * If the SSE4.2 version is requested on a build or CPU that does not
+ * provide it, the scalar version is selected instead.
+ *
+ * @param alg
+ *   This parameter is used to select the CRC implementation version.
+ *   - RTE_NET_CRC_SCALAR
+ *   - RTE_NET_CRC_SSE42 (Use 64-bit SSE4.2 intrinsic)
+ */
+void
+rte_net_crc_set_alg(enum rte_net_crc_alg alg);
+
+/**
+ * CRC compute API
+ *
+ * Computes the CRC of a buffer with the algorithm currently selected
+ * through rte_net_crc_set_alg().
+ *
+ * @param data
+ *   Pointer to the packet data for CRC computation
+ * @param data_len
+ *   Data length, in bytes, for CRC computation
+ * @param type
+ *   CRC type (enum rte_net_crc_type). The value is used as a table
+ *   index and is not range-checked.
+ *
+ * @return
+ *   CRC value (the 16-bit handlers return it in the low 16 bits)
+ */
+uint32_t
+rte_net_crc_calc(const void *data,
+	uint32_t data_len,
+	enum rte_net_crc_type type);
+
+#if defined(RTE_ARCH_X86_64) && defined(RTE_MACHINE_CPUFLAG_SSE4_2)
+#include <rte_net_crc_sse.h>
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _RTE_NET_CRC_H_ */
diff --git a/lib/librte_net/rte_net_crc_sse.h b/lib/librte_net/rte_net_crc_sse.h
new file mode 100644
index 0000000..514b400
--- /dev/null
+++ b/lib/librte_net/rte_net_crc_sse.h
@@ -0,0 +1,361 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_NET_CRC_SSE_H_
+#define _RTE_NET_CRC_SSE_H_
+
+#include <cpuid.h>
+#include <rte_net_crc.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * PCLMULQDQ CRC computation context structure
+ *
+ * Holds three pairs of 64-bit constants, each pair packed into one
+ * 128-bit value, as consumed by crc32_eth_calc_pclmulqdq().
+ */
+struct crc_pclmulqdq_ctx {
+	__m128i rk1_rk2; /**< constants for the 16-byte folding rounds */
+	__m128i rk5_rk6; /**< constants for the 128 -> 64 bit reduction */
+	__m128i rk7_rk8; /**< constants for the 64 -> 32 bit reduction */
+};
+/*
+ * Constant sets for the two supported CRCs, filled in by
+ * rte_net_crc_sse42_init().
+ *
+ * NOTE(review): these are non-static object definitions in a header that
+ * rte_net_crc.h includes, so every translation unit including it defines
+ * them; consider "extern" here plus one definition in a .c file.
+ */
+struct crc_pclmulqdq_ctx crc32_eth_pclmulqdq __rte_aligned(16);
+struct crc_pclmulqdq_ctx crc16_ccitt_pclmulqdq __rte_aligned(16);
+/**
+ * @brief Performs one folding round
+ *
+ * The upper 64 bits of the folded value are carry-less multiplied by the
+ * lower 64 bits of the constants, the lower 64 bits of the folded value
+ * by the upper 64 bits of the constants, and both products are XORed
+ * with the next data block:
+ *     T1 = CLMUL(MSB8(FOLD), LSB8(PRECOMP))
+ *     T2 = CLMUL(LSB8(FOLD), MSB8(PRECOMP))
+ *     FOLD = XOR(T1, T2, DATA)
+ *
+ * @param data_block
+ *   16 byte data block
+ * @param precomp
+ *   Precomputed rk1 and rk2 constants
+ * @param fold
+ *   Current 16 byte folded data
+ *
+ * @return
+ *   New 16 byte folded data
+ */
+static inline __attribute__((always_inline)) __m128i
+crcr32_folding_round(__m128i data_block,
+		__m128i precomp,
+		__m128i fold)
+{
+	__m128i hi_product, lo_product, mixed;
+
+	hi_product = _mm_clmulepi64_si128(fold, precomp, 0x01);
+	lo_product = _mm_clmulepi64_si128(fold, precomp, 0x10);
+
+	mixed = _mm_xor_si128(data_block, hi_product);
+
+	return _mm_xor_si128(lo_product, mixed);
+}
+
+/**
+ * Performs reduction from 128 bits to 64 bits
+ *
+ * Two folds are applied: the first uses the lower constant of the pair,
+ * the second uses the upper one.
+ *
+ * @param data128
+ *   128 bits data to be reduced
+ * @param precomp
+ *   precomputed constants rk5, rk6
+ *
+ * @return
+ *  64 bits reduced data
+ */
+
+static inline __attribute__((always_inline)) __m128i
+crcr32_reduce_128_to_64(__m128i data128, __m128i precomp)
+{
+	__m128i fold64, shifted, fold32;
+
+	/* 64b fold: product of the low halves XOR the upper half of the input */
+	fold64 = _mm_xor_si128(
+		_mm_clmulepi64_si128(data128, precomp, 0x00),
+		_mm_srli_si128(data128, 8));
+
+	/* 32b fold: shift up by 4 bytes, multiply by the upper constant */
+	shifted = _mm_slli_si128(fold64, 4);
+	fold32 = _mm_clmulepi64_si128(shifted, precomp, 0x10);
+
+	return _mm_xor_si128(fold32, fold64);
+}
+
+/**
+ * Performs Barrett's reduction from 64 bits to 32 bits
+ *
+ * @param data64
+ *   64 bits data to be reduced
+ * @param precomp
+ *   precomputed constants rk7, rk8
+ *
+ * @return
+ *   reduced 32 bits data
+ */
+
+static inline __attribute__((always_inline)) uint32_t
+crcr32_reduce_64_to_32(__m128i data64, __m128i precomp)
+{
+	/* Keeps the two lowest 32-bit lanes */
+	static const uint32_t mask_lo64[4] __rte_aligned(16) = {
+		0xffffffff, 0xffffffff, 0x00000000, 0x00000000
+	};
+	/* Clears the lowest 32-bit lane */
+	static const uint32_t mask_hi96[4] __rte_aligned(16) = {
+		0x00000000, 0xffffffff, 0xffffffff, 0xffffffff
+	};
+	__m128i input, step1, step2;
+
+	input = _mm_and_si128(data64,
+		_mm_load_si128((const __m128i *)mask_hi96));
+
+	/* First multiplication uses the lower constant of the pair */
+	step1 = _mm_clmulepi64_si128(input, precomp, 0x00);
+	step1 = _mm_xor_si128(step1, input);
+	step1 = _mm_and_si128(step1,
+		_mm_load_si128((const __m128i *)mask_lo64));
+
+	/* Second multiplication uses the upper constant of the pair */
+	step2 = _mm_clmulepi64_si128(step1, precomp, 0x10);
+	step2 = _mm_xor_si128(step2, step1);
+	step2 = _mm_xor_si128(step2, input);
+
+	/* The result sits in the third 32-bit lane */
+	return _mm_extract_epi32(step2, 2);
+}
+/*
+ * Shuffle-selector table for xmm_shift_left(), which loads 16 consecutive
+ * bytes starting at offset (16 - num). Entries 16-31 are the byte indexes
+ * 0-15; the 0xff entries on either side have bit 7 set, which makes
+ * _mm_shuffle_epi8() write zero to the corresponding byte.
+ */
+static const uint8_t crc_xmm_shift_tab[48] __rte_aligned(16) = {
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+
+/**
+ * Shifts left 128 bit register by specified number of bytes
+ *
+ * The shift is done with a byte shuffle whose selector is a 16-byte
+ * window of crc_xmm_shift_tab starting at offset (16 - num).
+ *
+ * @param reg
+ *   128 bit value
+ * @param num
+ *   number of bytes to shift left reg by (0-16)
+ *
+ * @return
+ *   reg << (num * 8)
+ */
+
+static inline __attribute__((always_inline)) __m128i
+xmm_shift_left(__m128i reg, const unsigned int num)
+{
+	const uint8_t *window = crc_xmm_shift_tab + 16 - num;
+	__m128i selector = _mm_loadu_si128((const __m128i *)window);
+
+	return _mm_shuffle_epi8(reg, selector);
+}
+/**
+ * Compute a CRC remainder with PCLMULQDQ folding.
+ *
+ * The initial value is XORed into the first data bytes, the buffer is
+ * folded 16 bytes at a time into one 128-bit value, and that value is
+ * reduced to 32 bits. The result is returned exactly as produced by the
+ * last reduction step; the handlers in this file invert it themselves.
+ *
+ * @param data
+ *   Buffer to process
+ * @param data_len
+ *   Number of bytes in the buffer
+ * @param crc
+ *   Initial CRC value
+ * @param params
+ *   Folding and reduction constants for the CRC being computed
+ *
+ * @return
+ *   32-bit value returned by crcr32_reduce_64_to_32()
+ */
+static inline __attribute__((always_inline)) uint32_t
+crc32_eth_calc_pclmulqdq(
+	const uint8_t *data,
+	uint32_t data_len,
+	uint32_t crc,
+	const struct crc_pclmulqdq_ctx *params)
+{
+	__m128i temp, fold, k;
+	uint32_t n;
+
+	/* Get CRC init value: crc in the lowest 32-bit lane, zero elsewhere */
+	temp = _mm_insert_epi32(_mm_setzero_si128(), crc, 0);
+
+	/**
+	 * Folding all data into single 16 byte data block
+	 * Assumes: fold holds first 16 bytes of data
+	 *
+	 * Buffers shorter than 32 bytes are dispatched by length below and
+	 * jump straight to the step that finishes them.
+	 */
+
+	if (unlikely(data_len < 32)) {
+		if (unlikely(data_len == 16)) {
+			/* 16 bytes: a single block, nothing left to fold */
+			fold = _mm_loadu_si128((const __m128i *)data);
+			fold = _mm_xor_si128(fold, temp);
+			goto reduction_128_64;
+		}
+
+		if (unlikely(data_len < 16)) {
+			/*
+			 * 0 to 15 bytes: work on a zero-padded stack copy so
+			 * that a full 16-byte load can be used.
+			 * Fewer than 4 bytes: shift left by (8 - data_len)
+			 * bytes and go directly to the final reduction.
+			 * 4 to 15 bytes: shift left by (16 - data_len) bytes
+			 * and go through the 128 -> 64 bit reduction first.
+			 */
+			uint8_t buffer[16] __rte_aligned(16);
+
+			memset(buffer, 0, sizeof(buffer));
+			memcpy(buffer, data, data_len);
+
+			fold = _mm_load_si128((const __m128i *)buffer);
+			fold = _mm_xor_si128(fold, temp);
+			if (unlikely(data_len < 4)) {
+				fold = xmm_shift_left(fold, 8 - data_len);
+				goto barret_reduction;
+			}
+			fold = xmm_shift_left(fold, 16 - data_len);
+			goto reduction_128_64;
+		}
+		/*
+		 * 17 to 31 bytes: the first 16 bytes become the folded value
+		 * and the partial_bytes step absorbs the remainder
+		 */
+		fold = _mm_loadu_si128((const __m128i *)data);
+		fold = _mm_xor_si128(fold, temp);
+		n = 16;
+		k = params->rk1_rk2;
+		goto partial_bytes;
+	}
+
+	/** At least 32 bytes in the buffer */
+	/** Apply CRC initial value to the first 16 bytes */
+	fold = _mm_loadu_si128((const __m128i *)data);
+	fold = _mm_xor_si128(fold, temp);
+
+	/**
+	 * Main folding loop - consumes every complete 16-byte block after
+	 * the first one; a trailing partial block is processed separately
+	 */
+	k = params->rk1_rk2;
+	for (n = 16; (n + 16) <= data_len; n += 16) {
+		temp = _mm_loadu_si128((const __m128i *)&data[n]);
+		fold = crcr32_folding_round(temp, k, fold);
+	}
+
+partial_bytes:
+	if (likely(n < data_len)) {
+		/* XORed into the selector below to flip bit 7 of every byte */
+		const uint32_t mask3[4] __rte_aligned(16) = {
+			0x80808080, 0x80808080, 0x80808080, 0x80808080
+		};
+		/* Shuffle selectors, read 16 bytes at offset (data_len & 15) */
+		const uint8_t shf_table[32] __rte_aligned(16) = {
+			0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+			0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+			0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+			0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+		};
+
+		__m128i last16, a, b;
+		/* Final 16 bytes of the buffer; overlaps bytes already folded */
+		last16 = _mm_loadu_si128((const __m128i *)&data[data_len - 16]);
+		/* a: low bytes of fold moved to the top, zero below them */
+		temp = _mm_loadu_si128((const __m128i *)
+			&shf_table[data_len & 15]);
+		a = _mm_shuffle_epi8(fold, temp);
+		/*
+		 * b: remaining bytes of fold moved to the bottom, with the
+		 * bytes of last16 blended in where the selector has bit 7 set
+		 */
+		temp = _mm_xor_si128(temp,
+			_mm_load_si128((const __m128i *)mask3));
+		b = _mm_shuffle_epi8(fold, temp);
+		b = _mm_blendv_epi8(b, last16, temp);
+
+		/*
+		 * k = rk1 & rk2: same computation as a folding round, with a
+		 * as the folded value and b as the data block
+		 */
+		temp = _mm_clmulepi64_si128(a, k, 0x01);
+		fold = _mm_clmulepi64_si128(a, k, 0x10);
+
+		fold = _mm_xor_si128(fold, temp);
+		fold = _mm_xor_si128(fold, b);
+	}
+
+	/** Reduction 128 -> 32 Assumes: fold holds 128bit folded data */
+reduction_128_64:
+	k = params->rk5_rk6;
+	fold = crcr32_reduce_128_to_64(fold, k);
+	/* Final step: reduce the 64-bit value to the 32-bit result */
+barret_reduction:
+	k = params->rk7_rk8;
+	n = crcr32_reduce_64_to_32(fold, k);
+
+	return n;
+}
+
+
+/**
+ * Load the precomputed PCLMULQDQ folding and reduction constants into
+ * the CRC16-CCITT and CRC32-Ethernet contexts.
+ *
+ * The 128-bit values are built with the SSE2 _mm_set_epi64x() intrinsic
+ * (arguments: upper 64 bits, then lower 64 bits) rather than through
+ * 64-bit MMX values, so no MMX state is touched and no _mm_empty() call
+ * is needed afterwards.
+ */
+static inline void
+rte_net_crc_sse42_init(void)
+{
+	uint64_t k1, k2, k5, k6;
+	uint64_t p, q;
+
+	/** Initialize CRC16 data */
+	k1 = 0x189aeLLU;
+	k2 = 0x8e10LLU;
+	k5 = 0x189aeLLU;
+	k6 = 0x114aaLLU;
+	q =  0x11c581910LLU;
+	p =  0x10811LLU;
+
+	/** Save the params in context structure (low half listed last) */
+	crc16_ccitt_pclmulqdq.rk1_rk2 = _mm_set_epi64x(k2, k1);
+	crc16_ccitt_pclmulqdq.rk5_rk6 = _mm_set_epi64x(k6, k5);
+	crc16_ccitt_pclmulqdq.rk7_rk8 = _mm_set_epi64x(p, q);
+
+	/** Initialize CRC32 data */
+	k1 = 0xccaa009eLLU;
+	k2 = 0x1751997d0LLU;
+	k5 = 0xccaa009eLLU;
+	k6 = 0x163cd6124LLU;
+	q =  0x1f7011640LLU;
+	p =  0x1db710641LLU;
+
+	/** Save the params in context structure (low half listed last) */
+	crc32_eth_pclmulqdq.rk1_rk2 = _mm_set_epi64x(k2, k1);
+	crc32_eth_pclmulqdq.rk5_rk6 = _mm_set_epi64x(k6, k5);
+	crc32_eth_pclmulqdq.rk7_rk8 = _mm_set_epi64x(p, q);
+}
+
+/*
+ * SSE4.2 CRC16-CCITT handler: run the PCLMULQDQ routine with the CRC16
+ * constants starting from 0xffff, invert the result and keep only its
+ * low 16 bits.
+ */
+static inline uint32_t
+rte_crc16_ccitt_sse42_handler(const uint8_t *data,
+	uint32_t data_len)
+{
+	const uint32_t seed = 0xffff;
+	uint32_t crc;
+
+	crc = crc32_eth_calc_pclmulqdq(data, data_len, seed,
+		&crc16_ccitt_pclmulqdq);
+
+	/* Truncate after inverting so bits 16-31 of the result are zero */
+	return (uint16_t)~crc;
+}
+
+/*
+ * SSE4.2 CRC32 (Ethernet) handler: run the PCLMULQDQ routine with the
+ * CRC32 constants starting from 0xffffffff and return the inverted result.
+ */
+static inline uint32_t
+rte_crc32_eth_sse42_handler(const uint8_t *data,
+	uint32_t data_len)
+{
+	const uint32_t seed = 0xffffffffUL;
+	uint32_t crc;
+
+	crc = crc32_eth_calc_pclmulqdq(data, data_len, seed,
+		&crc32_eth_pclmulqdq);
+
+	return ~crc;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_NET_CRC_SSE_H_ */
diff --git a/lib/librte_net/rte_net_version.map b/lib/librte_net/rte_net_version.map
index 3b15e65..687c40e 100644
--- a/lib/librte_net/rte_net_version.map
+++ b/lib/librte_net/rte_net_version.map
@@ -4,3 +4,11 @@  DPDK_16.11 {
 
 	local: *;
 };
+
+DPDK_17.05 {
+	global:
+
+	rte_net_crc_calc;
+	rte_net_crc_set_alg;
+
+} DPDK_16.11;