[dpdk-dev] [PATCH v4 02/18] ixgbe: support of unified packet type for vector
Helin Zhang
helin.zhang at intel.com
Fri Feb 27 14:11:20 CET 2015
To unify the packet type, the packet type bit masks in ol_flags are
replaced by the packet_type field in the mbuf. In addition, more packet
types (UDP, TCP and SCTP) are supported in the vectorized ixgbe PMD.
Note that a performance drop of around 2% (64B packets) was observed when
doing IO forwarding on 4 ports (1 port per 82599 card) on the same SNB core.
Signed-off-by: Cunming Liang <cunming.liang at intel.com>
Signed-off-by: Helin Zhang <helin.zhang at intel.com>
---
config/common_linuxapp | 2 +-
lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c | 49 +++++++++++++++++++----------------
2 files changed, 27 insertions(+), 24 deletions(-)
v2 changes:
* Used redefined packet types and enlarged packet_type field in mbuf.
v3 changes:
* Put vector ixgbe changes right after mbuf changes.
* Enabled vector ixgbe PMD by default together with changes for updated
vector PMD.
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 97d7bae..97f1c9e 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -166,7 +166,7 @@ CONFIG_RTE_LIBRTE_IXGBE_DEBUG_TX_FREE=n
CONFIG_RTE_LIBRTE_IXGBE_DEBUG_DRIVER=n
CONFIG_RTE_LIBRTE_IXGBE_PF_DISABLE_STRIP_CRC=n
CONFIG_RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC=y
-CONFIG_RTE_IXGBE_INC_VECTOR=n
+CONFIG_RTE_IXGBE_INC_VECTOR=y
CONFIG_RTE_IXGBE_RX_OLFLAGS_ENABLE=y
#
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
index 1f46f0f..eeb0ffb 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
@@ -134,44 +134,35 @@ ixgbe_rxq_rearm(struct igb_rx_queue *rxq)
*/
#ifdef RTE_IXGBE_RX_OLFLAGS_ENABLE
-#define OLFLAGS_MASK ((uint16_t)(PKT_RX_VLAN_PKT | PKT_RX_IPV4_HDR |\
- PKT_RX_IPV4_HDR_EXT | PKT_RX_IPV6_HDR |\
- PKT_RX_IPV6_HDR_EXT))
-#define OLFLAGS_MASK_V (((uint64_t)OLFLAGS_MASK << 48) | \
- ((uint64_t)OLFLAGS_MASK << 32) | \
- ((uint64_t)OLFLAGS_MASK << 16) | \
- ((uint64_t)OLFLAGS_MASK))
-#define PTYPE_SHIFT (1)
+#define OLFLAGS_MASK_V (((uint64_t)PKT_RX_VLAN_PKT << 48) | \
+ ((uint64_t)PKT_RX_VLAN_PKT << 32) | \
+ ((uint64_t)PKT_RX_VLAN_PKT << 16) | \
+ ((uint64_t)PKT_RX_VLAN_PKT))
#define VTAG_SHIFT (3)
static inline void
desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
{
- __m128i ptype0, ptype1, vtag0, vtag1;
+ __m128i vtag0, vtag1;
union {
uint16_t e[4];
uint64_t dword;
} vol;
- ptype0 = _mm_unpacklo_epi16(descs[0], descs[1]);
- ptype1 = _mm_unpacklo_epi16(descs[2], descs[3]);
vtag0 = _mm_unpackhi_epi16(descs[0], descs[1]);
vtag1 = _mm_unpackhi_epi16(descs[2], descs[3]);
- ptype1 = _mm_unpacklo_epi32(ptype0, ptype1);
vtag1 = _mm_unpacklo_epi32(vtag0, vtag1);
-
- ptype1 = _mm_slli_epi16(ptype1, PTYPE_SHIFT);
vtag1 = _mm_srli_epi16(vtag1, VTAG_SHIFT);
- ptype1 = _mm_or_si128(ptype1, vtag1);
- vol.dword = _mm_cvtsi128_si64(ptype1) & OLFLAGS_MASK_V;
+ vol.dword = _mm_cvtsi128_si64(vtag1) & OLFLAGS_MASK_V;
rx_pkts[0]->ol_flags = vol.e[0];
rx_pkts[1]->ol_flags = vol.e[1];
rx_pkts[2]->ol_flags = vol.e[2];
rx_pkts[3]->ol_flags = vol.e[3];
}
+
#else
#define desc_to_olflags_v(desc, rx_pkts) do {} while (0)
#endif
@@ -197,13 +188,15 @@ _recv_raw_pkts_vec(struct igb_rx_queue *rxq, struct rte_mbuf **rx_pkts,
uint64_t var;
__m128i shuf_msk;
__m128i crc_adjust = _mm_set_epi16(
- 0, 0, 0, 0, /* ignore non-length fields */
+ 0, 0, 0, /* ignore non-length fields */
+ -rxq->crc_len, /* sub crc on data_len */
0, /* ignore high-16bits of pkt_len */
-rxq->crc_len, /* sub crc on pkt_len */
- -rxq->crc_len, /* sub crc on data_len */
- 0 /* ignore pkt_type field */
+ 0, 0 /* ignore pkt_type field */
);
__m128i dd_check, eop_check;
+ __m128i desc_mask = _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFF07F0);
if (unlikely(nb_pkts < RTE_IXGBE_VPMD_RX_BURST))
return 0;
@@ -234,12 +227,13 @@ _recv_raw_pkts_vec(struct igb_rx_queue *rxq, struct rte_mbuf **rx_pkts,
/* mask to shuffle from desc. to mbuf */
shuf_msk = _mm_set_epi8(
7, 6, 5, 4, /* octet 4~7, 32bits rss */
- 0xFF, 0xFF, /* skip high 16 bits vlan_macip, zero out */
15, 14, /* octet 14~15, low 16 bits vlan_macip */
+ 13, 12, /* octet 12~13, 16 bits data_len */
0xFF, 0xFF, /* skip high 16 bits pkt_len, zero out */
13, 12, /* octet 12~13, low 16 bits pkt_len */
- 13, 12, /* octet 12~13, 16 bits data_len */
- 0xFF, 0xFF /* skip pkt_type field */
+ 0xFF, 0xFF, /* skip high 16 bits pkt_type */
+ 1, /* octet 1, 8 bits pkt_type field */
+ 0 /* octet 0, 4 bits offset 4 pkt_type field */
);
/* Cache is empty -> need to scan the buffer rings, but first move
@@ -248,6 +242,7 @@ _recv_raw_pkts_vec(struct igb_rx_queue *rxq, struct rte_mbuf **rx_pkts,
/*
* A. load 4 packet in one loop
+ * [A*. mask out 4 unused dirty field in desc]
* B. copy 4 mbuf point from swring to rx_pkts
* C. calc the number of DD bits among the 4 packets
* [C*. extract the end-of-packet bit, if requested]
@@ -289,6 +284,14 @@ _recv_raw_pkts_vec(struct igb_rx_queue *rxq, struct rte_mbuf **rx_pkts,
/* B.2 copy 2 mbuf point into rx_pkts */
_mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2);
+ /* A* mask out 0~3 bits RSS type */
+ descs[3] = _mm_and_si128(descs[3], desc_mask);
+ descs[2] = _mm_and_si128(descs[2], desc_mask);
+
+ /* A* mask out 0~3 bits RSS type */
+ descs[1] = _mm_and_si128(descs[1], desc_mask);
+ descs[0] = _mm_and_si128(descs[0], desc_mask);
+
/* avoid compiler reorder optimization */
rte_compiler_barrier();
@@ -301,7 +304,7 @@ _recv_raw_pkts_vec(struct igb_rx_queue *rxq, struct rte_mbuf **rx_pkts,
/* C.1 4=>2 filter staterr info only */
sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]);
- /* set ol_flags with packet type and vlan tag */
+ /* set ol_flags with vlan packet type */
desc_to_olflags_v(descs, &rx_pkts[pos]);
/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
--
1.9.3
More information about the dev
mailing list