@@ -89,6 +89,10 @@ static const struct acl_alg acl_alg[] = {
.name = "altivec",
.alg = RTE_ACL_CLASSIFY_ALTIVEC,
},
+ {
+ .name = "s390x",
+ .alg = RTE_ACL_CLASSIFY_S390X,
+ },
{
.name = "avx512x16",
.alg = RTE_ACL_CLASSIFY_AVX512X16,
@@ -351,6 +351,7 @@ test_classify_run(struct rte_acl_ctx *acx, struct ipv4_7tuple test_data[],
RTE_ACL_CLASSIFY_AVX2,
RTE_ACL_CLASSIFY_NEON,
RTE_ACL_CLASSIFY_ALTIVEC,
+ RTE_ACL_CLASSIFY_S390X,
RTE_ACL_CLASSIFY_AVX512X16,
RTE_ACL_CLASSIFY_AVX512X32,
};
@@ -17,6 +17,7 @@
#include <rte_lcore.h>
#include <rte_random.h>
#include <rte_hash_crc.h>
+#include <rte_byteorder.h>
#include "test.h"
@@ -351,6 +352,7 @@ volatile uint16_t token16;
volatile uint32_t token32;
volatile uint64_t token64;
+#ifndef RTE_ARCH_S390X
static void
build_crc8_table(void)
{
@@ -441,6 +443,8 @@ test_atomic_exchange(__rte_unused void *arg)
return 0;
}
+#endif
+
static int
test_atomic(void)
{
@@ -597,6 +601,7 @@ test_atomic(void)
}
#endif
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
/*
* Test 16/32/64bit atomic exchange.
*/
@@ -628,7 +633,7 @@ test_atomic(void)
printf("Atomic exchange test failed\n");
return -1;
}
-
+#endif
return 0;
}
REGISTER_TEST_COMMAND(atomic_autotest, test_atomic);
@@ -6,19 +6,29 @@
#include <inttypes.h>
#include <rte_string_fns.h>
+#include <rte_byteorder.h>
#include <cmdline_parse.h>
#include <cmdline_parse_ipaddr.h>
#include "test_cmdline.h"
-#define IP4(a,b,c,d) {.s_addr = (uint32_t)(((a) & 0xff) | \
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+#define IP4(a, b, c, d) {.s_addr = (uint32_t)(((a) & 0xff) | \
(((b) & 0xff) << 8) | \
(((c) & 0xff) << 16) | \
((d) & 0xff) << 24)}
#define U16_SWAP(x) \
(((x & 0xFF) << 8) | ((x & 0xFF00) >> 8))
+#else
+#define IP4(a, b, c, d) {.s_addr = (uint32_t)((((a) & 0xff) << 24) | \
+					(((b) & 0xff) << 16) | \
+					(((c) & 0xff) << 8) | \
+					((d) & 0xff))}
+
+#define U16_SWAP(x) (x)
+#endif
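+
+/*
+ * Both variants leave the address in network byte order in memory:
+ * e.g. IP4(192, 168, 0, 1) stores the bytes {0xc0, 0xa8, 0x00, 0x01}
+ * (value 0x0100a8c0 on little endian, 0xc0a80001 on big endian).
+ */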
/* create IPv6 address, swapping bytes where needed */
#ifndef s6_addr16
@@ -10,6 +10,7 @@
#include <cmdline_parse.h>
#include <cmdline_parse_num.h>
+#include <rte_byteorder.h>
#include "test_cmdline.h"
@@ -438,6 +439,48 @@ test_parse_num_valid(void)
/* check if result matches what it should have matched
* since unsigned numbers don't care about number of bits, we can just convert
* everything to uint64_t without any worries. */
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
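+	/* cmdline writes the parsed value into the first bytes of the
+	 * result buffer; on big endian those are the most significant
+	 * bytes of the uint64_t, so re-read the value through a pointer
+	 * of the parsed width to normalize it.
+	 */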
+ switch (type) {
+ case RTE_UINT8:
+ {
+ uint8_t *temp = (uint8_t *)&result;
+ result = *temp;
+ break;
+ }
+ case RTE_UINT16:
+ {
+ uint16_t *temp = (uint16_t *)&result;
+ result = *temp;
+ break;
+ }
+ case RTE_UINT32:
+ {
+ uint32_t *temp = (uint32_t *)&result;
+ result = *temp;
+ break;
+ }
+ case RTE_INT8:
+ {
+ int8_t *temp = (int8_t *)&result;
+ result = *temp;
+ break;
+ }
+ case RTE_INT16:
+ {
+ int16_t *temp = (int16_t *)&result;
+ result = *temp;
+ break;
+ }
+ case RTE_INT32:
+ {
+ int32_t *temp = (int32_t *)&result;
+ result = *temp;
+ break;
+ }
+ default:
+ break;
+ }
+#endif
if (ret > 0 && num_valid_positive_strs[i].result != result) {
printf("Error: parsing %s as %s failed: result mismatch!\n",
num_valid_positive_strs[i].str, buf);
@@ -467,6 +510,7 @@ test_parse_num_valid(void)
* the result is signed in this case, so we have to account for that */
if (ret > 0) {
/* detect negative */
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
switch (type) {
case RTE_INT8:
result = (int8_t) result;
@@ -480,6 +524,30 @@ test_parse_num_valid(void)
default:
break;
}
+#else
+ switch (type) {
+ case RTE_INT8:
+ {
+ int8_t *temp = (int8_t *)&result;
+ result = *temp;
+ break;
+ }
+ case RTE_INT16:
+ {
+ int16_t *temp = (int16_t *)&result;
+ result = *temp;
+ break;
+ }
+ case RTE_INT32:
+ {
+ int32_t *temp = (int32_t *)&result;
+ result = *temp;
+ break;
+ }
+ default:
+ break;
+ }
+#endif
if (num_valid_negative_strs[i].result == (int64_t) result)
continue;
printf("Error: parsing %s as %s failed: result mismatch!\n",
@@ -516,6 +584,48 @@ test_parse_num_valid(void)
/* check if result matches what it should have matched
* since unsigned numbers don't care about number of bits, we can just convert
* everything to uint64_t without any worries. */
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+ switch (type) {
+ case RTE_UINT8:
+ {
+ uint8_t *temp = (uint8_t *)&result;
+ result = *temp;
+ break;
+ }
+ case RTE_UINT16:
+ {
+ uint16_t *temp = (uint16_t *)&result;
+ result = *temp;
+ break;
+ }
+ case RTE_UINT32:
+ {
+ uint32_t *temp = (uint32_t *)&result;
+ result = *temp;
+ break;
+ }
+ case RTE_INT8:
+ {
+ int8_t *temp = (int8_t *)&result;
+ result = *temp;
+ break;
+ }
+ case RTE_INT16:
+ {
+ int16_t *temp = (int16_t *)&result;
+ result = *temp;
+ break;
+ }
+ case RTE_INT32:
+ {
+ int32_t *temp = (int32_t *)&result;
+ result = *temp;
+ break;
+ }
+ default:
+ break;
+ }
+#endif
if (ret > 0 && num_garbage_positive_strs[i].result != result) {
printf("Error: parsing %s as %s failed: result mismatch!\n",
num_garbage_positive_strs[i].str, buf);
@@ -25,6 +25,7 @@
* e.g.: key size = 4, key = 0x03020100
* key size = 8, key = 0x0706050403020100
*/
+#if !defined(RTE_ARCH_S390X)
static uint32_t hash_values_jhash[2][12] = {{
0x8ba9414b, 0xdf0d39c9,
0xe4cf1d42, 0xd4ccb93c, 0x5e84eafc, 0x21362cfe,
@@ -51,6 +52,34 @@ static uint32_t hash_values_crc[2][12] = {{
0x789c104f, 0x53028d3e
}
};
+#else
+static uint32_t hash_values_jhash[2][12] = {{
+ 0x8ba9414b, 0x8a2f8eb,
+ 0x55dcd60b, 0xf0b95bfe, 0x1a28d94c, 0x003d8f00,
+ 0x84c90b2c, 0x24b83acf, 0x5e16af2f, 0x751c9f59,
+ 0x665b8254, 0x6e347c81
+},
+{
+ 0x5c62c303, 0xb21d4b7b,
+ 0xa33cdfcf, 0x47cf3d14, 0x1cae829f, 0x1253a9ea,
+ 0x7171efd1, 0xcef21db0, 0x3df3f5fe, 0x35fd67d2,
+ 0x2922cbc4, 0xeaee5c5c
+}
+};
+static uint32_t hash_values_crc[2][12] = {{
+ 0x00000000, 0x13a29877,
+ 0x3eef4343, 0xb6719589, 0x938d3d79, 0xed93196b,
+ 0xe710a46c, 0x81f7ab71, 0x702bc9ee, 0x26c72488,
+ 0x2e7092a9, 0xf2fbc80b
+},
+{
+ 0xbdfd3980, 0x91e95e36,
+ 0x37765e57, 0x6559eb17, 0x49c8a164, 0x18daa0d3,
+ 0x67065980, 0x62f966d0, 0x4e28a2a0, 0xe342d18f,
+ 0x1518c680, 0xebe8026b
+}
+};
+#endif
/*******************************************************************************
* Hash function performance test configuration section. Each performance test
@@ -52,6 +52,20 @@ vect_set_epi32(int i3, int i2, int i1, int i0)
return data;
}
+#elif defined(RTE_ARCH_S390X)
+
+/* loads the xmm_t value from address p (need not be 16-byte aligned) */
+#define vect_loadu_sil128(p) vec_xld2(0, (signed int *)(p))
+
+/* sets the 4 signed 32-bit integer values and returns the xmm_t variable */
+static __rte_always_inline xmm_t
+vect_set_epi32(int i3, int i2, int i1, int i0)
+{
+ xmm_t data = (xmm_t){.u32 = {i0, i1, i2, i3}};
+
+ return data;
+}
+
#elif defined(RTE_ARCH_RISCV)
#define vect_loadu_sil128(p) vect_load_128(p)
@@ -16,8 +16,15 @@
except ImportError:
pass
-import coff
+try:
+    import coff
+except (ImportError, TypeError):
+    # "coff" may be unavailable or may fail to initialize on some platforms
+    pass
+def decode_asciiz(data):
+    """Return the string up to the first NUL byte, decoded."""
+    index = data.find(b'\x00')
+    end = index if index >= 0 else len(data)
+    return data[:end].decode()
class ELFSymbol:
def __init__(self, image, symbol):
@@ -28,7 +35,7 @@ def __init__(self, image, symbol):
def string_value(self):
size = self._symbol["st_size"]
value = self.get_value(0, size)
- return coff.decode_asciiz(value) # not COFF-specific
+    return decode_asciiz(value)
def get_value(self, offset, size):
section = self._symbol["st_shndx"]
@@ -121,6 +121,8 @@ if cpu_instruction_set == 'generic'
cpu_instruction_set = 'generic'
elif host_machine.cpu_family().startswith('ppc')
cpu_instruction_set = 'power8'
+ elif host_machine.cpu_family().startswith('s390x')
+    cpu_instruction_set = 'z13'
elif host_machine.cpu_family().startswith('riscv')
cpu_instruction_set = 'riscv'
endif
new file mode 100644
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# (c) Copyright IBM Corp. 2019, 2020
+
+if not dpdk_conf.get('RTE_ARCH_64')
+ error('Only 64-bit compiles are supported for this platform type')
+endif
+dpdk_conf.set('RTE_ARCH', 's390x')
+dpdk_conf.set('RTE_ARCH_S390X', 1)
+dpdk_conf.set('RTE_FORCE_INTRINSICS', 1)
+
+# overrides specific to s390x
+dpdk_conf.set('RTE_MAX_LCORE', 256)
+dpdk_conf.set('RTE_MAX_NUMA_NODES', 32)
+dpdk_conf.set('RTE_CACHE_LINE_SIZE', 128)
+
+# default to z13
+cpu_instruction_set = 'z13'
+
+# probe compiler support for newer targets; informational for now,
+# the default instruction set remains z13
+cc_march_z14 = cc.has_argument('-march=z14')
+cc_march_z15 = cc.has_argument('-march=z15')
+
+machine_args = ['-march=' + cpu_instruction_set, '-mtune=' + cpu_instruction_set]
+
+dpdk_conf.set('RTE_MACHINE', 's390x')
+dpdk_conf.set('RTE_MACHINE_CPUFLAG_ZARCH', 1)
+
+if (cc.get_define('__s390x__', args: machine_args) != '')
+ compile_time_cpuflags += ['RTE_MACHINE_CPUFLAG_ZARCH']
+endif
+
+# Suppress the gcc warning "note: the layout of aggregates containing
+# vectors with 4-byte alignment has changed in GCC 5".
+if (cc.get_id() == 'gcc' and cc.version().version_compare('>=10.0') and
+ cc.version().version_compare('<12.0') and cc.has_argument('-Wno-psabi'))
+ add_project_arguments('-Wno-psabi', language: 'c')
+endif
new file mode 100644
@@ -0,0 +1,19 @@
+[binaries]
+c = 'clang'
+cpp = 'clang++'
+ar = 'llvm-ar'
+strip = 'llvm-strip'
+llvm-config = 'llvm-config'
+pcap-config = 'llvm-config'
+pkgconfig = 'pkg-config'
+
+[host_machine]
+system = 'linux'
+cpu_family = 's390x'
+cpu = 'z13'
+endian = 'big'
+
+[properties]
+platform = 'generic'
+# NOTE: sysroot/toolchain paths mirror the aarch64 cross file and may
+# need adjusting for the local s390x toolchain
+c_args = ['-target', 's390x-linux-gnu', '--sysroot', '/usr/s390x-linux-gnu']
+c_link_args = ['-target', 's390x-linux-gnu', '-fuse-ld=lld', '--gcc-toolchain=/usr']
@@ -50,6 +50,7 @@ x86-32 = Y
x86-64 = Y
ARMv8 = Y
Power8 = Y
+s390x = Y
[rte_flow items]
ah = Y
@@ -21,6 +21,11 @@
#include <rte_spinlock.h>
#include <rte_os_shim.h>
+/* s390x PCI implementation. */
+#ifdef RTE_MACHINE_CPUFLAG_ZARCH
+#include <rte_io.h>
+#endif
+
#include "mlx5_prm.h"
#include "mlx5_devx_cmds.h"
#include "mlx5_common_os.h"
@@ -407,7 +412,11 @@ mlx5_doorbell_ring(struct mlx5_uar_data *uar, uint64_t val, uint32_t index,
/* Ensure ordering between DB record actual update and UAR access. */
rte_wmb();
#ifdef RTE_ARCH_64
+# ifndef RTE_MACHINE_CPUFLAG_ZARCH
*uar->db = val;
+# else
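+	/* On s390x, UAR (I/O) memory must be written with the PCI store
+	 * instruction that rte_write64_relaxed() provides, not a plain store.
+	 */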
+ rte_write64_relaxed(val, uar->db);
+# endif
#else /* !RTE_ARCH_64 */
rte_spinlock_lock(uar->sl_p);
*(volatile uint32_t *)uar->db = val;
new file mode 100644
@@ -0,0 +1,630 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * (c) Copyright IBM Corp. 2017, 2019
+ */
+
+#include <stdint.h>
+#include <vecintrin.h>
+#include <rte_ethdev_driver.h>
+#include <rte_malloc.h>
+
+#include "base/i40e_prototype.h"
+#include "base/i40e_type.h"
+#include "i40e_ethdev.h"
+#include "i40e_rxtx.h"
+#include "i40e_rxtx_vec_common.h"
+
+#pragma GCC diagnostic ignored "-Wcast-qual"
+
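+/* 128-bit lane types built on GCC vector extensions; the z vector
+ * (VX) builtins from <vecintrin.h> operate on these directly.
+ */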
+typedef unsigned long long vector_unsigned_long_long
+ __attribute__((vector_size(2 * sizeof(unsigned long long))));
+typedef unsigned int vector_unsigned_int
+ __attribute__((vector_size(4 * sizeof(unsigned int))));
+typedef unsigned short vector_unsigned_short
+ __attribute__((vector_size(8 * sizeof(unsigned short))));
+typedef unsigned char vector_unsigned_char
+ __attribute__((vector_size(16 * sizeof(unsigned char))));
+
+
+static inline void
+i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+{
+ int i;
+ uint16_t rx_id;
+ volatile union i40e_rx_desc *rxdp;
+
+ struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+ struct rte_mbuf *mb0, *mb1;
+
+ vector_unsigned_long_long hdr_room = (vector_unsigned_long_long){
+ RTE_PKTMBUF_HEADROOM,
+ RTE_PKTMBUF_HEADROOM};
+ vector_unsigned_long_long dma_addr0, dma_addr1;
+
+ rxdp = rxq->rx_ring + rxq->rxrearm_start;
+
+ /* Pull 'n' more MBUFs into the software ring */
+ if (rte_mempool_get_bulk(rxq->mp,
+ (void *)rxep,
+ RTE_I40E_RXQ_REARM_THRESH) < 0) {
+ if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
+ rxq->nb_rx_desc) {
+ dma_addr0 = (vector_unsigned_long_long){};
+ for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+ rxep[i].mbuf = &rxq->fake_mbuf;
+ vec_xstd2(dma_addr0, 0,
+ (unsigned long long *)&rxdp[i].read);
+ }
+ }
+ rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+ RTE_I40E_RXQ_REARM_THRESH;
+ return;
+ }
+
+ /* Initialize the mbufs in vector, process 2 mbufs in one loop */
+ for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+ vector_unsigned_long_long vaddr0, vaddr1;
+ uintptr_t p0, p1;
+
+ mb0 = rxep[0].mbuf;
+ mb1 = rxep[1].mbuf;
+
+ /* Flush mbuf with pkt template.
+ * Data to be rearmed is 6 bytes long.
+ * Though, RX will overwrite ol_flags that are coming next
+ * anyway. So overwrite whole 8 bytes with one load:
+ * 6 bytes of rearm_data plus first 2 bytes of ol_flags.
+ */
+ p0 = (uintptr_t)&mb0->rearm_data;
+ *(uint64_t *)p0 = rxq->mbuf_initializer;
+ p1 = (uintptr_t)&mb1->rearm_data;
+ *(uint64_t *)p1 = rxq->mbuf_initializer;
+
+ /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+ vaddr0 = vec_xld2(0, (unsigned long long *)&mb0->buf_addr);
+ vaddr1 = vec_xld2(0, (unsigned long long *)&mb1->buf_addr);
+
+ /* convert pa to dma_addr hdr/data */
+ dma_addr0 = vec_mergel(vaddr0, vaddr0);
+ dma_addr1 = vec_mergel(vaddr1, vaddr1);
+
+ /* add headroom to pa values */
+ dma_addr0 = dma_addr0 + hdr_room;
+ dma_addr1 = dma_addr1 + hdr_room;
+
+ /* flush desc with pa dma_addr */
+ vec_xstd2(dma_addr0, 0, (unsigned long long *)&rxdp++->read);
+ vec_xstd2(dma_addr1, 0, (unsigned long long *)&rxdp++->read);
+ }
+
+ rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
+ if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+ rxq->rxrearm_start = 0;
+
+ rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
+
+ rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+ (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+ /* Update the tail pointer on the NIC */
+ I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+}
+
+static inline void
+desc_to_olflags_v(vector_unsigned_long_long descs[4], struct rte_mbuf **rx_pkts)
+{
+ vector_unsigned_int vlan0, vlan1, rss, l3_l4e;
+
+ /* mask everything except RSS, flow director and VLAN flags
+ * bit2 is for VLAN tag, bit11 for flow director indication
+ * bit13:12 for RSS indication.
+ */
+ const vector_unsigned_int rss_vlan_msk = (vector_unsigned_int){
+ (int32_t)0x1c03804, (int32_t)0x1c03804,
+ (int32_t)0x1c03804, (int32_t)0x1c03804};
+
+ /* map rss and vlan type to rss hash and vlan flag */
+ const vector_unsigned_char vlan_flags = (vector_unsigned_char){
+ 0, 0, 0, 0,
+ PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0};
+
+ const vector_unsigned_char rss_flags = (vector_unsigned_char){
+ 0, PKT_RX_FDIR, 0, 0,
+ 0, 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH | PKT_RX_FDIR,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0};
+
+ const vector_unsigned_char l3_l4e_flags = (vector_unsigned_char){
+ 0,
+ PKT_RX_IP_CKSUM_BAD,
+ PKT_RX_L4_CKSUM_BAD,
+ PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD,
+ PKT_RX_IP_CKSUM_BAD,
+ PKT_RX_IP_CKSUM_BAD,
+ PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD,
+ PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD,
+ 0, 0, 0, 0, 0, 0, 0, 0};
+
+ vlan0 = (vector_unsigned_int)vec_mergel(descs[0], descs[1]);
+ vlan1 = (vector_unsigned_int)vec_mergel(descs[2], descs[3]);
+ vlan0 = (vector_unsigned_int)vec_mergeh(vlan0, vlan1);
+
+ vlan1 = vec_and(vlan0, rss_vlan_msk);
+ vlan0 = (vector_unsigned_int)vec_perm(vlan_flags,
+ (vector_unsigned_char){},
+ *(vector_unsigned_char *)&vlan1);
+
+ rss[0] = (uint32_t)vlan1[0] >> 11;
+ rss[1] = (uint32_t)vlan1[1] >> 11;
+ rss[2] = (uint32_t)vlan1[2] >> 11;
+ rss[3] = (uint32_t)vlan1[3] >> 11;
+ rss = (vector_unsigned_int)vec_perm(rss_flags, (vector_unsigned_char){},
+ *(vector_unsigned_char *)&rss);
+
+ l3_l4e[0] = (uint32_t)vlan1[0] >> 22;
+ l3_l4e[1] = (uint32_t)vlan1[1] >> 22;
+ l3_l4e[2] = (uint32_t)vlan1[2] >> 22;
+ l3_l4e[3] = (uint32_t)vlan1[3] >> 22;
+
+ l3_l4e = (vector_unsigned_int)vec_perm(l3_l4e_flags,
+ (vector_unsigned_char){},
+ *(vector_unsigned_char *)&l3_l4e);
+
+ vlan0 = vec_or(vlan0, rss);
+ vlan0 = vec_or(vlan0, l3_l4e);
+
+ rx_pkts[0]->ol_flags = (uint64_t)vlan0[2];
+ rx_pkts[1]->ol_flags = (uint64_t)vlan0[3];
+ rx_pkts[2]->ol_flags = (uint64_t)vlan0[0];
+ rx_pkts[3]->ol_flags = (uint64_t)vlan0[1];
+}
+
+#define PKTLEN_SHIFT 10
+
+static inline void
+desc_to_ptype_v(vector_unsigned_long_long descs[4], struct rte_mbuf **rx_pkts,
+ uint32_t *ptype_tbl)
+{
+ vector_unsigned_long_long ptype0 = vec_mergel(descs[0], descs[1]);
+ vector_unsigned_long_long ptype1 = vec_mergel(descs[2], descs[3]);
+
+ ptype0[0] = ptype0[0] >> 30;
+ ptype0[1] = ptype0[1] >> 30;
+
+ ptype1[0] = ptype1[0] >> 30;
+ ptype1[1] = ptype1[1] >> 30;
+
+ rx_pkts[0]->packet_type =
+ ptype_tbl[(*(vector_unsigned_char *)&ptype0)[0]];
+ rx_pkts[1]->packet_type =
+ ptype_tbl[(*(vector_unsigned_char *)&ptype0)[8]];
+ rx_pkts[2]->packet_type =
+ ptype_tbl[(*(vector_unsigned_char *)&ptype1)[0]];
+ rx_pkts[3]->packet_type =
+ ptype_tbl[(*(vector_unsigned_char *)&ptype1)[8]];
+}
+
+ /* Notice:
+ * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST
+ * numbers of DD bits
+ */
+static inline uint16_t
+_recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts, uint8_t *split_packet)
+{
+ volatile union i40e_rx_desc *rxdp;
+ struct i40e_rx_entry *sw_ring;
+ uint16_t nb_pkts_recd;
+ int pos;
+ uint64_t var;
+ vector_unsigned_char shuf_msk;
+ uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+
+ vector_unsigned_short crc_adjust = (vector_unsigned_short){
+ 0, 0, /* ignore pkt_type field */
+ rxq->crc_len, /* sub crc on pkt_len */
+ 0, /* ignore high-16bits of pkt_len */
+ rxq->crc_len, /* sub crc on data_len */
+ 0, 0, 0 /* ignore non-length fields */
+ };
+ vector_unsigned_long_long dd_check, eop_check;
+
+	/* nb_pkts must be less than or equal to RTE_I40E_MAX_RX_BURST */
+ nb_pkts = RTE_MIN(nb_pkts, RTE_I40E_MAX_RX_BURST);
+
+ /* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP */
+ nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_I40E_DESCS_PER_LOOP);
+
+ /* Just the act of getting into the function from the application is
+ * going to cost about 7 cycles
+ */
+ rxdp = rxq->rx_ring + rxq->rx_tail;
+
+ rte_prefetch0(rxdp);
+
+ /* See if we need to rearm the RX queue - gives the prefetch a bit
+ * of time to act
+ */
+ if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH)
+ i40e_rxq_rearm(rxq);
+
+ /* Before we start moving massive data around, check to see if
+ * there is actually a packet available
+ */
+ if (!(rxdp->wb.qword1.status_error_len &
+ rte_cpu_to_le_32(1 << I40E_RX_DESC_STATUS_DD_SHIFT)))
+ return 0;
+
+ /* 4 packets DD mask */
+ dd_check = (vector_unsigned_long_long){0x0000000100000001ULL,
+ 0x0000000100000001ULL};
+
+ /* 4 packets EOP mask */
+ eop_check = (vector_unsigned_long_long){0x0000000200000002ULL,
+ 0x0000000200000002ULL};
+
+ /* mask to shuffle from desc. to mbuf */
+ shuf_msk = (vector_unsigned_char){
+ 0xFF, 0xFF, /* pkt_type set as unknown */
+ 0xFF, 0xFF, /* pkt_type set as unknown */
+ 14, 15, /* octet 15~14, low 16 bits pkt_len */
+ 0xFF, 0xFF, /* skip high 16 bits pkt_len, zero out */
+ 14, 15, /* octet 15~14, 16 bits data_len */
+ 2, 3, /* octet 2~3, low 16 bits vlan_macip */
+ 4, 5, 6, 7 /* octet 4~7, 32bits rss */
+ };
+
+ /* Cache is empty -> need to scan the buffer rings, but first move
+ * the next 'n' mbufs into the cache
+ */
+ sw_ring = &rxq->sw_ring[rxq->rx_tail];
+
+ /* A. load 4 packet in one loop
+ * [A*. mask out 4 unused dirty field in desc]
+ * B. copy 4 mbuf point from swring to rx_pkts
+ * C. calc the number of DD bits among the 4 packets
+ * [C*. extract the end-of-packet bit, if requested]
+ * D. fill info. from desc to mbuf
+ */
+
+ for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
+ pos += RTE_I40E_DESCS_PER_LOOP,
+ rxdp += RTE_I40E_DESCS_PER_LOOP) {
+ vector_unsigned_long_long descs[RTE_I40E_DESCS_PER_LOOP];
+ vector_unsigned_char pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
+ vector_unsigned_short staterr, sterr_tmp1, sterr_tmp2;
+ vector_unsigned_long_long mbp1, mbp2; /* two mbuf pointer
+ * in one XMM reg.
+ */
+
+ /* B.1 load 1 mbuf point */
+ mbp1 = *(vector_unsigned_long_long *)&sw_ring[pos];
+ /* Read desc statuses backwards to avoid race condition */
+ /* A.1 load 4 pkts desc */
+ descs[3] = *(vector_unsigned_long_long *)(rxdp + 3);
+ rte_compiler_barrier();
+
+ /* B.2 copy 2 mbuf point into rx_pkts */
+ *(vector_unsigned_long_long *)&rx_pkts[pos] = mbp1;
+
+ /* B.1 load 1 mbuf point */
+ mbp2 = *(vector_unsigned_long_long *)&sw_ring[pos + 2];
+
+ descs[2] = *(vector_unsigned_long_long *)(rxdp + 2);
+ rte_compiler_barrier();
+ /* B.1 load 2 mbuf point */
+ descs[1] = *(vector_unsigned_long_long *)(rxdp + 1);
+ rte_compiler_barrier();
+ descs[0] = *(vector_unsigned_long_long *)(rxdp);
+
+ /* B.2 copy 2 mbuf point into rx_pkts */
+ *(vector_unsigned_long_long *)&rx_pkts[pos + 2] = mbp2;
+
+ if (split_packet) {
+ rte_mbuf_prefetch_part2(rx_pkts[pos]);
+ rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
+ rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
+ rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
+ }
+
+ /* avoid compiler reorder optimization */
+ rte_compiler_barrier();
+
+ /* pkt 3,4 shift the pktlen field to be 16-bit aligned*/
+ vector_unsigned_int len3_temp = vec_xld2(0,
+ (unsigned int *)&descs[3]);
+ len3_temp[3] = len3_temp[3] << PKTLEN_SHIFT;
+ const vector_unsigned_int len3 = len3_temp;
+
+ vector_unsigned_int len2_temp = vec_xld2(0,
+ (unsigned int *)&descs[2]);
+ len2_temp[3] = len2_temp[3] << PKTLEN_SHIFT;
+ const vector_unsigned_int len2 = len2_temp;
+
+ /* merge the now-aligned packet length fields back in */
+ descs[3] = (vector_unsigned_long_long)len3;
+ descs[2] = (vector_unsigned_long_long)len2;
+
+ /* D.1 pkt 3,4 convert format from desc to pktmbuf */
+ pkt_mb4 = vec_perm((vector_unsigned_char)descs[3],
+ (vector_unsigned_char){}, shuf_msk);
+ pkt_mb3 = vec_perm((vector_unsigned_char)descs[2],
+ (vector_unsigned_char){}, shuf_msk);
+
+ /* C.1 4=>2 filter staterr info only */
+ sterr_tmp2 = vec_mergel((vector_unsigned_short)descs[3],
+ (vector_unsigned_short)descs[2]);
+ /* C.1 4=>2 filter staterr info only */
+ sterr_tmp1 = vec_mergel((vector_unsigned_short)descs[1],
+ (vector_unsigned_short)descs[0]);
+ /* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
+ pkt_mb4 = (vector_unsigned_char)((vector_unsigned_short)pkt_mb4
+ - crc_adjust);
+ pkt_mb3 = (vector_unsigned_char)((vector_unsigned_short)pkt_mb3
+ - crc_adjust);
+
+ /* pkt 1,2 shift the pktlen field to be 16-bit aligned*/
+ const vector_unsigned_int len1 =
+ vec_sll(vec_xld2(0, (unsigned int *)&descs[1]),
+ (vector_unsigned_int){0, 0, 0, PKTLEN_SHIFT});
+ const vector_unsigned_int len0 =
+ vec_sll(vec_xld2(0, (unsigned int *)&descs[0]),
+ (vector_unsigned_int){0, 0, 0, PKTLEN_SHIFT});
+
+ /* merge the now-aligned packet length fields back in */
+ descs[1] = (vector_unsigned_long_long)len1;
+ descs[0] = (vector_unsigned_long_long)len0;
+
+ /* D.1 pkt 1,2 convert format from desc to pktmbuf */
+ pkt_mb2 = vec_perm((vector_unsigned_char)descs[1],
+ (vector_unsigned_char){}, shuf_msk);
+ pkt_mb1 = vec_perm((vector_unsigned_char)descs[0],
+ (vector_unsigned_char){}, shuf_msk);
+
+ /* C.2 get 4 pkts staterr value */
+ staterr = (vector_unsigned_short)vec_mergeh(sterr_tmp1,
+ sterr_tmp2);
+
+ /* D.3 copy final 3,4 data to rx_pkts */
+ vec_xstd2(pkt_mb4, 0, (unsigned char *)&rx_pkts[pos + 3]
+ ->rx_descriptor_fields1);
+ vec_xstd2(pkt_mb3, 0, (unsigned char *)&rx_pkts[pos + 2]
+ ->rx_descriptor_fields1);
+
+ /* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
+ pkt_mb2 = (vector_unsigned_char)((vector_unsigned_short)pkt_mb2
+ - crc_adjust);
+ pkt_mb1 = (vector_unsigned_char)((vector_unsigned_short)pkt_mb1
+ - crc_adjust);
+
+ /* C* extract and record EOP bit */
+ if (split_packet) {
+ vector_unsigned_char eop_shuf_mask =
+ (vector_unsigned_char){
+ 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x04, 0x0C, 0x00, 0x08
+ };
+
+ /* and with mask to extract bits, flipping 1-0 */
+ vector_unsigned_char eop_bits =
+ vec_and((vector_unsigned_char)vec_nor(staterr,
+ staterr), (vector_unsigned_char)eop_check);
+ /* the staterr values are not in order, as the
+ * count of dd bits doesn't care. However, for end of
+ * packet tracking, we do care, so shuffle. This also
+ * compresses the 32-bit values to 8-bit
+ */
+ eop_bits = vec_perm(eop_bits, (vector_unsigned_char){},
+ eop_shuf_mask);
+ /* store the resulting 32-bit value */
+ *split_packet = (vec_xld2(0,
+ (unsigned int *)&eop_bits))[0];
+ split_packet += RTE_I40E_DESCS_PER_LOOP;
+
+ /* zero-out next pointers */
+ rx_pkts[pos]->next = NULL;
+ rx_pkts[pos + 1]->next = NULL;
+ rx_pkts[pos + 2]->next = NULL;
+ rx_pkts[pos + 3]->next = NULL;
+ }
+
+ /* C.3 calc available number of desc */
+ staterr = vec_and(staterr, (vector_unsigned_short)dd_check);
+
+ /* D.3 copy final 1,2 data to rx_pkts */
+ vec_xstd2(pkt_mb2, 0, (unsigned char *)&rx_pkts[pos + 1]
+ ->rx_descriptor_fields1);
+ vec_xstd2(pkt_mb1, 0, (unsigned char *)&rx_pkts[pos]
+ ->rx_descriptor_fields1);
+
+ desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
+ desc_to_olflags_v(descs, &rx_pkts[pos]);
+
+		/* C.4 calc available number of desc */
+ var = __builtin_popcountll((vec_xld2(0,
+ (unsigned long long *)&staterr)[0]));
+ nb_pkts_recd += var;
+ if (likely(var != RTE_I40E_DESCS_PER_LOOP))
+ break;
+ }
+
+ /* Update our internal tail pointer */
+ rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
+ rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
+ rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);
+
+ return nb_pkts_recd;
+}
+
+ /* Notice:
+ * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST
+ * numbers of DD bits
+ */
+uint16_t
+i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
+}
+
+ /* vPMD receive routine that reassembles scattered packets
+ * Notice:
+ * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST
+ * numbers of DD bits
+ */
+uint16_t
+i40e_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ struct i40e_rx_queue *rxq = rx_queue;
+ uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
+
+ /* get some new buffers */
+ uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
+ split_flags);
+ if (nb_bufs == 0)
+ return 0;
+
+ /* happy day case, full burst + no packets to be joined */
+ const uint64_t *split_fl64 = (uint64_t *)split_flags;
+
+ if (rxq->pkt_first_seg == NULL &&
+ split_fl64[0] == 0 && split_fl64[1] == 0 &&
+ split_fl64[2] == 0 && split_fl64[3] == 0)
+ return nb_bufs;
+
+ /* reassemble any packets that need reassembly*/
+ unsigned int i = 0;
+
+ if (!rxq->pkt_first_seg) {
+ /* find the first split flag, and only reassemble then*/
+ while (i < nb_bufs && !split_flags[i])
+ i++;
+ if (i == nb_bufs)
+ return nb_bufs;
+ }
+ return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
+ &split_flags[i]);
+}
+
+static inline void
+vtx1(volatile struct i40e_tx_desc *txdp,
+ struct rte_mbuf *pkt, uint64_t flags)
+{
+ uint64_t high_qw = (I40E_TX_DESC_DTYPE_DATA |
+ ((uint64_t)flags << I40E_TXD_QW1_CMD_SHIFT) |
+ ((uint64_t)pkt->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT));
+
+ vector_unsigned_long_long descriptor = (vector_unsigned_long_long){
+ pkt->buf_iova + pkt->data_off, high_qw};
+ *(vector_unsigned_long_long *)txdp = descriptor;
+}
+
+static inline void
+vtx(volatile struct i40e_tx_desc *txdp,
+ struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags)
+{
+ int i;
+
+ for (i = 0; i < nb_pkts; ++i, ++txdp, ++pkt)
+ vtx1(txdp, *pkt, flags);
+}
+
+uint16_t
+i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts)
+{
+ struct i40e_tx_queue *txq = (struct i40e_tx_queue *)tx_queue;
+ volatile struct i40e_tx_desc *txdp;
+ struct i40e_tx_entry *txep;
+ uint16_t n, nb_commit, tx_id;
+ uint64_t flags = I40E_TD_CMD;
+ uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD;
+ int i;
+
+ /* cross rx_thresh boundary is not allowed */
+ nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
+
+ if (txq->nb_tx_free < txq->tx_free_thresh)
+ i40e_tx_free_bufs(txq);
+
+ nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
+ nb_commit = nb_pkts;
+ if (unlikely(nb_pkts == 0))
+ return 0;
+
+ tx_id = txq->tx_tail;
+ txdp = &txq->tx_ring[tx_id];
+ txep = &txq->sw_ring[tx_id];
+
+ txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
+
+ n = (uint16_t)(txq->nb_tx_desc - tx_id);
+ if (nb_commit >= n) {
+ tx_backlog_entry(txep, tx_pkts, n);
+
+ for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)
+ vtx1(txdp, *tx_pkts, flags);
+
+ vtx1(txdp, *tx_pkts++, rs);
+
+ nb_commit = (uint16_t)(nb_commit - n);
+
+ tx_id = 0;
+ txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
+
+ /* avoid reach the end of ring */
+ txdp = &txq->tx_ring[tx_id];
+ txep = &txq->sw_ring[tx_id];
+ }
+
+ tx_backlog_entry(txep, tx_pkts, nb_commit);
+
+ vtx(txdp, tx_pkts, nb_commit, flags);
+
+ tx_id = (uint16_t)(tx_id + nb_commit);
+ if (tx_id > txq->tx_next_rs) {
+ txq->tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
+ rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) <<
+ I40E_TXD_QW1_CMD_SHIFT);
+ txq->tx_next_rs =
+ (uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
+ }
+
+ txq->tx_tail = tx_id;
+
+ I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);
+
+ return nb_pkts;
+}
+
+void __rte_cold
+i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
+{
+ _i40e_rx_queue_release_mbufs_vec(rxq);
+}
+
+int __rte_cold
+i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
+{
+ return i40e_rxq_vec_setup_default(rxq);
+}
+
+int __rte_cold
+i40e_txq_vec_setup(struct i40e_tx_queue __rte_unused *txq)
+{
+ return 0;
+}
+
+int __rte_cold
+i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev)
+{
+ return i40e_rx_vec_dev_conf_condition_check_default(dev);
+}
@@ -91,6 +91,8 @@ if arch_subdir == 'x86'
endif
elif arch_subdir == 'ppc'
sources += files('i40e_rxtx_vec_altivec.c')
+elif arch_subdir == 's390x'
+ sources += files('i40e_rxtx_vec_s390x.c')
elif arch_subdir == 'arm'
sources += files('i40e_rxtx_vec_neon.c')
endif
@@ -6029,11 +6029,11 @@ ixgbe_config_rss_filter(struct rte_eth_dev *dev,
return 0;
}
-/* Stubs needed for linkage when RTE_ARCH_PPC_64, RTE_ARCH_RISCV or
- * RTE_ARCH_LOONGARCH is set.
+/* Stubs needed for linkage when RTE_ARCH_PPC_64, RTE_ARCH_RISCV,
+ * RTE_ARCH_LOONGARCH or RTE_ARCH_S390X is set.
*/
-#if defined(RTE_ARCH_PPC_64) || defined(RTE_ARCH_RISCV) || \
- defined(RTE_ARCH_LOONGARCH)
-#if defined(RTE_ARCH_PPC_64) || defined(RTE_ARCH_RISCV) || \
-	defined(RTE_ARCH_LOONGARCH)
+#if defined(RTE_ARCH_PPC_64) || defined(RTE_ARCH_S390X) || \
+	defined(RTE_ARCH_LOONGARCH) || defined(RTE_ARCH_RISCV)
int
ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
{
@@ -182,6 +182,8 @@ const char *memif_version(void);
#define __NR_memfd_create 279
#elif defined __powerpc__
#define __NR_memfd_create 360
+#elif defined __s390x__
+#define __NR_memfd_create 350
#elif defined __i386__
#define __NR_memfd_create 356
#elif defined __riscv
@@ -229,6 +229,8 @@ mlx5_rx_burst_mode_get(struct rte_eth_dev *dev,
snprintf(mode->info, sizeof(mode->info), "%s", "Vector Neon");
#elif defined RTE_ARCH_PPC_64
snprintf(mode->info, sizeof(mode->info), "%s", "Vector AltiVec");
+#elif defined RTE_ARCH_S390X
+ snprintf(mode->info, sizeof(mode->info), "%s", "Vector S390X");
#else
return -EINVAL;
#endif
@@ -239,6 +241,8 @@ mlx5_rx_burst_mode_get(struct rte_eth_dev *dev,
snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector Neon");
#elif defined RTE_ARCH_PPC_64
snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector AltiVec");
+#elif defined RTE_ARCH_S390X
+ snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector S390X");
#else
return -EINVAL;
#endif
@@ -340,12 +344,24 @@ rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
uint8_t ptype;
uint8_t pinfo = (cqe->pkt_info & 0x3) << 6;
+	/*
+	 * hdr_type_etc is read straight from the CQE, where it is stored
+	 * big-endian. The mask logic below was written against the raw
+	 * (unswapped) value as seen on little-endian hosts, so apply
+	 * rte_le_to_cpu_16() first: a no-op on little endian, a byte swap
+	 * on big-endian s390x, giving both the same view of the value.
+	 */
+ uint16_t cqe_t_le = rte_le_to_cpu_16(cqe->hdr_type_etc);
+ uint16_t mcqe_t_le;
+
/* Get l3/l4 header from mini-CQE in case L3/L4 format*/
if (mcqe == NULL ||
- rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX)
- ptype = (cqe->hdr_type_etc & 0xfc00) >> 10;
- else
- ptype = mcqe->hdr_type >> 2;
+ rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX) {
+ ptype = (cqe_t_le & 0xfc00) >> 10;
+ } else {
+ mcqe_t_le = rte_le_to_cpu_16(mcqe->hdr_type);
+ ptype = mcqe_t_le >> 2;
+ }
/*
* The index to the array should have:
* bit[1:0] = l3_hdr_type
@@ -157,6 +157,12 @@ typedef union octtx_wqe_s {
uint64_t lbptr : 8;
uint64_t laptr : 8;
} w4;
+
+ struct {
+		uint64_t size : 16;
+		uint64_t dwd : 1;
+		uint64_t rsvd0 : 47;
+ } w5;
#endif
} s;
@@ -239,6 +239,14 @@ em_mask_key(void *key, xmm_t mask)
return vec_and(data, mask);
}
+#elif defined(__s390x__)
+static inline xmm_t
+em_mask_key(void *key, xmm_t mask)
+{
+ xmm_t data = (xmm_t) vec_xld2(0, (unsigned int *)(key));
+
+	return vec_and(data, mask);
+}
#elif defined(RTE_ARCH_RISCV)
static inline xmm_t
em_mask_key(void *key, xmm_t mask)
new file mode 100644
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2016 Intel Corporation.
+ * (c) Copyright IBM Corp. 2017, 2019
+ */
+#ifndef __L3FWD_LPM_S390X_H__
+#define __L3FWD_LPM_S390X_H__
+
+#include "l3fwd_s390x.h"
+
+typedef unsigned char vector_unsigned_char
+ __attribute__((vector_size(16*sizeof(unsigned char))));
+
+/*
+ * Read packet_type and destination IPV4 addresses from 4 mbufs.
+ */
+static inline void
+processx4_step1(struct rte_mbuf *pkt[FWDSTEP],
+ vector_unsigned_int *dip,
+ uint32_t *ipv4_flag)
+{
+ struct ipv4_hdr *ipv4_hdr;
+ struct ether_hdr *eth_hdr;
+ uint32_t x0, x1, x2, x3;
+
+ eth_hdr = rte_pktmbuf_mtod(pkt[0], struct ether_hdr *);
+ ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+ x0 = ipv4_hdr->dst_addr;
+ ipv4_flag[0] = pkt[0]->packet_type & RTE_PTYPE_L3_IPV4;
+
+ rte_compiler_barrier();
+ eth_hdr = rte_pktmbuf_mtod(pkt[1], struct ether_hdr *);
+ ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+ x1 = ipv4_hdr->dst_addr;
+ ipv4_flag[0] &= pkt[1]->packet_type;
+
+ rte_compiler_barrier();
+ eth_hdr = rte_pktmbuf_mtod(pkt[2], struct ether_hdr *);
+ ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+ x2 = ipv4_hdr->dst_addr;
+ ipv4_flag[0] &= pkt[2]->packet_type;
+
+ rte_compiler_barrier();
+ eth_hdr = rte_pktmbuf_mtod(pkt[3], struct ether_hdr *);
+ ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+ x3 = ipv4_hdr->dst_addr;
+ ipv4_flag[0] &= pkt[3]->packet_type;
+
+ rte_compiler_barrier();
+ dip[0] = (vector_unsigned_int){x0, x1, x2, x3};
+}
+
+/*
+ * Lookup into LPM for destination port.
+ * If lookup fails, use incoming port (portid) as destination port.
+ */
+static inline void
+processx4_step2(const struct lcore_conf *qconf,
+ vector_unsigned_int dip,
+ uint32_t ipv4_flag,
+ uint8_t portid,
+ struct rte_mbuf *pkt[FWDSTEP],
+ uint16_t dprt[FWDSTEP])
+{
+ rte_xmm_t dst;
+ const vector_unsigned_char bswap_mask = (vector_unsigned_char){
+ 3, 2, 1, 0,
+ 7, 6, 5, 4,
+ 11, 10, 9, 8,
+ 15, 14, 13, 12};
+
+ /* Byte swap 4 IPV4 addresses. */
+ dip = (vector_unsigned_int)vec_perm(*(vector_unsigned_char *)&dip,
+ (vector_unsigned_char){}, bswap_mask);
+
+ /* if all 4 packets are IPV4. */
+ if (likely(ipv4_flag)) {
+ rte_lpm_lookupx4(qconf->ipv4_lookup_struct, (xmm_t)dip,
+ (uint32_t *)&dst, portid);
+ /* get rid of unused upper 16 bit for each dport. */
+ dst.x = (xmm_t)vec_packs(dst.x, dst.x);
+ *(uint64_t *)dprt = dst.u64[0];
+ } else {
+ dst.x = (xmm_t)dip;
+ dprt[0] = lpm_get_dst_port_with_ipv4(qconf, pkt[0],
+ dst.u32[0], portid);
+ dprt[1] = lpm_get_dst_port_with_ipv4(qconf, pkt[1],
+ dst.u32[1], portid);
+ dprt[2] = lpm_get_dst_port_with_ipv4(qconf, pkt[2],
+ dst.u32[2], portid);
+ dprt[3] = lpm_get_dst_port_with_ipv4(qconf, pkt[3],
+ dst.u32[3], portid);
+ }
+}
+
+/*
+ * Buffer optimized handling of packets, invoked
+ * from main_loop.
+ */
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint8_t portid, struct lcore_conf *qconf)
+{
+ int32_t j;
+ uint16_t dst_port[MAX_PKT_BURST];
+ vector_unsigned_int dip[MAX_PKT_BURST / FWDSTEP];
+ uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
+ const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
+
+ for (j = 0; j != k; j += FWDSTEP)
+ processx4_step1(&pkts_burst[j], &dip[j / FWDSTEP],
+ &ipv4_flag[j / FWDSTEP]);
+
+ for (j = 0; j != k; j += FWDSTEP)
+ processx4_step2(qconf, dip[j / FWDSTEP],
+ ipv4_flag[j / FWDSTEP],
+ portid, &pkts_burst[j], &dst_port[j]);
+
+ /* Classify last up to 3 packets one by one */
+ switch (nb_rx % FWDSTEP) {
+ case 3:
+ dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ j++;
+ /* fall-through */
+ case 2:
+ dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ j++;
+ /* fall-through */
+ case 1:
+ dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ j++;
+ }
+
+ send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
+}
+
+#endif /* __L3FWD_LPM_S390X_H__ */
new file mode 100644
@@ -0,0 +1,261 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016 Intel Corporation.
+ * (c) Copyright IBM Corp. 2017, 2019
+ */
+#ifndef _L3FWD_S390X_H_
+#define _L3FWD_S390X_H_
+
+#include "l3fwd.h"
+#include "l3fwd_common.h"
+
+/* Vector Shift Right by Octet */
+#define vec_sro(a, b) vec_srb(a, (b) << 64)
+
+typedef unsigned int vector_unsigned_int
+ __attribute__((vector_size(4*sizeof(unsigned int))));
+typedef unsigned short vector_unsigned_short
+ __attribute__((vector_size(8*sizeof(unsigned short))));
+
+/*
+ * Update source and destination MAC addresses in the ethernet header.
+ * Perform RFC1812 checks and updates for IPV4 packets.
+ */
+static inline void
+processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP])
+{
+ vector_unsigned_int te[FWDSTEP];
+ vector_unsigned_int ve[FWDSTEP];
+ vector_unsigned_int *p[FWDSTEP];
+
+ p[0] = rte_pktmbuf_mtod(pkt[0], vector_unsigned_int *);
+ p[1] = rte_pktmbuf_mtod(pkt[1], vector_unsigned_int *);
+ p[2] = rte_pktmbuf_mtod(pkt[2], vector_unsigned_int *);
+ p[3] = rte_pktmbuf_mtod(pkt[3], vector_unsigned_int *);
+
+ ve[0] = (vector_unsigned_int)val_eth[dst_port[0]];
+ te[0] = *p[0];
+
+ ve[1] = (vector_unsigned_int)val_eth[dst_port[1]];
+ te[1] = *p[1];
+
+ ve[2] = (vector_unsigned_int)val_eth[dst_port[2]];
+ te[2] = *p[2];
+
+ ve[3] = (vector_unsigned_int)val_eth[dst_port[3]];
+ te[3] = *p[3];
+
+ /* Update first 12 bytes, keep rest bytes intact. */
+ te[0] = (vector_unsigned_int)vec_sel(
+ (vector_unsigned_short)ve[0],
+ (vector_unsigned_short)te[0],
+ (vector_unsigned_short) {0, 0, 0, 0,
+ 0, 0, 0xffff, 0xffff});
+
+ te[1] = (vector_unsigned_int)vec_sel(
+ (vector_unsigned_short)ve[1],
+ (vector_unsigned_short)te[1],
+ (vector_unsigned_short) {0, 0, 0, 0,
+ 0, 0, 0xffff, 0xffff});
+
+ te[2] = (vector_unsigned_int)vec_sel(
+ (vector_unsigned_short)ve[2],
+ (vector_unsigned_short)te[2],
+ (vector_unsigned_short) {0, 0, 0, 0, 0,
+ 0, 0xffff, 0xffff});
+
+ te[3] = (vector_unsigned_int)vec_sel(
+ (vector_unsigned_short)ve[3],
+ (vector_unsigned_short)te[3],
+ (vector_unsigned_short) {0, 0, 0, 0,
+ 0, 0, 0xffff, 0xffff});
+
+ *p[0] = te[0];
+ *p[1] = te[1];
+ *p[2] = te[2];
+ *p[3] = te[3];
+
+ rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[0] + 1),
+ &dst_port[0], pkt[0]->packet_type);
+ rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[1] + 1),
+ &dst_port[1], pkt[1]->packet_type);
+ rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[2] + 1),
+ &dst_port[2], pkt[2]->packet_type);
+ rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[3] + 1),
+ &dst_port[3], pkt[3]->packet_type);
+}
+
+/*
+ * Group consecutive packets with the same destination port in bursts of 4.
+ * Suppose we have array of destination ports:
+ * dst_port[] = {a, b, c, d,, e, ... }
+ * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>.
+ * We doing 4 comparisons at once and the result is 4 bit mask.
+ * This mask is used as an index into prebuild array of pnum values.
+ */
+static inline uint16_t *
+port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, vector_unsigned_short dp1,
+ vector_unsigned_short dp2)
+{
+ union {
+ uint16_t u16[FWDSTEP + 1];
+ uint64_t u64;
+ } *pnum = (void *)pn;
+
+ int32_t v;
+
+ v = vec_any_eq(dp1, dp2);
+
+ /* update last port counter. */
+ lp[0] += gptbl[v].lpv;
+
+ /* if dest port value has changed. */
+ if (v != GRPMSK) {
+ pnum->u64 = gptbl[v].pnum;
+ pnum->u16[FWDSTEP] = 1;
+ lp = pnum->u16 + gptbl[v].idx;
+ }
+
+ return lp;
+}
+
+/**
+ * Process one packet:
+ * Update source and destination MAC addresses in the ethernet header.
+ * Perform RFC1812 checks and updates for IPV4 packets.
+ */
+static inline void
+process_packet(struct rte_mbuf *pkt, uint16_t *dst_port)
+{
+ struct ether_hdr *eth_hdr;
+ vector_unsigned_int te, ve;
+
+ eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
+
+ te = *(vector_unsigned_int *)eth_hdr;
+ ve = (vector_unsigned_int)val_eth[dst_port[0]];
+
+ rfc1812_process((struct ipv4_hdr *)(eth_hdr + 1), dst_port,
+ pkt->packet_type);
+
+ /* dynamically vec_sel te and ve for MASK_ETH (0x3f) */
+ te = (vector_unsigned_int)vec_sel(
+ (vector_unsigned_short)ve,
+ (vector_unsigned_short)te,
+ (vector_unsigned_short){0, 0, 0, 0,
+ 0, 0, 0xffff, 0xffff});
+
+ *(vector_unsigned_int *)eth_hdr = te;
+}
+
+/**
+ * Send packets burst from pkts_burst to the ports in dst_port array
+ */
+static __rte_always_inline void
+send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
+ uint16_t dst_port[MAX_PKT_BURST], int nb_rx)
+{
+ int32_t k;
+ int j = 0;
+ uint16_t dlp;
+ uint16_t *lp;
+ uint16_t pnum[MAX_PKT_BURST + 1];
+
+ /*
+ * Finish packet processing and group consecutive
+ * packets with the same destination port.
+ */
+ k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
+ if (k != 0) {
+ vector_unsigned_short dp1, dp2;
+
+ lp = pnum;
+ lp[0] = 1;
+
+ processx4_step3(pkts_burst, dst_port);
+
+ /* dp1: <d[0], d[1], d[2], d[3], ... > */
+ dp1 = *(vector_unsigned_short *)dst_port;
+
+ for (j = FWDSTEP; j != k; j += FWDSTEP) {
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
+
+ /*
+ * dp2:
+ * <d[j-3], d[j-2], d[j-1], d[j], ... >
+ */
+ dp2 = *((vector_unsigned_short *)
+ &dst_port[j - FWDSTEP + 1]);
+ lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);
+
+ /*
+ * dp1:
+ * <d[j], d[j+1], d[j+2], d[j+3], ... >
+ */
+ dp1 = vec_sro(dp2, (vector unsigned char) {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, (FWDSTEP - 1) * sizeof(dst_port[0])});
+ }
+
+ /*
+ * dp2: <d[j-3], d[j-2], d[j-1], d[j-1], ... >
+ */
+ dp2 = vec_perm(dp1, (vector_unsigned_short){},
+ (vector unsigned char) {0xf9});
+ lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);
+
+ /*
+ * remove values added by the last repeated
+ * dst port.
+ */
+ lp[0]--;
+ dlp = dst_port[j - 1];
+ } else {
+ /* set dlp and lp to the never used values. */
+ dlp = BAD_PORT - 1;
+ lp = pnum + MAX_PKT_BURST;
+ }
+
+ /* Process up to last 3 packets one by one. */
+ switch (nb_rx % FWDSTEP) {
+ case 3:
+ process_packet(pkts_burst[j], dst_port + j);
+ GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
+ j++;
+ /* fall-through */
+ case 2:
+ process_packet(pkts_burst[j], dst_port + j);
+ GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
+ j++;
+ /* fall-through */
+ case 1:
+ process_packet(pkts_burst[j], dst_port + j);
+ GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
+ j++;
+ }
+
+ /*
+ * Send packets out, through destination port.
+ * Consecutive packets with the same destination port
+ * are already grouped together.
+ * If destination port for the packet equals BAD_PORT,
+ * then free the packet without sending it out.
+ */
+ for (j = 0; j < nb_rx; j += k) {
+
+ int32_t m;
+ uint16_t pn;
+
+ pn = dst_port[j];
+ k = pnum[j];
+
+ if (likely(pn != BAD_PORT))
+ send_packetsx4(qconf, pn, pkts_burst + j, k);
+ else
+ for (m = j; m != j + k; m++)
+ rte_pktmbuf_free(pkts_burst[m]);
+
+ }
+}
+
+#endif /* _L3FWD_S390X_H_ */
@@ -780,6 +780,9 @@ acl_build_reset(struct rte_acl_ctx *ctx)
sizeof(*ctx) - offsetof(struct rte_acl_ctx, num_categories));
}
static void
acl_gen_full_range(struct acl_build_context *context, struct rte_acl_node *root,
struct rte_acl_node *end, int size, int level)
@@ -360,7 +360,16 @@ acl_gen_node(struct rte_acl_node *node, uint64_t *node_array,
array_ptr = &node_array[index->quad_index];
acl_add_ptrs(node, array_ptr, no_match, 0);
qtrp = (uint32_t *)node->transitions;
+
+	/* Swap qtrp[0] on big-endian hosts so that transitions[0]
+	 * ends up in the least significant byte.
+	 */
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ node->node_index = __bswap_32(qtrp[0]);
+#else
node->node_index = qtrp[0];
+#endif
+
node->node_index <<= sizeof(index->quad_index) * CHAR_BIT;
node->node_index |= index->quad_index | node->node_type;
index->quad_index += node->fanout;
@@ -141,6 +141,14 @@ rte_acl_classify_scalar(const struct rte_acl_ctx *ctx, const uint8_t **data,
input0 = GET_NEXT_4BYTES(parms, 0);
input1 = GET_NEXT_4BYTES(parms, 1);
+	/* the input needs to be byte-swapped on big-endian hosts
+	 * because the rule bytes were swapped while building the trie.
+	 */
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ input0 = __bswap_32(input0);
+ input1 = __bswap_32(input1);
+#endif
+
for (n = 0; n < 4; n++) {
transition0 = scalar_transition(flows.trans,
@@ -101,6 +101,8 @@ static const rte_acl_classify_t classify_fns[] = {
[RTE_ACL_CLASSIFY_AVX2] = rte_acl_classify_avx2,
[RTE_ACL_CLASSIFY_NEON] = rte_acl_classify_neon,
[RTE_ACL_CLASSIFY_ALTIVEC] = rte_acl_classify_altivec,
+ /* use scalar for s390x for now */
+ [RTE_ACL_CLASSIFY_S390X] = rte_acl_classify_scalar,
[RTE_ACL_CLASSIFY_AVX512X16] = rte_acl_classify_avx512x16,
[RTE_ACL_CLASSIFY_AVX512X32] = rte_acl_classify_avx512x32,
};
@@ -145,6 +147,27 @@ acl_check_alg_ppc(enum rte_acl_classify_alg alg)
return -EINVAL;
}
+
+/*
+ * Helper function for acl_check_alg.
+ * Check support for the s390x specific classify method.
+ */
+static int
+acl_check_alg_s390x(enum rte_acl_classify_alg alg)
+{
+ if (alg == RTE_ACL_CLASSIFY_S390X) {
+#if defined(RTE_ARCH_S390X)
+ if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128)
+ return 0;
+#endif
+ return -ENOTSUP;
+ }
+
+ return -EINVAL;
+}
+
#ifdef CC_AVX512_SUPPORT
static int
acl_check_avx512_cpu_flags(void)
@@ -216,6 +239,8 @@ acl_check_alg(enum rte_acl_classify_alg alg)
return acl_check_alg_arm(alg);
case RTE_ACL_CLASSIFY_ALTIVEC:
return acl_check_alg_ppc(alg);
+ case RTE_ACL_CLASSIFY_S390X:
+ return acl_check_alg_s390x(alg);
case RTE_ACL_CLASSIFY_AVX512X32:
case RTE_ACL_CLASSIFY_AVX512X16:
case RTE_ACL_CLASSIFY_AVX2:
@@ -244,6 +269,8 @@ acl_get_best_alg(void)
RTE_ACL_CLASSIFY_NEON,
#elif defined(RTE_ARCH_PPC_64)
RTE_ACL_CLASSIFY_ALTIVEC,
+#elif defined(RTE_ARCH_S390X)
+ RTE_ACL_CLASSIFY_S390X,
#elif defined(RTE_ARCH_X86)
RTE_ACL_CLASSIFY_AVX512X32,
RTE_ACL_CLASSIFY_AVX512X16,
@@ -242,8 +242,9 @@ enum rte_acl_classify_alg {
RTE_ACL_CLASSIFY_AVX2 = 3, /**< requires AVX2 support. */
RTE_ACL_CLASSIFY_NEON = 4, /**< requires NEON support. */
RTE_ACL_CLASSIFY_ALTIVEC = 5, /**< requires ALTIVEC support. */
- RTE_ACL_CLASSIFY_AVX512X16 = 6, /**< requires AVX512 support. */
- RTE_ACL_CLASSIFY_AVX512X32 = 7, /**< requires AVX512 support. */
+ RTE_ACL_CLASSIFY_S390X = 6, /**< requires s390x z13 support. */
+ RTE_ACL_CLASSIFY_AVX512X16 = 7, /**< requires AVX512 support. */
+ RTE_ACL_CLASSIFY_AVX512X32 = 8, /**< requires AVX512 support. */
};
/**
new file mode 100644
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# (c) Copyright IBM Corp. 2018, 2019
+
+install_headers(
+ 'rte_atomic.h',
+ 'rte_byteorder.h',
+ 'rte_cpuflags.h',
+ 'rte_cycles.h',
+ 'rte_io.h',
+ 'rte_memcpy.h',
+ 'rte_pause.h',
+ 'rte_prefetch.h',
+ 'rte_rwlock.h',
+ 'rte_spinlock.h',
+ 'rte_vect.h',
+ subdir: get_option('include_subdir_arch'))
new file mode 100644
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_ATOMIC_S390X_H_
+#define _RTE_ATOMIC_S390X_H_
+
+#ifndef RTE_FORCE_INTRINSICS
+# error Platform must be built with RTE_FORCE_INTRINSICS
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_atomic.h"
+
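+/*
+ * z/Architecture provides a strongly ordered memory model, so these
+ * barriers only need to prevent compiler reordering.
+ */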
+#define rte_mb() rte_compiler_barrier()
+
+#define rte_wmb() rte_mb()
+
+#define rte_rmb() rte_mb()
+
+#define rte_smp_mb() rte_mb()
+
+#define rte_smp_wmb() rte_wmb()
+
+#define rte_smp_rmb() rte_rmb()
+
+#define rte_io_mb() rte_mb()
+
+#define rte_io_wmb() rte_wmb()
+
+#define rte_io_rmb() rte_rmb()
+
+#define rte_cio_wmb() rte_wmb()
+
+#define rte_cio_rmb() rte_rmb()
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ATOMIC_S390X_H_ */
new file mode 100644
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+/* Inspired from FreeBSD src/sys/powerpc/include/endian.h
+ * Copyright (c) 1987, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ */
+
+#ifndef _RTE_BYTEORDER_S390X_H_
+#define _RTE_BYTEORDER_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include "generic/rte_byteorder.h"
+
+/* s390x is big endian. */
+
+#define rte_cpu_to_le_16(x) rte_bswap16(x)
+#define rte_cpu_to_le_32(x) rte_bswap32(x)
+#define rte_cpu_to_le_64(x) rte_bswap64(x)
+
+#define rte_cpu_to_be_16(x) (x)
+#define rte_cpu_to_be_32(x) (x)
+#define rte_cpu_to_be_64(x) (x)
+
+#define rte_le_to_cpu_16(x) rte_bswap16(x)
+#define rte_le_to_cpu_32(x) rte_bswap32(x)
+#define rte_le_to_cpu_64(x) rte_bswap64(x)
+
+#define rte_be_to_cpu_16(x) (x)
+#define rte_be_to_cpu_32(x) (x)
+#define rte_be_to_cpu_64(x) (x)
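+
+/*
+ * Example: rte_cpu_to_le_32(0x12345678) yields 0x78563412 here, while
+ * the cpu_to_be/be_to_cpu conversions are no-ops on this big-endian CPU.
+ */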
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BYTEORDER_S390X_H_ */
new file mode 100644
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_CPUFLAGS_S390X_H_
+#define _RTE_CPUFLAGS_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Enumeration of all CPU features supported
+ */
+enum rte_cpu_flag_t {
+ RTE_CPUFLAG_ESAN3 = 0,
+ RTE_CPUFLAG_ZARCH,
+ RTE_CPUFLAG_STFLE,
+ RTE_CPUFLAG_MSA,
+ RTE_CPUFLAG_LDISP,
+ RTE_CPUFLAG_EIMM,
+ RTE_CPUFLAG_DFP,
+ RTE_CPUFLAG_HPAGE,
+ RTE_CPUFLAG_ETF3EH,
+ RTE_CPUFLAG_HIGH_GPRS,
+ RTE_CPUFLAG_TE,
+ RTE_CPUFLAG_VXRS,
+ RTE_CPUFLAG_VXRS_BCD,
+ RTE_CPUFLAG_VXRS_EXT,
+ RTE_CPUFLAG_GS,
+ /* The last item */
+ RTE_CPUFLAG_NUMFLAGS,/**< This should always be the last! */
+};
+
+#include "generic/rte_cpuflags.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CPUFLAGS_S390X_H_ */
new file mode 100644
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_CYCLES_S390X_H_
+#define _RTE_CYCLES_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_cycles.h"
+
+#include <rte_common.h>
+
+/**
+ * Read the TOD (time-of-day) clock via STCKF. The TOD clock ticks at
+ * a constant rate, independent of the CPU frequency.
+ *
+ * @return
+ *   The current TOD clock value.
+ */
+static inline uint64_t
+rte_rdtsc(void)
+{
+ uint64_t tsc;
+ asm volatile("stckf %0" : "=Q"(tsc) : : "cc");
+ return tsc;
+}
+
+static inline uint64_t
+rte_rdtsc_precise(void)
+{
+ rte_mb();
+ return rte_rdtsc();
+}
+
+static inline uint64_t
+rte_get_tsc_cycles(void)
+{
+	return rte_rdtsc();
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CYCLES_S390X_H_ */
new file mode 100644
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_IO_S390X_H_
+#define _RTE_IO_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#define RTE_OVERRIDE_IO_H
+
+#include "generic/rte_io.h"
+
+#include <unistd.h>
+#include <sys/syscall.h>
+
+union register_pair {
+ __int128_t pair;
+ struct {
+ unsigned long even;
+ unsigned long odd;
+ } even_odd;
+};
+
+/* s390x requires dedicated instructions to access PCI I/O memory;
+ * pcilgi()/pcistgi() below wrap the memory-I/O forms of the PCI load
+ * and store instructions.
+ */
+static inline uint64_t pcilgi(const volatile void *ioaddr, size_t len)
+{
+ int cc = -1;
+ uint64_t val;
+ union register_pair ioaddr_len = {
+ .even_odd.even = (uint64_t)ioaddr, .even_odd.odd = len
+ };
+ asm volatile (
+ " .insn rre,0xb9d60000,%[val],%[ioaddr_len]\n"
+ " ipm %[cc]\n"
+ " srl %[cc],28\n"
+ : [cc] "+d" (cc), [val] "=d" (val),
+ [ioaddr_len] "+&d" (ioaddr_len.pair) :: "cc");
+ return val;
+}
+
+static inline void pcistgi(volatile void *ioaddr, uint64_t val, size_t len)
+{
+ int cc = -1;
+ union register_pair ioaddr_len = {
+ .even_odd.even = (uint64_t)ioaddr, .even_odd.odd = len
+ };
+ asm volatile (
+ " .insn rre,0xb9d40000,%[val],%[ioaddr_len]\n"
+ " ipm %[cc]\n"
+ " srl %[cc],28\n"
+ : [cc] "+d" (cc), [ioaddr_len] "+&d" (ioaddr_len.pair)
+ : [val] "d" (val)
+ : "cc", "memory");
+}
+
+/* TODO: fall back to the PCI syscalls on machines without MIO support. */
+static __rte_always_inline uint8_t
+rte_read8_relaxed(const volatile void *addr)
+{
+ return pcilgi(addr, 1);
+}
+
+static __rte_always_inline uint16_t
+rte_read16_relaxed(const volatile void *addr)
+{
+ return pcilgi(addr, 2);
+}
+
+static __rte_always_inline uint32_t
+rte_read32_relaxed(const volatile void *addr)
+{
+ return pcilgi(addr, 4);
+}
+
+static __rte_always_inline uint64_t
+rte_read64_relaxed(const volatile void *addr)
+{
+ return pcilgi(addr, 8);
+}
+
+static __rte_always_inline void
+rte_write8_relaxed(uint8_t value, volatile void *addr)
+{
+ pcistgi(addr, value, sizeof(value));
+}
+
+static __rte_always_inline void
+rte_write16_relaxed(uint16_t value, volatile void *addr)
+{
+ pcistgi(addr, value, sizeof(value));
+}
+
+static __rte_always_inline void
+rte_write32_relaxed(uint32_t value, volatile void *addr)
+{
+ pcistgi(addr, value, sizeof(value));
+}
+
+static __rte_always_inline void
+rte_write64_relaxed(uint64_t value, volatile void *addr)
+{
+ pcistgi(addr, value, sizeof(value));
+}
+
+static __rte_always_inline uint8_t
+rte_read8(const volatile void *addr)
+{
+ uint8_t val;
+ val = rte_read8_relaxed(addr);
+ rte_io_rmb();
+ return val;
+}
+
+static __rte_always_inline uint16_t
+rte_read16(const volatile void *addr)
+{
+ uint16_t val;
+ val = rte_read16_relaxed(addr);
+ rte_io_rmb();
+ return val;
+}
+
+static __rte_always_inline uint32_t
+rte_read32(const volatile void *addr)
+{
+ uint32_t val;
+ val = rte_read32_relaxed(addr);
+ rte_io_rmb();
+ return val;
+}
+
+static __rte_always_inline uint64_t
+rte_read64(const volatile void *addr)
+{
+ uint64_t val;
+ val = rte_read64_relaxed(addr);
+ rte_io_rmb();
+ return val;
+}
+
+static __rte_always_inline void
+rte_write8(uint8_t value, volatile void *addr)
+{
+ rte_io_wmb();
+ rte_write8_relaxed(value, addr);
+}
+
+static __rte_always_inline void
+rte_write16(uint16_t value, volatile void *addr)
+{
+ rte_io_wmb();
+ rte_write16_relaxed(value, addr);
+}
+
+static __rte_always_inline void
+rte_write32(uint32_t value, volatile void *addr)
+{
+ rte_io_wmb();
+ rte_write32_relaxed(value, addr);
+}
+
+static __rte_always_inline void
+rte_write32_wc(uint32_t value, volatile void *addr)
+{
+ rte_write32(value, addr);
+}
+
+static __rte_always_inline void
+rte_write64(uint64_t value, volatile void *addr)
+{
+ rte_io_wmb();
+ rte_write64_relaxed(value, addr);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_IO_S390X_H_ */
new file mode 100644
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#ifndef _RTE_MCSLOCK_S390X_H_
+#define _RTE_MCSLOCK_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_mcslock.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MCSLOCK_S390X_H_ */
new file mode 100644
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_MEMCPY_S390X_H_
+#define _RTE_MEMCPY_S390X_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_memcpy.h"
+
+
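+/* All fixed-size movers below simply delegate to libc memcpy, which
+ * is assumed to be well optimized for s390x.
+ */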
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 16);
+}
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 32);
+}
+static inline void
+rte_mov48(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 48);
+}
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 64);
+}
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 128);
+}
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 256);
+}
+#define rte_memcpy(d, s, n) memcpy((d), (s), (n))
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCPY_S390X_H_ */
new file mode 100644
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_PAUSE_S390X_H_
+#define _RTE_PAUSE_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_pause.h"
+
+static inline void rte_pause(void)
+{
+	/* No spin-loop hint instruction is used on s390x. */
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PAUSE_S390X_H_ */
new file mode 100644
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef _RTE_POWER_INTRINSIC_S390X_H_
+#define _RTE_POWER_INTRINSIC_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+
+#include "generic/rte_power_intrinsics.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_POWER_INTRINSIC_S390X_H_ */
new file mode 100644
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_PREFETCH_S390X_H_
+#define _RTE_PREFETCH_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include "generic/rte_prefetch.h"
+
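+/*
+ * The z/Architecture PFD instruction takes an access-intent code
+ * (1 = fetch, 2 = store) rather than a temporal-locality hint, so all
+ * three prefetch levels map to the same instruction.
+ */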
+static inline void rte_prefetch0(const volatile void *p)
+{
+ asm volatile ("pfd 1, 0(%[p])" : : [p] "r" (p));
+}
+
+static inline void rte_prefetch1(const volatile void *p)
+{
+ asm volatile ("pfd 1, 0(%[p])" : : [p] "r" (p));
+}
+
+static inline void rte_prefetch2(const volatile void *p)
+{
+ asm volatile ("pfd 1, 0(%[p])" : : [p] "r" (p));
+}
+
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+ /* non-temporal version not available, fallback to rte_prefetch0 */
+ rte_prefetch0(p);
+}
+
+__rte_experimental
+static inline void rte_cldemote(const volatile void *p)
+{
+ RTE_SET_USED(p);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PREFETCH_S390X_H_ */
new file mode 100644
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_RWLOCK_S390X_H_
+#define _RTE_RWLOCK_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_rwlock.h"
+
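+/* No transactional-memory path: the _tm variants take the plain locks. */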
+static inline void
+rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
+{
+ rte_rwlock_read_lock(rwl);
+}
+
+static inline void
+rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl)
+{
+ rte_rwlock_read_unlock(rwl);
+}
+
+static inline void
+rte_rwlock_write_lock_tm(rte_rwlock_t *rwl)
+{
+ rte_rwlock_write_lock(rwl);
+}
+
+static inline void
+rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl)
+{
+ rte_rwlock_write_unlock(rwl);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_RWLOCK_S390X_H_ */
new file mode 100644
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_SPINLOCK_S390X_H_
+#define _RTE_SPINLOCK_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include "generic/rte_spinlock.h"
+
+#ifndef RTE_FORCE_INTRINSICS
+
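+/*
+ * Test-and-test-and-set: the inner loop spins on a plain load so the
+ * lock's cache line is only written to when it appears free.
+ */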
+static inline void
+rte_spinlock_lock(rte_spinlock_t *sl)
+{
+ while (__sync_lock_test_and_set(&sl->locked, 1))
+ while (sl->locked)
+ rte_pause();
+}
+
+static inline void
+rte_spinlock_unlock(rte_spinlock_t *sl)
+{
+ __sync_lock_release(&sl->locked);
+}
+
+static inline int
+rte_spinlock_trylock(rte_spinlock_t *sl)
+{
+ return __sync_lock_test_and_set(&sl->locked, 1) == 0;
+}
+
+#endif /* RTE_FORCE_INTRINSICS */
+
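+/*
+ * Transactional execution is reported as unsupported, so each _tm
+ * variant below falls back to its plain locking counterpart.
+ */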
+static inline int rte_tm_supported(void)
+{
+ return 0;
+}
+
+static inline void
+rte_spinlock_lock_tm(rte_spinlock_t *sl)
+{
+ rte_spinlock_lock(sl); /* fall-back */
+}
+
+static inline int
+rte_spinlock_trylock_tm(rte_spinlock_t *sl)
+{
+ return rte_spinlock_trylock(sl);
+}
+
+static inline void
+rte_spinlock_unlock_tm(rte_spinlock_t *sl)
+{
+ rte_spinlock_unlock(sl);
+}
+
+static inline void
+rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr)
+{
+ rte_spinlock_recursive_lock(slr); /* fall-back */
+}
+
+static inline void
+rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr)
+{
+ rte_spinlock_recursive_unlock(slr);
+}
+
+static inline int
+rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr)
+{
+ return rte_spinlock_recursive_trylock(slr);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_SPINLOCK_S390X_H_ */
new file mode 100644
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2019
+ */
+
+#ifndef _RTE_TICKETLOCK_S390X_H_
+#define _RTE_TICKETLOCK_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_ticketlock.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_TICKETLOCK_S390X_H_ */
new file mode 100644
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_VECT_S390X_H_
+#define _RTE_VECT_S390X_H_
+
+#include <vecintrin.h>
+#include "generic/rte_vect.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_256
+
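+/* Generic 128-bit GCC vector type; it lives in the z/Architecture
+ * vector (VX) registers when the vector facility is present.
+ */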
+typedef int xmm_t __attribute__((vector_size(4*sizeof(int))));
+
+#define XMM_SIZE (sizeof(xmm_t))
+#define XMM_MASK (XMM_SIZE - 1)
+
+typedef union rte_xmm {
+ xmm_t x;
+ uint8_t u8[XMM_SIZE / sizeof(uint8_t)];
+ uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
+ uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
+ uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
+ double pd[XMM_SIZE / sizeof(double)];
+} __rte_aligned(16) rte_xmm_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_VECT_S390X_H_ */
new file mode 100644
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# (c) Copyright IBM Corp. 2018, 2019
+
+subdir('include')
+
+# Note: the 19.xx zarch patches added these sources via the
+# eal_common_arch_sources variable in lib/librte_eal/common/arch/s390x/meson.build.
+sources += files(
+ 'rte_cpuflags.c',
+ 'rte_cycles.c',
+ 'rte_hypervisor.c',
+ 'rte_power_intrinsics.c',
+)
new file mode 100644
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#include "rte_cpuflags.h"
+
+#include <elf.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+
+/* Symbolic values for the entries in the auxiliary table */
+#define AT_HWCAP 16
+#define AT_HWCAP2 26
+
+/* software based registers */
+enum cpu_register_t {
+ REG_NONE = 0,
+ REG_HWCAP,
+ REG_HWCAP2,
+ REG_MAX
+};
+
+typedef uint32_t hwcap_registers_t[REG_MAX];
+
+struct feature_entry {
+ uint32_t reg;
+ uint32_t bit;
+#define CPU_FLAG_NAME_MAX_LEN 64
+ char name[CPU_FLAG_NAME_MAX_LEN];
+};
+
+#define FEAT_DEF(name, reg, bit) \
+ [RTE_CPUFLAG_##name] = {reg, bit, #name},
+
+const struct feature_entry rte_cpu_feature_table[] = {
+ FEAT_DEF(ESAN3, REG_HWCAP, 0)
+ FEAT_DEF(ZARCH, REG_HWCAP, 1)
+ FEAT_DEF(STFLE, REG_HWCAP, 2)
+ FEAT_DEF(MSA, REG_HWCAP, 3)
+ FEAT_DEF(LDISP, REG_HWCAP, 4)
+ FEAT_DEF(EIMM, REG_HWCAP, 5)
+ FEAT_DEF(DFP, REG_HWCAP, 6)
+ FEAT_DEF(HPAGE, REG_HWCAP, 7)
+ FEAT_DEF(ETF3EH, REG_HWCAP, 8)
+ FEAT_DEF(HIGH_GPRS, REG_HWCAP, 9)
+ FEAT_DEF(TE, REG_HWCAP, 10)
+ FEAT_DEF(VXRS, REG_HWCAP, 11)
+ FEAT_DEF(VXRS_BCD, REG_HWCAP, 12)
+ FEAT_DEF(VXRS_EXT, REG_HWCAP, 13)
+ FEAT_DEF(GS, REG_HWCAP, 14)
+};
+
+/*
+ * Read the AUXV software registers and get the CPU features for s390x.
+ */
+static void
+rte_cpu_get_features(hwcap_registers_t out)
+{
+ out[REG_HWCAP] = rte_cpu_getauxval(AT_HWCAP);
+ out[REG_HWCAP2] = rte_cpu_getauxval(AT_HWCAP2);
+}
+
+/*
+ * Checks if a particular flag is available on current machine.
+ */
+int
+rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
+{
+ const struct feature_entry *feat;
+ hwcap_registers_t regs = {0};
+
+ if (feature >= RTE_CPUFLAG_NUMFLAGS)
+ return -ENOENT;
+
+ feat = &rte_cpu_feature_table[feature];
+ if (feat->reg == REG_NONE)
+ return -EFAULT;
+
+ rte_cpu_get_features(regs);
+ return (regs[feat->reg] >> feat->bit) & 1;
+}
+
+const char *
+rte_cpu_get_flag_name(enum rte_cpu_flag_t feature)
+{
+ if (feature >= RTE_CPUFLAG_NUMFLAGS)
+ return NULL;
+ return rte_cpu_feature_table[feature].name;
+}
new file mode 100644
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#include "eal_private.h"
+
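+/*
+ * No architecture-specific counter frequency is reported; returning 0
+ * makes the EAL fall back to estimating the timer frequency at startup.
+ */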
+uint64_t
+get_tsc_freq_arch(void)
+{
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#include "rte_hypervisor.h"
+
+enum rte_hypervisor
+rte_hypervisor_get(void)
+{
+ return RTE_HYPERVISOR_UNKNOWN;
+}
new file mode 100644
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Intel Corporation
+ */
+
+#include "rte_power_intrinsics.h"
+
+/**
+ * This function is not supported on s390x.
+ */
+int
+rte_power_monitor(const struct rte_power_monitor_cond *pmc,
+ const uint64_t tsc_timestamp)
+{
+ RTE_SET_USED(pmc);
+ RTE_SET_USED(tsc_timestamp);
+
+ return -ENOTSUP;
+}
+
+/**
+ * This function is not supported on s390x.
+ */
+int
+rte_power_pause(const uint64_t tsc_timestamp)
+{
+ RTE_SET_USED(tsc_timestamp);
+
+ return -ENOTSUP;
+}
+
+/**
+ * This function is not supported on s390x.
+ */
+int
+rte_power_monitor_wakeup(const unsigned int lcore_id)
+{
+ RTE_SET_USED(lcore_id);
+
+ return -ENOTSUP;
+}
+
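+/**
+ * This function is not supported on s390x.
+ */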
+int
+rte_power_monitor_multi(const struct rte_power_monitor_cond pmc[],
+ const uint32_t num, const uint64_t tsc_timestamp)
+{
+ RTE_SET_USED(pmc);
+ RTE_SET_USED(num);
+ RTE_SET_USED(tsc_timestamp);
+
+ return -ENOTSUP;
+}
@@ -123,9 +123,20 @@ rte_fbk_hash_add_key_with_bucket(struct rte_fbk_hash_table *ht,
* corrupted due to race conditions, but it's still possible to
* overwrite entries that have just been made valid.
*/
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
const uint64_t new_entry = ((uint64_t)(key) << 32) |
((uint64_t)(value) << 16) |
1; /* 1 = is_entry bit. */
+#else
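+ /*
+  * Big-endian layout: is_entry, the first field of the entry union,
+  * occupies the most significant 16 bits of the 64-bit word.
+  */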
+ const uint64_t new_entry =
+ ((uint64_t)(1) << 48) | /* 1 = is_entry bit. */
+ ((uint64_t)(value) << 32) |
+ (uint64_t)(key);
+#endif
uint32_t i;
for (i = 0; i < ht->entries_per_bucket; i++) {
@@ -7,6 +7,7 @@ headers = files('rte_lpm.h', 'rte_lpm6.h')
# without worrying about which architecture we actually need
indirect_headers += files(
'rte_lpm_altivec.h',
'rte_lpm_neon.h',
+ 'rte_lpm_s390x.h',
'rte_lpm_scalar.h',
'rte_lpm_sse.h',
@@ -407,6 +407,8 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
#include "rte_lpm_neon.h"
#elif defined(RTE_ARCH_PPC_64)
#include "rte_lpm_altivec.h"
+#elif defined(RTE_ARCH_S390X)
+#include "rte_lpm_s390x.h"
#elif defined(RTE_ARCH_X86)
#include "rte_lpm_sse.h"
#else
@@ -18,6 +18,7 @@
#include <assert.h>
#include <rte_jhash.h>
#include <rte_tailq.h>
+#include <rte_byteorder.h>
#include "rte_lpm6.h"
@@ -52,6 +53,8 @@ static struct rte_tailq_elem rte_lpm6_tailq = {
};
EAL_REGISTER_TAILQ(rte_lpm6_tailq)
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
/** Tbl entry structure. It is the same for both tbl24 and tbl8 */
struct rte_lpm6_tbl_entry {
uint32_t next_hop: 21; /**< Next hop / next table to be checked. */
@@ -63,6 +66,24 @@
uint32_t ext_entry :1; /**< External entry. */
};
+#else
+
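+/*
+ * Big-endian bitfields are allocated MSB-first, so the field order is
+ * reversed relative to the little-endian definition above.
+ */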
+struct rte_lpm6_tbl_entry {
+ /* Flags. */
+ uint32_t ext_entry :1; /**< External entry. */
+ uint32_t valid_group :1; /**< Group validation flag. */
+ uint32_t valid :1; /**< Validation flag. */
+
+ uint32_t depth :8; /**< Rule depth. */
+ uint32_t next_hop: 21; /**< Next hop / next table to be checked. */
+};
+
+#endif
+
/** Rules tbl entry structure. */
struct rte_lpm6_rule {
uint8_t ip[RTE_LPM6_IPV6_ADDR_SIZE]; /**< Rule IP address. */
new file mode 100644
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2016, 2018
+ */
+
+#ifndef _RTE_LPM_S390X_H_
+#define _RTE_LPM_S390X_H_
+
+#include <rte_branch_prediction.h>
+#include <rte_byteorder.h>
+#include <rte_common.h>
+#include <rte_vect.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline void
+rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
+ uint32_t defv)
+{
+ typedef int vector_signed_int
+ __attribute__((vector_size(4*sizeof(int))));
+ vector_signed_int i24;
+ rte_xmm_t i8;
+ uint32_t tbl[4];
+ uint64_t idx, pt, pt2;
+ const uint32_t *ptbl;
+
+ const uint32_t mask = UINT8_MAX;
+ const vector_signed_int mask8 = (xmm_t){mask, mask, mask, mask};
+
+ /*
+ * RTE_LPM_VALID_EXT_ENTRY_BITMASK for 2 LPM entries
+ * as one 64-bit value (0x0300000003000000).
+ */
+ const uint64_t mask_xv =
+ ((uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK |
+ (uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK << 32);
+
+ /*
+ * RTE_LPM_LOOKUP_SUCCESS for 2 LPM entries
+ * as one 64-bit value (0x0100000001000000).
+ */
+ const uint64_t mask_v =
+ ((uint64_t)RTE_LPM_LOOKUP_SUCCESS |
+ (uint64_t)RTE_LPM_LOOKUP_SUCCESS << 32);
+
+ /* get 4 indexes for tbl24[]. */
+ i24[0] = (uint32_t)ip[0] >> 8;
+ i24[1] = (uint32_t)ip[1] >> 8;
+ i24[2] = (uint32_t)ip[2] >> 8;
+ i24[3] = (uint32_t)ip[3] >> 8;
+
+ /* extract values from tbl24[] */
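+ /* (ip >> 8) is already below 1 << 24; the clamps are purely defensive. */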
+ idx = (uint32_t)i24[0];
+ idx = idx < (1<<24) ? idx : (1<<24)-1;
+ ptbl = (const uint32_t *)&lpm->tbl24[idx];
+ tbl[0] = *ptbl;
+
+ idx = (uint32_t) i24[1];
+ idx = idx < (1<<24) ? idx : (1<<24)-1;
+ ptbl = (const uint32_t *)&lpm->tbl24[idx];
+ tbl[1] = *ptbl;
+
+ idx = (uint32_t) i24[2];
+ idx = idx < (1<<24) ? idx : (1<<24)-1;
+ ptbl = (const uint32_t *)&lpm->tbl24[idx];
+ tbl[2] = *ptbl;
+
+ idx = (uint32_t) i24[3];
+ idx = idx < (1<<24) ? idx : (1<<24)-1;
+ ptbl = (const uint32_t *)&lpm->tbl24[idx];
+ tbl[3] = *ptbl;
+
+ /* get 4 indexes for tbl8[]. */
+ i8.x = vec_and(ip, mask8);
+
+ /*
+  * s390x is big-endian: keep tbl[0]/tbl[2] in the upper words so the
+  * 64-bit stores into hop[] below write the four entries in order.
+  */
+ pt = (uint64_t)tbl[0] << 32 |
+ (uint64_t)tbl[1];
+ pt2 = (uint64_t)tbl[2] << 32 |
+ (uint64_t)tbl[3];
+
+ /* search successfully finished for all 4 IP addresses. */
+ if (likely((pt & mask_xv) == mask_v) &&
+ likely((pt2 & mask_xv) == mask_v)) {
+ *(uint64_t *)hop = pt & RTE_LPM_MASKX4_RES;
+ *(uint64_t *)(hop + 2) = pt2 & RTE_LPM_MASKX4_RES;
+ return;
+ }
+
+ if (unlikely((pt >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+ RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+ i8.u32[0] = i8.u32[0] +
+ (uint8_t)tbl[0] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+ ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[0]];
+ tbl[0] = *ptbl;
+ }
+ if (unlikely((pt & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+ RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+ i8.u32[1] = i8.u32[1] +
+ (uint8_t)tbl[1] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+ ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[1]];
+ tbl[1] = *ptbl;
+ }
+ if (unlikely((pt2 >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+ RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+ i8.u32[2] = i8.u32[2] +
+ (uint8_t)tbl[2] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+ ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[2]];
+ tbl[2] = *ptbl;
+ }
+ if (unlikely((pt2 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+ RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+ i8.u32[3] = i8.u32[3] +
+ (uint8_t)tbl[3] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+ ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[3]];
+ tbl[3] = *ptbl;
+ }
+
+ hop[0] = (tbl[0] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[0] & 0x00FFFFFF : defv;
+ hop[1] = (tbl[1] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[1] & 0x00FFFFFF : defv;
+ hop[2] = (tbl[2] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[2] & 0x00FFFFFF : defv;
+ hop[3] = (tbl[3] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[3] & 0x00FFFFFF : defv;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_LPM_S390X_H_ */
@@ -58,6 +58,8 @@ elif host_machine.cpu_family().startswith('loongarch')
arch_subdir = 'loongarch'
elif host_machine.cpu_family().startswith('ppc')
arch_subdir = 'ppc'
elif host_machine.cpu_family().startswith('riscv')
arch_subdir = 'riscv'
+elif host_machine.cpu_family().startswith('s390x')
+ arch_subdir = 's390x'
endif