[dpdk-dev] [PATCH v6 1/9] bpf: add BPF loading and execution framework

Konstantin Ananyev konstantin.ananyev at intel.com
Thu May 10 12:23:03 CEST 2018


librte_bpf provides a framework to load and execute eBPF bytecode
inside user-space dpdk based applications.
It supports a basic set of features from the eBPF spec
(https://www.kernel.org/doc/Documentation/networking/filter.txt).

Not currently supported features:
 - JIT
 - cBPF
 - tail-pointer call
 - eBPF MAP
 - skb
 - function calls for 32-bit apps
 - mbuf pointer as input parameter for 32-bit apps

Signed-off-by: Konstantin Ananyev <konstantin.ananyev at intel.com>
Acked-by: Ferruh Yigit <ferruh.yigit at intel.com>
---
 MAINTAINERS                            |   3 +
 config/common_base                     |   5 +
 doc/guides/rel_notes/release_18_05.rst |   1 +
 lib/Makefile                           |   2 +
 lib/librte_bpf/Makefile                |  30 +++
 lib/librte_bpf/bpf.c                   |  59 +++++
 lib/librte_bpf/bpf_def.h               | 138 ++++++++++
 lib/librte_bpf/bpf_exec.c              | 453 +++++++++++++++++++++++++++++++++
 lib/librte_bpf/bpf_impl.h              |  41 +++
 lib/librte_bpf/bpf_load.c              |  85 +++++++
 lib/librte_bpf/bpf_validate.c          |  55 ++++
 lib/librte_bpf/meson.build             |  13 +
 lib/librte_bpf/rte_bpf.h               | 164 ++++++++++++
 lib/librte_bpf/rte_bpf_version.map     |  11 +
 lib/meson.build                        |   2 +-
 mk/rte.app.mk                          |   2 +
 16 files changed, 1063 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_bpf/Makefile
 create mode 100644 lib/librte_bpf/bpf.c
 create mode 100644 lib/librte_bpf/bpf_def.h
 create mode 100644 lib/librte_bpf/bpf_exec.c
 create mode 100644 lib/librte_bpf/bpf_impl.h
 create mode 100644 lib/librte_bpf/bpf_load.c
 create mode 100644 lib/librte_bpf/bpf_validate.c
 create mode 100644 lib/librte_bpf/meson.build
 create mode 100644 lib/librte_bpf/rte_bpf.h
 create mode 100644 lib/librte_bpf/rte_bpf_version.map

diff --git a/MAINTAINERS b/MAINTAINERS
index 7105920f3..7350f61ed 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1060,6 +1060,9 @@ Latency statistics
 M: Reshma Pattan <reshma.pattan at intel.com>
 F: lib/librte_latencystats/
 
+BPF
+M: Konstantin Ananyev <konstantin.ananyev at intel.com>
+F: lib/librte_bpf/
 
 Test Applications
 -----------------
diff --git a/config/common_base b/config/common_base
index 0d181ace8..b598feae8 100644
--- a/config/common_base
+++ b/config/common_base
@@ -873,3 +873,8 @@ CONFIG_RTE_APP_CRYPTO_PERF=y
 # Compile the eventdev application
 #
 CONFIG_RTE_APP_EVENTDEV=y
+
+#
+# Compile librte_bpf
+#
+CONFIG_RTE_LIBRTE_BPF=y
diff --git a/doc/guides/rel_notes/release_18_05.rst b/doc/guides/rel_notes/release_18_05.rst
index 718734852..5d1cc1807 100644
--- a/doc/guides/rel_notes/release_18_05.rst
+++ b/doc/guides/rel_notes/release_18_05.rst
@@ -432,6 +432,7 @@ The libraries prepended with a plus sign were incremented in this version.
      librte_acl.so.2
      librte_bbdev.so.1
      librte_bitratestats.so.2
+   + librte_bpf.so.1
      librte_bus_dpaa.so.1
      librte_bus_fslmc.so.1
      librte_bus_pci.so.1
diff --git a/lib/Makefile b/lib/Makefile
index 057bf7890..29cea6429 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -98,6 +98,8 @@ DEPDIRS-librte_pdump := librte_eal librte_mempool librte_mbuf librte_ethdev
 DIRS-$(CONFIG_RTE_LIBRTE_GSO) += librte_gso
 DEPDIRS-librte_gso := librte_eal librte_mbuf librte_ethdev librte_net
 DEPDIRS-librte_gso += librte_mempool
+DIRS-$(CONFIG_RTE_LIBRTE_BPF) += librte_bpf
+DEPDIRS-librte_bpf := librte_eal librte_mempool librte_mbuf librte_ethdev
 
 ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
 DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni
diff --git a/lib/librte_bpf/Makefile b/lib/librte_bpf/Makefile
new file mode 100644
index 000000000..da9306564
--- /dev/null
+++ b/lib/librte_bpf/Makefile
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_bpf.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+LDLIBS += -lrte_net -lrte_eal
+LDLIBS += -lrte_mempool -lrte_ring
+LDLIBS += -lrte_mbuf -lrte_ethdev
+
+EXPORT_MAP := rte_bpf_version.map
+
+LIBABIVER := 1
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf.c
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_exec.c
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_load.c
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_validate.c
+
+# install header files
+SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += bpf_def.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += rte_bpf.h
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_bpf/bpf.c b/lib/librte_bpf/bpf.c
new file mode 100644
index 000000000..d7f68c017
--- /dev/null
+++ b/lib/librte_bpf/bpf.c
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_eal.h>
+
+#include "bpf_impl.h"
+
+int rte_bpf_logtype;
+
+__rte_experimental void
+rte_bpf_destroy(struct rte_bpf *bpf)
+{
+	if (bpf != NULL) {
+		if (bpf->jit.func != NULL)
+			munmap(bpf->jit.func, bpf->jit.sz);
+		munmap(bpf, bpf->sz);
+	}
+}
+
+__rte_experimental int
+rte_bpf_get_jit(const struct rte_bpf *bpf, struct rte_bpf_jit *jit)
+{
+	if (bpf == NULL || jit == NULL)
+		return -EINVAL;
+
+	jit[0] = bpf->jit;
+	return 0;
+}
+
+int
+bpf_jit(struct rte_bpf *bpf)
+{
+	int32_t rc;
+
+	rc = -ENOTSUP;
+	if (rc != 0)
+		RTE_BPF_LOG(WARNING, "%s(%p) failed, error code: %d;\n",
+			__func__, bpf, rc);
+	return rc;
+}
+
+RTE_INIT(rte_bpf_init_log);
+
+static void
+rte_bpf_init_log(void)
+{
+	rte_bpf_logtype = rte_log_register("lib.bpf");
+	if (rte_bpf_logtype >= 0)
+		rte_log_set_level(rte_bpf_logtype, RTE_LOG_INFO);
+}
diff --git a/lib/librte_bpf/bpf_def.h b/lib/librte_bpf/bpf_def.h
new file mode 100644
index 000000000..6b69de345
--- /dev/null
+++ b/lib/librte_bpf/bpf_def.h
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 1982, 1986, 1990, 1993
+ *      The Regents of the University of California.
+ * Copyright(c) 2018 Intel Corporation.
+ */
+
+#ifndef _RTE_BPF_DEF_H_
+#define _RTE_BPF_DEF_H_
+
+/**
+ * @file
+ *
+ * classic BPF (cBPF) and extended BPF (eBPF) related defines.
+ * For more information regarding cBPF and eBPF ISA and their differences,
+ * please refer to:
+ * https://www.kernel.org/doc/Documentation/networking/filter.txt.
+ * As a rule of thumb for that file:
+ * all definitions used by both cBPF and eBPF start with bpf(BPF)_ prefix,
+ * while eBPF-only ones start with the ebpf(EBPF) prefix.
+ */
+
+#include <stdint.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * The instruction encodings.
+ */
+
+/* Instruction classes */
+#define BPF_CLASS(code) ((code) & 0x07)
+#define	BPF_LD		0x00
+#define	BPF_LDX		0x01
+#define	BPF_ST		0x02
+#define	BPF_STX		0x03
+#define	BPF_ALU		0x04
+#define	BPF_JMP		0x05
+#define	BPF_RET		0x06
+#define	BPF_MISC        0x07
+
+#define EBPF_ALU64	0x07
+
+/* ld/ldx fields */
+#define BPF_SIZE(code)  ((code) & 0x18)
+#define	BPF_W		0x00
+#define	BPF_H		0x08
+#define	BPF_B		0x10
+#define	EBPF_DW		0x18
+
+#define BPF_MODE(code)  ((code) & 0xe0)
+#define	BPF_IMM		0x00
+#define	BPF_ABS		0x20
+#define	BPF_IND		0x40
+#define	BPF_MEM		0x60
+#define	BPF_LEN		0x80
+#define	BPF_MSH		0xa0
+
+#define EBPF_XADD	0xc0
+
+/* alu/jmp fields */
+#define BPF_OP(code)    ((code) & 0xf0)
+#define	BPF_ADD		0x00
+#define	BPF_SUB		0x10
+#define	BPF_MUL		0x20
+#define	BPF_DIV		0x30
+#define	BPF_OR		0x40
+#define	BPF_AND		0x50
+#define	BPF_LSH		0x60
+#define	BPF_RSH		0x70
+#define	BPF_NEG		0x80
+#define	BPF_MOD		0x90
+#define	BPF_XOR		0xa0
+
+#define EBPF_MOV	0xb0
+#define EBPF_ARSH	0xc0
+#define EBPF_END	0xd0
+
+#define	BPF_JA		0x00
+#define	BPF_JEQ		0x10
+#define	BPF_JGT		0x20
+#define	BPF_JGE		0x30
+#define	BPF_JSET        0x40
+
+#define EBPF_JNE	0x50
+#define EBPF_JSGT	0x60
+#define EBPF_JSGE	0x70
+#define EBPF_CALL	0x80
+#define EBPF_EXIT	0x90
+#define EBPF_JLT	0xa0
+#define EBPF_JLE	0xb0
+#define EBPF_JSLT	0xc0
+#define EBPF_JSLE	0xd0
+
+#define BPF_SRC(code)   ((code) & 0x08)
+#define	BPF_K		0x00
+#define	BPF_X		0x08
+
+/* if BPF_OP(code) == EBPF_END */
+#define EBPF_TO_LE	0x00  /* convert to little-endian */
+#define EBPF_TO_BE	0x08  /* convert to big-endian */
+
+/*
+ * eBPF registers
+ */
+enum {
+	EBPF_REG_0,  /* return value from internal function/for eBPF program */
+	EBPF_REG_1,  /* 0-th argument to internal function */
+	EBPF_REG_2,  /* 1-th argument to internal function */
+	EBPF_REG_3,  /* 2-th argument to internal function */
+	EBPF_REG_4,  /* 3-th argument to internal function */
+	EBPF_REG_5,  /* 4-th argument to internal function */
+	EBPF_REG_6,  /* callee saved register */
+	EBPF_REG_7,  /* callee saved register */
+	EBPF_REG_8,  /* callee saved register */
+	EBPF_REG_9,  /* callee saved register */
+	EBPF_REG_10, /* stack pointer (read-only) */
+	EBPF_REG_NUM,
+};
+
+/*
+ * eBPF instruction format
+ */
+struct ebpf_insn {
+	uint8_t code;
+	uint8_t dst_reg:4;
+	uint8_t src_reg:4;
+	int16_t off;
+	int32_t imm;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BPF_DEF_H_ */
diff --git a/lib/librte_bpf/bpf_exec.c b/lib/librte_bpf/bpf_exec.c
new file mode 100644
index 000000000..e373b1f3d
--- /dev/null
+++ b/lib/librte_bpf/bpf_exec.c
@@ -0,0 +1,453 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_byteorder.h>
+
+#include "bpf_impl.h"
+
+#define BPF_JMP_UNC(ins)	((ins) += (ins)->off)
+
+#define BPF_JMP_CND_REG(reg, ins, op, type)	\
+	((ins) += \
+		((type)(reg)[(ins)->dst_reg] op (type)(reg)[(ins)->src_reg]) ? \
+		(ins)->off : 0)
+
+#define BPF_JMP_CND_IMM(reg, ins, op, type)	\
+	((ins) += \
+		((type)(reg)[(ins)->dst_reg] op (type)(ins)->imm) ? \
+		(ins)->off : 0)
+
+#define BPF_NEG_ALU(reg, ins, type)	\
+	((reg)[(ins)->dst_reg] = (type)(-(reg)[(ins)->dst_reg]))
+
+#define EBPF_MOV_ALU_REG(reg, ins, type)	\
+	((reg)[(ins)->dst_reg] = (type)(reg)[(ins)->src_reg])
+
+#define BPF_OP_ALU_REG(reg, ins, op, type)	\
+	((reg)[(ins)->dst_reg] = \
+		(type)(reg)[(ins)->dst_reg] op (type)(reg)[(ins)->src_reg])
+
+#define EBPF_MOV_ALU_IMM(reg, ins, type)	\
+	((reg)[(ins)->dst_reg] = (type)(ins)->imm)
+
+#define BPF_OP_ALU_IMM(reg, ins, op, type)	\
+	((reg)[(ins)->dst_reg] = \
+		(type)(reg)[(ins)->dst_reg] op (type)(ins)->imm)
+
+#define BPF_DIV_ZERO_CHECK(bpf, reg, ins, type) do { \
+	if ((type)(reg)[(ins)->src_reg] == 0) { \
+		RTE_BPF_LOG(ERR, \
+			"%s(%p): division by 0 at pc: %#zx;\n", \
+			__func__, bpf, \
+			(uintptr_t)(ins) - (uintptr_t)(bpf)->prm.ins); \
+		return 0; \
+	} \
+} while (0)
+
+#define BPF_LD_REG(reg, ins, type)	\
+	((reg)[(ins)->dst_reg] = \
+		*(type *)(uintptr_t)((reg)[(ins)->src_reg] + (ins)->off))
+
+#define BPF_ST_IMM(reg, ins, type)	\
+	(*(type *)(uintptr_t)((reg)[(ins)->dst_reg] + (ins)->off) = \
+		(type)(ins)->imm)
+
+#define BPF_ST_REG(reg, ins, type)	\
+	(*(type *)(uintptr_t)((reg)[(ins)->dst_reg] + (ins)->off) = \
+		(type)(reg)[(ins)->src_reg])
+
+#define BPF_ST_XADD_REG(reg, ins, tp)	\
+	(rte_atomic##tp##_add((rte_atomic##tp##_t *) \
+		(uintptr_t)((reg)[(ins)->dst_reg] + (ins)->off), \
+		reg[ins->src_reg]))
+
+static inline void
+bpf_alu_be(uint64_t reg[EBPF_REG_NUM], const struct ebpf_insn *ins)
+{
+	uint64_t *v;
+
+	v = reg + ins->dst_reg;
+	switch (ins->imm) {
+	case 16:
+		*v = rte_cpu_to_be_16(*v);
+		break;
+	case 32:
+		*v = rte_cpu_to_be_32(*v);
+		break;
+	case 64:
+		*v = rte_cpu_to_be_64(*v);
+		break;
+	}
+}
+
+static inline void
+bpf_alu_le(uint64_t reg[EBPF_REG_NUM], const struct ebpf_insn *ins)
+{
+	uint64_t *v;
+
+	v = reg + ins->dst_reg;
+	switch (ins->imm) {
+	case 16:
+		*v = rte_cpu_to_le_16(*v);
+		break;
+	case 32:
+		*v = rte_cpu_to_le_32(*v);
+		break;
+	case 64:
+		*v = rte_cpu_to_le_64(*v);
+		break;
+	}
+}
+
+static inline uint64_t
+bpf_exec(const struct rte_bpf *bpf, uint64_t reg[EBPF_REG_NUM])
+{
+	const struct ebpf_insn *ins;
+
+	for (ins = bpf->prm.ins; ; ins++) {
+		switch (ins->code) {
+		/* 32 bit ALU IMM operations */
+		case (BPF_ALU | BPF_ADD | BPF_K):
+			BPF_OP_ALU_IMM(reg, ins, +, uint32_t);
+			break;
+		case (BPF_ALU | BPF_SUB | BPF_K):
+			BPF_OP_ALU_IMM(reg, ins, -, uint32_t);
+			break;
+		case (BPF_ALU | BPF_AND | BPF_K):
+			BPF_OP_ALU_IMM(reg, ins, &, uint32_t);
+			break;
+		case (BPF_ALU | BPF_OR | BPF_K):
+			BPF_OP_ALU_IMM(reg, ins, |, uint32_t);
+			break;
+		case (BPF_ALU | BPF_LSH | BPF_K):
+			BPF_OP_ALU_IMM(reg, ins, <<, uint32_t);
+			break;
+		case (BPF_ALU | BPF_RSH | BPF_K):
+			BPF_OP_ALU_IMM(reg, ins, >>, uint32_t);
+			break;
+		case (BPF_ALU | BPF_XOR | BPF_K):
+			BPF_OP_ALU_IMM(reg, ins, ^, uint32_t);
+			break;
+		case (BPF_ALU | BPF_MUL | BPF_K):
+			BPF_OP_ALU_IMM(reg, ins, *, uint32_t);
+			break;
+		case (BPF_ALU | BPF_DIV | BPF_K):
+			BPF_OP_ALU_IMM(reg, ins, /, uint32_t);
+			break;
+		case (BPF_ALU | BPF_MOD | BPF_K):
+			BPF_OP_ALU_IMM(reg, ins, %, uint32_t);
+			break;
+		case (BPF_ALU | EBPF_MOV | BPF_K):
+			EBPF_MOV_ALU_IMM(reg, ins, uint32_t);
+			break;
+		/* 32 bit ALU REG operations */
+		case (BPF_ALU | BPF_ADD | BPF_X):
+			BPF_OP_ALU_REG(reg, ins, +, uint32_t);
+			break;
+		case (BPF_ALU | BPF_SUB | BPF_X):
+			BPF_OP_ALU_REG(reg, ins, -, uint32_t);
+			break;
+		case (BPF_ALU | BPF_AND | BPF_X):
+			BPF_OP_ALU_REG(reg, ins, &, uint32_t);
+			break;
+		case (BPF_ALU | BPF_OR | BPF_X):
+			BPF_OP_ALU_REG(reg, ins, |, uint32_t);
+			break;
+		case (BPF_ALU | BPF_LSH | BPF_X):
+			BPF_OP_ALU_REG(reg, ins, <<, uint32_t);
+			break;
+		case (BPF_ALU | BPF_RSH | BPF_X):
+			BPF_OP_ALU_REG(reg, ins, >>, uint32_t);
+			break;
+		case (BPF_ALU | BPF_XOR | BPF_X):
+			BPF_OP_ALU_REG(reg, ins, ^, uint32_t);
+			break;
+		case (BPF_ALU | BPF_MUL | BPF_X):
+			BPF_OP_ALU_REG(reg, ins, *, uint32_t);
+			break;
+		case (BPF_ALU | BPF_DIV | BPF_X):
+			BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint32_t);
+			BPF_OP_ALU_REG(reg, ins, /, uint32_t);
+			break;
+		case (BPF_ALU | BPF_MOD | BPF_X):
+			BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint32_t);
+			BPF_OP_ALU_REG(reg, ins, %, uint32_t);
+			break;
+		case (BPF_ALU | EBPF_MOV | BPF_X):
+			EBPF_MOV_ALU_REG(reg, ins, uint32_t);
+			break;
+		case (BPF_ALU | BPF_NEG):
+			BPF_NEG_ALU(reg, ins, uint32_t);
+			break;
+		case (BPF_ALU | EBPF_END | EBPF_TO_BE):
+			bpf_alu_be(reg, ins);
+			break;
+		case (BPF_ALU | EBPF_END | EBPF_TO_LE):
+			bpf_alu_le(reg, ins);
+			break;
+		/* 64 bit ALU IMM operations */
+		case (EBPF_ALU64 | BPF_ADD | BPF_K):
+			BPF_OP_ALU_IMM(reg, ins, +, uint64_t);
+			break;
+		case (EBPF_ALU64 | BPF_SUB | BPF_K):
+			BPF_OP_ALU_IMM(reg, ins, -, uint64_t);
+			break;
+		case (EBPF_ALU64 | BPF_AND | BPF_K):
+			BPF_OP_ALU_IMM(reg, ins, &, uint64_t);
+			break;
+		case (EBPF_ALU64 | BPF_OR | BPF_K):
+			BPF_OP_ALU_IMM(reg, ins, |, uint64_t);
+			break;
+		case (EBPF_ALU64 | BPF_LSH | BPF_K):
+			BPF_OP_ALU_IMM(reg, ins, <<, uint64_t);
+			break;
+		case (EBPF_ALU64 | BPF_RSH | BPF_K):
+			BPF_OP_ALU_IMM(reg, ins, >>, uint64_t);
+			break;
+		case (EBPF_ALU64 | EBPF_ARSH | BPF_K):
+			BPF_OP_ALU_IMM(reg, ins, >>, int64_t);
+			break;
+		case (EBPF_ALU64 | BPF_XOR | BPF_K):
+			BPF_OP_ALU_IMM(reg, ins, ^, uint64_t);
+			break;
+		case (EBPF_ALU64 | BPF_MUL | BPF_K):
+			BPF_OP_ALU_IMM(reg, ins, *, uint64_t);
+			break;
+		case (EBPF_ALU64 | BPF_DIV | BPF_K):
+			BPF_OP_ALU_IMM(reg, ins, /, uint64_t);
+			break;
+		case (EBPF_ALU64 | BPF_MOD | BPF_K):
+			BPF_OP_ALU_IMM(reg, ins, %, uint64_t);
+			break;
+		case (EBPF_ALU64 | EBPF_MOV | BPF_K):
+			EBPF_MOV_ALU_IMM(reg, ins, uint64_t);
+			break;
+		/* 64 bit ALU REG operations */
+		case (EBPF_ALU64 | BPF_ADD | BPF_X):
+			BPF_OP_ALU_REG(reg, ins, +, uint64_t);
+			break;
+		case (EBPF_ALU64 | BPF_SUB | BPF_X):
+			BPF_OP_ALU_REG(reg, ins, -, uint64_t);
+			break;
+		case (EBPF_ALU64 | BPF_AND | BPF_X):
+			BPF_OP_ALU_REG(reg, ins, &, uint64_t);
+			break;
+		case (EBPF_ALU64 | BPF_OR | BPF_X):
+			BPF_OP_ALU_REG(reg, ins, |, uint64_t);
+			break;
+		case (EBPF_ALU64 | BPF_LSH | BPF_X):
+			BPF_OP_ALU_REG(reg, ins, <<, uint64_t);
+			break;
+		case (EBPF_ALU64 | BPF_RSH | BPF_X):
+			BPF_OP_ALU_REG(reg, ins, >>, uint64_t);
+			break;
+		case (EBPF_ALU64 | EBPF_ARSH | BPF_X):
+			BPF_OP_ALU_REG(reg, ins, >>, int64_t);
+			break;
+		case (EBPF_ALU64 | BPF_XOR | BPF_X):
+			BPF_OP_ALU_REG(reg, ins, ^, uint64_t);
+			break;
+		case (EBPF_ALU64 | BPF_MUL | BPF_X):
+			BPF_OP_ALU_REG(reg, ins, *, uint64_t);
+			break;
+		case (EBPF_ALU64 | BPF_DIV | BPF_X):
+			BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint64_t);
+			BPF_OP_ALU_REG(reg, ins, /, uint64_t);
+			break;
+		case (EBPF_ALU64 | BPF_MOD | BPF_X):
+			BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint64_t);
+			BPF_OP_ALU_REG(reg, ins, %, uint64_t);
+			break;
+		case (EBPF_ALU64 | EBPF_MOV | BPF_X):
+			EBPF_MOV_ALU_REG(reg, ins, uint64_t);
+			break;
+		case (EBPF_ALU64 | BPF_NEG):
+			BPF_NEG_ALU(reg, ins, uint64_t);
+			break;
+		/* load instructions */
+		case (BPF_LDX | BPF_MEM | BPF_B):
+			BPF_LD_REG(reg, ins, uint8_t);
+			break;
+		case (BPF_LDX | BPF_MEM | BPF_H):
+			BPF_LD_REG(reg, ins, uint16_t);
+			break;
+		case (BPF_LDX | BPF_MEM | BPF_W):
+			BPF_LD_REG(reg, ins, uint32_t);
+			break;
+		case (BPF_LDX | BPF_MEM | EBPF_DW):
+			BPF_LD_REG(reg, ins, uint64_t);
+			break;
+		/* load 64 bit immediate value */
+		case (BPF_LD | BPF_IMM | EBPF_DW):
+			reg[ins->dst_reg] = (uint32_t)ins[0].imm |
+				(uint64_t)(uint32_t)ins[1].imm << 32;
+			ins++;
+			break;
+		/* store instructions */
+		case (BPF_STX | BPF_MEM | BPF_B):
+			BPF_ST_REG(reg, ins, uint8_t);
+			break;
+		case (BPF_STX | BPF_MEM | BPF_H):
+			BPF_ST_REG(reg, ins, uint16_t);
+			break;
+		case (BPF_STX | BPF_MEM | BPF_W):
+			BPF_ST_REG(reg, ins, uint32_t);
+			break;
+		case (BPF_STX | BPF_MEM | EBPF_DW):
+			BPF_ST_REG(reg, ins, uint64_t);
+			break;
+		case (BPF_ST | BPF_MEM | BPF_B):
+			BPF_ST_IMM(reg, ins, uint8_t);
+			break;
+		case (BPF_ST | BPF_MEM | BPF_H):
+			BPF_ST_IMM(reg, ins, uint16_t);
+			break;
+		case (BPF_ST | BPF_MEM | BPF_W):
+			BPF_ST_IMM(reg, ins, uint32_t);
+			break;
+		case (BPF_ST | BPF_MEM | EBPF_DW):
+			BPF_ST_IMM(reg, ins, uint64_t);
+			break;
+		/* atomic add instructions */
+		case (BPF_STX | EBPF_XADD | BPF_W):
+			BPF_ST_XADD_REG(reg, ins, 32);
+			break;
+		case (BPF_STX | EBPF_XADD | EBPF_DW):
+			BPF_ST_XADD_REG(reg, ins, 64);
+			break;
+		/* jump instructions */
+		case (BPF_JMP | BPF_JA):
+			BPF_JMP_UNC(ins);
+			break;
+		/* jump IMM instructions */
+		case (BPF_JMP | BPF_JEQ | BPF_K):
+			BPF_JMP_CND_IMM(reg, ins, ==, uint64_t);
+			break;
+		case (BPF_JMP | EBPF_JNE | BPF_K):
+			BPF_JMP_CND_IMM(reg, ins, !=, uint64_t);
+			break;
+		case (BPF_JMP | BPF_JGT | BPF_K):
+			BPF_JMP_CND_IMM(reg, ins, >, uint64_t);
+			break;
+		case (BPF_JMP | EBPF_JLT | BPF_K):
+			BPF_JMP_CND_IMM(reg, ins, <, uint64_t);
+			break;
+		case (BPF_JMP | BPF_JGE | BPF_K):
+			BPF_JMP_CND_IMM(reg, ins, >=, uint64_t);
+			break;
+		case (BPF_JMP | EBPF_JLE | BPF_K):
+			BPF_JMP_CND_IMM(reg, ins, <=, uint64_t);
+			break;
+		case (BPF_JMP | EBPF_JSGT | BPF_K):
+			BPF_JMP_CND_IMM(reg, ins, >, int64_t);
+			break;
+		case (BPF_JMP | EBPF_JSLT | BPF_K):
+			BPF_JMP_CND_IMM(reg, ins, <, int64_t);
+			break;
+		case (BPF_JMP | EBPF_JSGE | BPF_K):
+			BPF_JMP_CND_IMM(reg, ins, >=, int64_t);
+			break;
+		case (BPF_JMP | EBPF_JSLE | BPF_K):
+			BPF_JMP_CND_IMM(reg, ins, <=, int64_t);
+			break;
+		case (BPF_JMP | BPF_JSET | BPF_K):
+			BPF_JMP_CND_IMM(reg, ins, &, uint64_t);
+			break;
+		/* jump REG instructions */
+		case (BPF_JMP | BPF_JEQ | BPF_X):
+			BPF_JMP_CND_REG(reg, ins, ==, uint64_t);
+			break;
+		case (BPF_JMP | EBPF_JNE | BPF_X):
+			BPF_JMP_CND_REG(reg, ins, !=, uint64_t);
+			break;
+		case (BPF_JMP | BPF_JGT | BPF_X):
+			BPF_JMP_CND_REG(reg, ins, >, uint64_t);
+			break;
+		case (BPF_JMP | EBPF_JLT | BPF_X):
+			BPF_JMP_CND_REG(reg, ins, <, uint64_t);
+			break;
+		case (BPF_JMP | BPF_JGE | BPF_X):
+			BPF_JMP_CND_REG(reg, ins, >=, uint64_t);
+			break;
+		case (BPF_JMP | EBPF_JLE | BPF_X):
+			BPF_JMP_CND_REG(reg, ins, <=, uint64_t);
+			break;
+		case (BPF_JMP | EBPF_JSGT | BPF_X):
+			BPF_JMP_CND_REG(reg, ins, >, int64_t);
+			break;
+		case (BPF_JMP | EBPF_JSLT | BPF_X):
+			BPF_JMP_CND_REG(reg, ins, <, int64_t);
+			break;
+		case (BPF_JMP | EBPF_JSGE | BPF_X):
+			BPF_JMP_CND_REG(reg, ins, >=, int64_t);
+			break;
+		case (BPF_JMP | EBPF_JSLE | BPF_X):
+			BPF_JMP_CND_REG(reg, ins, <=, int64_t);
+			break;
+		case (BPF_JMP | BPF_JSET | BPF_X):
+			BPF_JMP_CND_REG(reg, ins, &, uint64_t);
+			break;
+		/* call instructions */
+		case (BPF_JMP | EBPF_CALL):
+			reg[EBPF_REG_0] = bpf->prm.xsym[ins->imm].func(
+				reg[EBPF_REG_1], reg[EBPF_REG_2],
+				reg[EBPF_REG_3], reg[EBPF_REG_4],
+				reg[EBPF_REG_5]);
+			break;
+		/* return instruction */
+		case (BPF_JMP | EBPF_EXIT):
+			return reg[EBPF_REG_0];
+		default:
+			RTE_BPF_LOG(ERR,
+				"%s(%p): invalid opcode %#x at pc: %#zx;\n",
+				__func__, bpf, ins->code,
+				(uintptr_t)ins - (uintptr_t)bpf->prm.ins);
+			return 0;
+		}
+	}
+
+	/* should never be reached */
+	RTE_VERIFY(0);
+	return 0;
+}
+
+__rte_experimental uint32_t
+rte_bpf_exec_burst(const struct rte_bpf *bpf, void *ctx[], uint64_t rc[],
+	uint32_t num)
+{
+	uint32_t i;
+	uint64_t reg[EBPF_REG_NUM];
+	uint64_t stack[MAX_BPF_STACK_SIZE / sizeof(uint64_t)];
+
+	for (i = 0; i != num; i++) {
+
+		reg[EBPF_REG_1] = (uintptr_t)ctx[i];
+		reg[EBPF_REG_10] = (uintptr_t)(stack + RTE_DIM(stack));
+
+		rc[i] = bpf_exec(bpf, reg);
+	}
+
+	return i;
+}
+
+__rte_experimental uint64_t
+rte_bpf_exec(const struct rte_bpf *bpf, void *ctx)
+{
+	uint64_t rc;
+
+	rte_bpf_exec_burst(bpf, &ctx, &rc, 1);
+	return rc;
+}
diff --git a/lib/librte_bpf/bpf_impl.h b/lib/librte_bpf/bpf_impl.h
new file mode 100644
index 000000000..5d7e65c31
--- /dev/null
+++ b/lib/librte_bpf/bpf_impl.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _BPF_H_
+#define _BPF_H_
+
+#include <rte_bpf.h>
+#include <sys/mman.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MAX_BPF_STACK_SIZE	0x200
+
+struct rte_bpf {
+	struct rte_bpf_prm prm;
+	struct rte_bpf_jit jit;
+	size_t sz;
+	uint32_t stack_sz;
+};
+
+extern int bpf_validate(struct rte_bpf *bpf);
+
+extern int bpf_jit(struct rte_bpf *bpf);
+
+#ifdef RTE_ARCH_X86_64
+extern int bpf_jit_x86(struct rte_bpf *);
+#endif
+
+extern int rte_bpf_logtype;
+
+#define	RTE_BPF_LOG(lvl, fmt, args...) \
+	rte_log(RTE_LOG_## lvl, rte_bpf_logtype, fmt, ##args)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _BPF_H_ */
diff --git a/lib/librte_bpf/bpf_load.c b/lib/librte_bpf/bpf_load.c
new file mode 100644
index 000000000..f28ecfb4d
--- /dev/null
+++ b/lib/librte_bpf/bpf_load.c
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_byteorder.h>
+#include <rte_errno.h>
+
+#include "bpf_impl.h"
+
+static struct rte_bpf *
+bpf_load(const struct rte_bpf_prm *prm)
+{
+	uint8_t *buf;
+	struct rte_bpf *bpf;
+	size_t sz, bsz, insz, xsz;
+
+	xsz =  prm->nb_xsym * sizeof(prm->xsym[0]);
+	insz = prm->nb_ins * sizeof(prm->ins[0]);
+	bsz = sizeof(bpf[0]);
+	sz = insz + xsz + bsz;
+
+	buf = mmap(NULL, sz, PROT_READ | PROT_WRITE,
+		MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (buf == MAP_FAILED)
+		return NULL;
+
+	bpf = (void *)buf;
+	bpf->sz = sz;
+
+	memcpy(&bpf->prm, prm, sizeof(bpf->prm));
+
+	memcpy(buf + bsz, prm->xsym, xsz);
+	memcpy(buf + bsz + xsz, prm->ins, insz);
+
+	bpf->prm.xsym = (void *)(buf + bsz);
+	bpf->prm.ins = (void *)(buf + bsz + xsz);
+
+	return bpf;
+}
+
+__rte_experimental struct rte_bpf *
+rte_bpf_load(const struct rte_bpf_prm *prm)
+{
+	struct rte_bpf *bpf;
+	int32_t rc;
+
+	if (prm == NULL || prm->ins == NULL) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	bpf = bpf_load(prm);
+	if (bpf == NULL) {
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+
+	rc = bpf_validate(bpf);
+	if (rc == 0) {
+		bpf_jit(bpf);
+		if (mprotect(bpf, bpf->sz, PROT_READ) != 0)
+			rc = -ENOMEM;
+	}
+
+	if (rc != 0) {
+		rte_bpf_destroy(bpf);
+		rte_errno = -rc;
+		return NULL;
+	}
+
+	return bpf;
+}
diff --git a/lib/librte_bpf/bpf_validate.c b/lib/librte_bpf/bpf_validate.c
new file mode 100644
index 000000000..6a1b33181
--- /dev/null
+++ b/lib/librte_bpf/bpf_validate.c
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_eal.h>
+
+#include "bpf_impl.h"
+
+/*
+ * dummy one for now, need more work.
+ */
+int
+bpf_validate(struct rte_bpf *bpf)
+{
+	int32_t rc, ofs, stack_sz;
+	uint32_t i, op, dr;
+	const struct ebpf_insn *ins;
+
+	rc = 0;
+	stack_sz = 0;
+	for (i = 0; i != bpf->prm.nb_ins; i++) {
+
+		ins = bpf->prm.ins + i;
+		op = ins->code;
+		dr = ins->dst_reg;
+		ofs = ins->off;
+
+		if ((BPF_CLASS(op) == BPF_STX || BPF_CLASS(op) == BPF_ST) &&
+				dr == EBPF_REG_10) {
+			ofs -= sizeof(uint64_t);
+			stack_sz = RTE_MIN(ofs, stack_sz);
+		}
+	}
+
+	if (stack_sz != 0) {
+		stack_sz = -stack_sz;
+		if (stack_sz > MAX_BPF_STACK_SIZE)
+			rc = -ERANGE;
+		else
+			bpf->stack_sz = stack_sz;
+	}
+
+	if (rc != 0)
+		RTE_BPF_LOG(ERR, "%s(%p) failed, error code: %d;\n",
+			__func__, bpf, rc);
+	return rc;
+}
diff --git a/lib/librte_bpf/meson.build b/lib/librte_bpf/meson.build
new file mode 100644
index 000000000..4fa000f5a
--- /dev/null
+++ b/lib/librte_bpf/meson.build
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+allow_experimental_apis = true
+sources = files('bpf.c',
+		'bpf_exec.c',
+		'bpf_load.c',
+		'bpf_validate.c')
+
+install_headers = files('bpf_def.h',
+			'rte_bpf.h')
+
+deps += ['mbuf', 'net']
diff --git a/lib/librte_bpf/rte_bpf.h b/lib/librte_bpf/rte_bpf.h
new file mode 100644
index 000000000..4b3d970b9
--- /dev/null
+++ b/lib/librte_bpf/rte_bpf.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _RTE_BPF_H_
+#define _RTE_BPF_H_
+
+/**
+ * @file
+ *
+ * RTE BPF support.
+ * librte_bpf provides a framework to load and execute eBPF bytecode
+ * inside user-space dpdk based applications.
+ * It supports a basic set of features from the eBPF spec
+ * (https://www.kernel.org/doc/Documentation/networking/filter.txt).
+ */
+
+#include <rte_common.h>
+#include <rte_mbuf.h>
+#include <bpf_def.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Possible types for function/BPF program arguments.
+ */
+enum rte_bpf_arg_type {
+	RTE_BPF_ARG_UNDEF,      /**< undefined */
+	RTE_BPF_ARG_RAW,        /**< scalar value */
+	RTE_BPF_ARG_PTR = 0x10, /**< pointer to data buffer */
+	RTE_BPF_ARG_PTR_MBUF,   /**< pointer to rte_mbuf */
+	RTE_BPF_ARG_PTR_STACK,
+};
+
+/**
+ * function argument information
+ */
+struct rte_bpf_arg {
+	enum rte_bpf_arg_type type;
+	size_t size;     /**< for pointer types, size of data it points to */
+	size_t buf_size;
+	/**< for mbuf ptr type, max size of rte_mbuf data buffer */
+};
+
+/**
+ * determine whether the argument is a pointer
+ */
+#define RTE_BPF_ARG_PTR_TYPE(x)	((x) & RTE_BPF_ARG_PTR)
+
+/**
+ * Possible types for external symbols.
+ */
+enum rte_bpf_xtype {
+	RTE_BPF_XTYPE_FUNC, /**< function */
+	RTE_BPF_XTYPE_VAR,  /**< variable */
+	RTE_BPF_XTYPE_NUM
+};
+
+/**
+ * Definition for external symbols available in the BPF program.
+ */
+struct rte_bpf_xsym {
+	const char *name;        /**< name */
+	enum rte_bpf_xtype type; /**< type */
+	union {
+		uint64_t (*func)(uint64_t, uint64_t, uint64_t,
+				uint64_t, uint64_t);
+		void *var;
+	}; /**< value */
+};
+
+/**
+ * Input parameters for loading eBPF code.
+ */
+struct rte_bpf_prm {
+	const struct ebpf_insn *ins; /**< array of eBPF instructions */
+	uint32_t nb_ins;            /**< number of instructions in ins */
+	const struct rte_bpf_xsym *xsym;
+	/**< array of external symbols that eBPF code is allowed to reference */
+	uint32_t nb_xsym; /**< number of elements in xsym */
+	struct rte_bpf_arg prog_arg; /**< eBPF program input arg description */
+};
+
+/**
+ * Information about compiled into native ISA eBPF code.
+ */
+struct rte_bpf_jit {
+	uint64_t (*func)(void *); /**< JIT-ed native code */
+	size_t sz;                /**< size of JIT-ed code */
+};
+
+struct rte_bpf;
+
+/**
+ * De-allocate all memory used by this eBPF execution context.
+ *
+ * @param bpf
+ *   BPF handle to destroy.
+ */
+void rte_bpf_destroy(struct rte_bpf *bpf);
+
+/**
+ * Create a new eBPF execution context and load given BPF code into it.
+ *
+ * @param prm
+ *  Parameters used to create and initialise the BPF execution context.
+ * @return
+ *   BPF handle that is used in future BPF operations,
+ *   or NULL on error, with error code set in rte_errno.
+ *   Possible rte_errno errors include:
+ *   - EINVAL - invalid parameter passed to function
+ *   - ENOMEM - can't reserve enough memory
+ */
+struct rte_bpf *rte_bpf_load(const struct rte_bpf_prm *prm);
+
+/**
+ * Execute given BPF bytecode.
+ *
+ * @param bpf
+ *   handle for the BPF code to execute.
+ * @param ctx
+ *   pointer to input context.
+ * @return
+ *   BPF execution return value.
+ */
+uint64_t rte_bpf_exec(const struct rte_bpf *bpf, void *ctx);
+
+/**
+ * Execute given BPF bytecode over a set of input contexts.
+ *
+ * @param bpf
+ *   handle for the BPF code to execute.
+ * @param ctx
+ *   array of pointers to the input contexts.
+ * @param rc
+ *   array of return values (one per input).
+ * @param num
+ *   number of elements in ctx[] (and rc[]).
+ * @return
+ *   number of successfully processed inputs.
+ */
+uint32_t rte_bpf_exec_burst(const struct rte_bpf *bpf, void *ctx[],
+	uint64_t rc[], uint32_t num);
+
+/**
+ * Provide information about natively compiled code for given BPF handle.
+ *
+ * @param bpf
+ *   handle for the BPF code.
+ * @param jit
+ *   pointer to the rte_bpf_jit structure to be filled with related data.
+ * @return
+ *   - -EINVAL if the parameters are invalid.
+ *   - Zero if operation completed successfully.
+ */
+int rte_bpf_get_jit(const struct rte_bpf *bpf, struct rte_bpf_jit *jit);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BPF_H_ */
diff --git a/lib/librte_bpf/rte_bpf_version.map b/lib/librte_bpf/rte_bpf_version.map
new file mode 100644
index 000000000..ea1d621c4
--- /dev/null
+++ b/lib/librte_bpf/rte_bpf_version.map
@@ -0,0 +1,11 @@
+EXPERIMENTAL {
+	global:
+
+	rte_bpf_destroy;
+	rte_bpf_exec;
+	rte_bpf_exec_burst;
+	rte_bpf_get_jit;
+	rte_bpf_load;
+
+	local: *;
+};
diff --git a/lib/meson.build b/lib/meson.build
index 166905c1c..9635aff41 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -23,7 +23,7 @@ libraries = [ 'compat', # just a header, used for versioning
 	# add pkt framework libs which use other libs from above
 	'port', 'table', 'pipeline',
 	# flow_classify lib depends on pkt framework table lib
-	'flow_classify']
+	'flow_classify', 'bpf']
 
 foreach l:libraries
 	build = true
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 26f35630a..9d3c421cc 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -82,6 +82,8 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_POWER)          += -lrte_power
 
 _LDLIBS-$(CONFIG_RTE_LIBRTE_EFD)            += -lrte_efd
 
+_LDLIBS-$(CONFIG_RTE_LIBRTE_BPF)            += -lrte_bpf
+
 _LDLIBS-y += --whole-archive
 
 _LDLIBS-$(CONFIG_RTE_LIBRTE_CFGFILE)        += -lrte_cfgfile
-- 
2.13.6



More information about the dev mailing list