[dpdk-dev] [PATCH] SDK: Add scripts to initialize DPDK runtime
Luca Boccassi
lboccass at brocade.com
Mon Dec 12 20:24:02 CET 2016
From: Christian Ehrhardt <christian.ehrhardt at canonical.com>
A tools/init directory is added with dpdk-init, a script that can be
used to initialize a DPDK runtime environment. 2 config files with
default options, dpdk.conf and interfaces, are provided as well
together with a SysV init script and a systemd service unit.
Signed-off-by: Luca Boccassi <lboccass at brocade.com>
Signed-off-by: Christian Ehrhardt <christian.ehrhardt at canonical.com>
---
mk/rte.sdkinstall.mk | 21 ++++
tools/init/dpdk-init.in | 256 +++++++++++++++++++++++++++++++++++++++++++++
tools/init/dpdk.conf | 60 +++++++++++
tools/init/dpdk.init.in | 57 ++++++++++
tools/init/dpdk.service.in | 12 +++
tools/init/interfaces | 16 +++
6 files changed, 422 insertions(+)
create mode 100755 tools/init/dpdk-init.in
create mode 100644 tools/init/dpdk.conf
create mode 100755 tools/init/dpdk.init.in
create mode 100644 tools/init/dpdk.service.in
create mode 100644 tools/init/interfaces
diff --git a/mk/rte.sdkinstall.mk b/mk/rte.sdkinstall.mk
index 7b0d8b5..a3a5a9a 100644
--- a/mk/rte.sdkinstall.mk
+++ b/mk/rte.sdkinstall.mk
@@ -69,6 +69,14 @@ datadir ?= $(datarootdir)/dpdk
mandir ?= $(datarootdir)/man
sdkdir ?= $(datadir)
targetdir ?= $(datadir)/$(RTE_TARGET)
+# Unit dir: systemd's *system* unit dir from pkg-config, else the most likely default
+ifeq ($(shell pkg-config systemd; echo $$?), 0)
+systemduserunitdir ?= $(shell pkg-config --variable=systemdsystemunitdir systemd)
+else
+systemduserunitdir ?= /lib/systemd/system
+endif
+initdir ?= /etc/init.d
+configdir ?= /etc/dpdk
# The install directories may be staged in DESTDIR
@@ -162,6 +170,19 @@ install-sdk:
$(Q)cp -a $O/app/dpdk-pmdinfogen $(DESTDIR)$(targetdir)/app
$(Q)$(call rte_symlink, $(DESTDIR)$(includedir), $(DESTDIR)$(targetdir)/include)
$(Q)$(call rte_symlink, $(DESTDIR)$(libdir), $(DESTDIR)$(targetdir)/lib)
+ $(Q)$(call rte_mkdir, $(DESTDIR)$(initdir))
+ $(Q)sed -e "s|@@configdir@@|$(configdir)|g" -e "s|@@sbindir@@|$(sbindir)|g" \
+ $(RTE_SDK)/tools/init/dpdk.init.in > $(DESTDIR)$(initdir)/dpdk
+ $(Q)chmod +x $(DESTDIR)$(initdir)/dpdk
+ $(Q)$(call rte_mkdir, $(DESTDIR)$(systemduserunitdir))
+ $(Q)sed "s|@@sbindir@@|$(sbindir)|g" $(RTE_SDK)/tools/init/dpdk.service.in > \
+ $(DESTDIR)$(systemduserunitdir)/dpdk.service
+ $(Q)$(call rte_mkdir, $(DESTDIR)$(configdir))
+ $(Q)cp -a $(RTE_SDK)/tools/init/dpdk.conf $(DESTDIR)$(configdir)
+ $(Q)cp -a $(RTE_SDK)/tools/init/interfaces $(DESTDIR)$(configdir)
+ $(Q)sed -e "s|@@configdir@@|$(configdir)|g" -e "s|@@sbindir@@|$(sbindir)|g" \
+ $(RTE_SDK)/tools/init/dpdk-init.in > $(DESTDIR)$(sbindir)/dpdk-init
+ $(Q)chmod +x $(DESTDIR)$(sbindir)/dpdk-init
install-doc:
ifneq ($(wildcard $O/doc/html),)
diff --git a/tools/init/dpdk-init.in b/tools/init/dpdk-init.in
new file mode 100755
index 0000000..89e0399
--- /dev/null
+++ b/tools/init/dpdk-init.in
@@ -0,0 +1,256 @@
+#!/bin/sh
+#
+# dpdk-init: startup script to initialize a dpdk runtime environment
+#
+# Copyright 2015-2016 Canonical Ltd.
+# Author: Stefan Bader <stefan.bader at canonical.com>
+# Author: Christian Ehrhardt <christian.ehrhardt at canonical.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 3,
+# as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+set -e
+# @@sbindir@@ and @@configdir@@ are placeholders replaced by sed at install time
+DPDK_BIND="@@sbindir@@/dpdk-devbind"
+DPDK_INTERF="@@configdir@@/interfaces"
+DPDK_CONF="@@configdir@@/dpdk.conf"
+
+
+# get_kbytes SIZE: print SIZE in kB; suffix [G|g]/[M|m]/[K|k], none means bytes
+get_kbytes() {
+ local unit
+ local num
+ unit=$(echo "${1}" | sed 's/[0-9]*//g')
+ num=$(echo "${1}" | sed 's/[^0-9]*//g')
+ case ${unit} in
+ *g | *G)
+ echo $((num*1024*1024))
+ ;;
+ *m | *M)
+ echo $((num*1024))
+ ;;
+ *k | *K)
+ echo $((num))
+ ;;
+ *)
+ echo $((num/1024))
+ ;;
+ esac
+}
+
+get_default_hpgsz() {
+	# Print the number from the "Hugepagesize:" line of /proc/meminfo (kB).
+	# awk replaces grep|sed|sed, whose '\s' escape is not POSIX sed syntax.
+	awk '/^Hugepagesize:/ { print $2 }' /proc/meminfo
+}
+
+get_hugetlbfs_mountpoint() {
+ local requested_hpgsz
+ local mp_hpgsz
+ requested_hpgsz=$(get_kbytes "${1}")
+ # print the first /proc/mounts hugetlbfs entry whose page size equals ${1}, if any
+ grep hugetlbfs /proc/mounts | while read \
+ mntfrom mntpoint mntfstype mntopt mntdump mntfsck; do
+
+ # check if the current mountpoint is of the requested huge page size
+ case ${mntopt} in
+ *pagesize=*)
+ mp_hpgsz=$(echo "${mntopt}" | sed 's/.*pagesize=//g' | sed 's/,.*//g')
+ mp_hpgsz=$(get_kbytes "${mp_hpgsz}")
+ ;;
+ *)
+ mp_hpgsz=$(get_default_hpgsz)
+ ;;
+ esac
+ if [ "${requested_hpgsz}" -eq "${mp_hpgsz}" ]; then
+ echo "${mntpoint}"
+ return
+ fi
+ done
+}
+
+_mount_hugetlbfs() {
+	# Mount hugetlbfs for page size ${1} (e.g. 2M); only warn if unsupported.
+	local MNT="/dev/hugepages"
+	local MNTOPTS=""
+	local requested_hpgsz
+	local default_hpgsz
+	requested_hpgsz=$(get_kbytes "${1}")
+	default_hpgsz=$(get_default_hpgsz)
+
+	# kernel might not support the requested size
+	if [ ! -d "/sys/kernel/mm/hugepages/hugepages-${requested_hpgsz}kB" ]; then
+		echo "WARNING: requested page size of ${requested_hpgsz}kB " \
+			"not supported by the kernel"
+		return 0
+	fi
+
+	# special case if this is not the default huge page size
+	if [ "${requested_hpgsz}" -ne "${default_hpgsz}" ]; then
+		MNT="${MNT}-${requested_hpgsz}"
+		MNTOPTS="pagesize=${requested_hpgsz}K"
+	fi
+
+	if [ ! -e "${MNT}" ]; then
+		# test mkdir in the condition: under 'set -e' a later $? check never runs
+		if ! mkdir "${MNT}"; then
+			echo "Could not create directory ${MNT}!" >&2
+			return 1
+		fi
+	fi
+	mount -thugetlbfs hugetlbfs "${MNT}" -o "${MNTOPTS}"
+}
+
+#
+# The DPDK library will use the first mounted instance it finds for a given
+# page size, so if there is already one for a given size there is no need to
+# create another for the same huge page size.
+#
+mount_hugetlbfs() {
+ if [ ! -r "$DPDK_CONF" ]; then
+ return 1
+ fi
+ . "$DPDK_CONF"
+
+ # a page size requested in the config (NR_*_PAGES set) needs a mountpoint of that size
+ if [ -n "${NR_2M_PAGES}" -a -z "$(get_hugetlbfs_mountpoint '2M')" ]; then
+ _mount_hugetlbfs 2M
+ fi
+ if [ -n "${NR_16M_PAGES}" -a -z "$(get_hugetlbfs_mountpoint '16M')" ]; then
+ _mount_hugetlbfs 16M
+ fi
+ if [ -n "${NR_1G_PAGES}" -a -z "$(get_hugetlbfs_mountpoint '1G')" ]; then
+ _mount_hugetlbfs 1G
+ fi
+}
+
+_setup_hugepages() {
+	# Reserve ${2} huge pages in the pool /sys/kernel/mm/hugepages/${1}.
+	MMDIR="/sys/kernel/mm/hugepages/${1}"
+	PAGES=${2}
+
+	[ "$PAGES" != "" ] || return 0
+	[ "$PAGES" -gt 0 ] || return 0
+	if [ ! -d "$MMDIR" ] || [ ! -w "$MMDIR/nr_hugepages" ]; then
+		echo "WARNING: $MMDIR/nr_hugepages not found/writable"
+		return 0
+	fi
+
+	# dropping caches first raises the odds of getting all pages; it is
+	# opt-in because it costs performance
+	if [ "$DROPCACHE_BEFORE_HP_ALLOC" = "1" ]; then
+		echo 3 > /proc/sys/vm/drop_caches
+	fi
+
+	echo "$PAGES" > "$MMDIR/nr_hugepages"
+
+	GOTPAGES=$(cat "$MMDIR/nr_hugepages")
+	if [ "$GOTPAGES" -lt "$PAGES" ]; then
+		echo "WARNING: could not allocate $PAGES at " \
+			"$MMDIR/nr_hugepages (only got $GOTPAGES)."
+	fi
+}
+
+#
+# Reserve a certain amount of hugepages (defined in the file $DPDK_CONF)
+#
+setup_hugepages() {
+ if [ ! -r "$DPDK_CONF" ]; then
+ return 1
+ fi
+ . "$DPDK_CONF"
+
+ _setup_hugepages "hugepages-2048kB" "$NR_2M_PAGES"
+ _setup_hugepages "hugepages-16384kB" "$NR_16M_PAGES"
+ _setup_hugepages "hugepages-1048576kB" "$NR_1G_PAGES"
+
+ # dpdk uses 2*#hugepages mappings, increase for huge systems LP #1507921
+ if [ -d /sys/kernel/mm/hugepages ]; then
+ max_map_count=$(awk -v pad=65530 '{tot+=$1}END{print tot*2+pad}' \
+ /sys/kernel/mm/hugepages/hugepages-*/nr_hugepages)
+ sysctl -q vm.max_map_count="${max_map_count:-65530}"
+ fi
+
+ return 0
+}
+
+#
+# Allow NICs to be automatically bound to DPDK compatible drivers on boot.
+#
+bind_interfaces() {
+ if [ ! -r "$DPDK_INTERF" ]; then
+ return 0
+ fi
+ grep -v '^[ \t]*#' "$DPDK_INTERF" | while read BUS ID MOD; do
+ if [ "$BUS" = "" -o "$ID" = "" -o "$MOD" = "" ]; then
+ echo "WARNING: incomplete spec in $DPDK_INTERF" \
+ " - BUS '$BUS' ID '$ID' MOD '$MOD'"
+ continue
+ fi
+ if [ "$BUS" != "pci" ]; then
+ echo "WARNING: incompatible bus '$BUS' in $DPDK_INTERF"
+ continue
+ fi
+
+ SYSFSPATH="/sys/bus/$BUS/devices/$ID"
+ if [ ! -e "$SYSFSPATH" ]; then
+ echo "WARNING: invalid pci ID '$ID' in $DPDK_INTERF" \
+ " - '$SYSFSPATH' does not exist"
+ continue
+ fi
+ if [ -L "$SYSFSPATH/driver" ]; then
+ CUR=$(readlink "$SYSFSPATH/driver")
+ CUR=$(basename "$CUR")
+ else
+ # device existing, but currently unregistered
+ CUR=""
+ fi
+ if [ "$MOD" != "$CUR" ]; then
+ modprobe -q "$MOD" || true
+ # cloud img have no linux-image-extra initially (uip_pci_generic)
+ # so check if the module is available (loadable/built in)
+ if [ -e "/sys/bus/pci/drivers/${MOD}" ]; then
+ echo "Reassigning pci:$ID to $MOD"
+ $DPDK_BIND -b "$MOD" "$ID"
+ else
+ echo "Warning: failed assigning pci:$ID," \
+ " module $MOD not available"
+ fi
+ else
+ echo "pci:$ID already assigned to $MOD"
+ fi
+ done
+}
+
+
+# Entry point: dispatch on the action given as the first argument
+case "$1" in
+start)
+ mount_hugetlbfs
+ setup_hugepages
+ bind_interfaces
+ ;;
+stop)
+ ;;
+reload|force-reload)
+ setup_hugepages
+ bind_interfaces
+ ;;
+status)
+ $DPDK_BIND --status
+ ;;
+*)
+ echo "Usage: $0 {start|stop|reload|force-reload|status}"
+ exit 1
+ ;;
+esac
+
diff --git a/tools/init/dpdk.conf b/tools/init/dpdk.conf
new file mode 100644
index 0000000..a5aea86
--- /dev/null
+++ b/tools/init/dpdk.conf
@@ -0,0 +1,60 @@
+#
+# The number of 2M hugepages to reserve on system boot
+#
+# Default is 0
+# To e.g. let it reserve 128M via 64x 2M Hugepages set:
+# NR_2M_PAGES=64
+
+#
+# The number of 1G hugepages to reserve on system boot
+#
+# Default is 0
+# To e.g. let it reserve 2G via 2x 1G Hugepages set:
+# NR_1G_PAGES=2
+
+# The number of 16M hugepages to reserve, supported e.g. on ppc64el
+#
+# Default is 0
+# To e.g. let it reserve 512M via 32x 16M Hugepages set:
+# NR_16M_PAGES=32
+
+#
+# Dropping slab and pagecache can help to successfully allocate hugepages,
+# especially later in the lifecycle of a system.
+# This comes at the cost of losing all slab and pagecache on (re)start
+# of the dpdk service - therefore the default is off.
+#
+# Default is 0
+# Set to 1 to enable it
+#DROPCACHE_BEFORE_HP_ALLOC=0
+
+# The DPDK library will use the first mounted hugetlbfs.
+# The init scripts try to ensure there is at least one default hugetlbfs
+# mountpoint on start.
+# If you have multiple hugetlbfs mountpoints for a complex (e.g. specific numa
+# policies) setup it should be controlled by the admin instead of this init
+# script. In that case specific mountpoints can be provided as parameters to
+# the DPDK library.
+
+# Hardware may support other granularities of hugepages (like 4M). But the
+# larger the hugepages the earlier those should be allocated.
+# Note: the dpdk init scripts will report warnings, but not fail if they could
+# not allocate the requested amount of hugepages.
+# The more or the larger the hugepages to be allocated are, the more it is
+# recommended to do the reservation as kernel commandline arguments.
+# To do so edit /etc/default/grub: GRUB_CMDLINE_LINUX_DEFAULT
+# and add [hugepagesz=xx] hugepages=yy ...
+#
+# Kernel commandline config:
+# hugepagesz sets the size for the next hugepages reservation (default 2M)
+# hugepages reserves the given number of hugepages of the size set before
+#
+# After modifying /etc/default/grub, the command "update-grub" has to be
+# run in order to re-generate the grub config files. The new values will
+# be used after next reboot.
+#
+# example:
+# GRUB_CMDLINE_LINUX_DEFAULT="... hugepages=16 hugepagesz=1G hugepages=2"
+#
+# If the system supports it, this will reserve 16x 2M pages and 2x 1G pages.
+#
diff --git a/tools/init/dpdk.init.in b/tools/init/dpdk.init.in
new file mode 100755
index 0000000..1e26450
--- /dev/null
+++ b/tools/init/dpdk.init.in
@@ -0,0 +1,57 @@
+#!/bin/sh
+
+### BEGIN INIT INFO
+# Provides: dpdk
+# Required-Start: $remote_fs $local_fs
+# Required-Stop: $remote_fs $local_fs
+# Default-Start: S
+# Default-Stop: 0 1 6
+# Short-Description: start dpdk runtime environment
+### END INIT INFO
+
+set -e
+
+PATH="/sbin:/bin:/usr/bin"
+
+[ -d @@configdir@@ ] || exit 0
+
+# Define LSB log_* functions.
+# Depend on lsb-base (>= 3.2-14) to ensure that this file is present
+# and status_of_proc is working.
+. /lib/lsb/init-functions
+
+error=0
+case "$1" in
+start)
+	log_action_begin_msg "Starting DPDK environment" "dpdk"
+	output=$(@@sbindir@@/dpdk-init start 2>&1) || error="$?"
+	if [ ! -z "$output" ]; then
+		echo "$output" | while read line; do
+			log_action_cont_msg "$line"
+		done
+	fi
+	log_action_end_msg $error
+	exit $error
+	;;
+# these actions are accepted but do nothing
+stop|restart|force-reload)
+	;;
+status)
+	# dpdk-init takes the action as a plain word; "--status" only got its usage text
+	output=$(@@sbindir@@/dpdk-init status 2>&1) || error="$?"
+	if [ ! -z "$output" ]; then
+		echo "$output" | while read line; do
+			log_action_cont_msg "$line"
+		done
+	fi
+	log_action_end_msg $error
+	exit $error
+	;;
+*)
+	echo "Usage: $0 {start|stop|restart|force-reload|status}"
+	exit 1
+	;;
+esac
+
+exit 0
+
diff --git a/tools/init/dpdk.service.in b/tools/init/dpdk.service.in
new file mode 100644
index 0000000..1968081
--- /dev/null
+++ b/tools/init/dpdk.service.in
@@ -0,0 +1,12 @@
+[Unit]
+Description=DPDK runtime environment
+DefaultDependencies=false
+After=network-pre.target local-fs.target
+
+[Service]
+Type=oneshot
+RemainAfterExit=yes
+ExecStart=@@sbindir@@/dpdk-init start
+
+[Install]
+WantedBy=multi-user.target
diff --git a/tools/init/interfaces b/tools/init/interfaces
new file mode 100644
index 0000000..73c3fca
--- /dev/null
+++ b/tools/init/interfaces
@@ -0,0 +1,16 @@
+#
+# <bus> Currently only "pci" is supported
+# <id> Device ID on the specified bus
+# <driver> Driver to bind against (vfio-pci, uio_pci_generic, igb_uio or
+# rte_kni)
+#
+# Be aware that the two dpdk compatible drivers uio_pci_generic and vfio-pci are
+# part of linux-image-extra-<VERSION> package on Debian-based distributions.
+# This package is not always installed by default - for example in cloud-images.
+# So please install it in case you run into missing module issues.
+#
+# <bus> <id> <driver>
+# pci 0000:04:00.0 vfio-pci
+# pci 0000:04:00.1 uio_pci_generic
+# pci 0000:05:00.0 igb_uio
+# pci 0000:06:00.0 rte_kni
--
2.1.4
More information about the dev
mailing list