[dpdk-dev] [PATCH v2] SDK: Add scripts to initialize DPDK runtime

Christian Ehrhardt christian.ehrhardt at canonical.com
Tue Sep 19 07:42:30 CEST 2017


On Mon, Sep 18, 2017 at 10:44 PM, Ferruh Yigit <ferruh.yigit at intel.com>
wrote:

> On 12/13/2016 4:47 PM, lboccass at brocade.com (Luca Boccassi) wrote:
> > From: Christian Ehrhardt <christian.ehrhardt at canonical.com>
> >
> > A tools/init directory is added with dpdk-init, a script that can be
> > used to initialize a DPDK runtime environment. 2 config files with
> > default options, dpdk.conf and interfaces, are provided as well
> > together with a SysV init script and a systemd service unit.
>
> I guess this patch is missing comments; it is good to have helper scripts
> (and v2 has a BSD license).
>
> Is this correct:
> This script runs as a service and does the following, based on its config:
>     mount_hugetlbfs
>     setup_hugepages
>     bind_interfaces
>
> So it keeps the interfaces bound to DPDK after a reboot?
>

Yes, those three are the major items it helps the admin to take care of.


> [intentionally leaving the rest of the patch quoted, since this is an old patch]
>
> >
> > v2: relicensed dpdk-init.in from GPL3 to BSD-3-clause with authors'
> >     permission
> >
> > Signed-off-by: Luca Boccassi <lboccass at brocade.com>
> > Signed-off-by: Christian Ehrhardt <christian.ehrhardt at canonical.com>
> > ---
> >  mk/rte.sdkinstall.mk       |  21 ++++
> >  tools/init/dpdk-init.in    | 274 ++++++++++++++++++++++++++++++
> +++++++++++++++
> >  tools/init/dpdk.conf       |  60 ++++++++++
> >  tools/init/dpdk.init.in    |  57 ++++++++++
> >  tools/init/dpdk.service.in |  12 ++
> >  tools/init/interfaces      |  16 +++
> >  6 files changed, 440 insertions(+)
> >  create mode 100755 tools/init/dpdk-init.in
> >  create mode 100644 tools/init/dpdk.conf
> >  create mode 100755 tools/init/dpdk.init.in
> >  create mode 100644 tools/init/dpdk.service.in
> >  create mode 100644 tools/init/interfaces
> >
> > diff --git a/mk/rte.sdkinstall.mk b/mk/rte.sdkinstall.mk
> > index 7b0d8b5..a3a5a9a 100644
> > --- a/mk/rte.sdkinstall.mk
> > +++ b/mk/rte.sdkinstall.mk
> > @@ -69,6 +69,14 @@ datadir     ?=       $(datarootdir)/dpdk
> >  mandir      ?=       $(datarootdir)/man
> >  sdkdir      ?=                $(datadir)
> >  targetdir   ?=                $(datadir)/$(RTE_TARGET)
> > +# If pkgconfig or systemd.pc are not available fall back to most likely
> default
> > +ifeq ($(shell pkg-config systemd; echo $$?), 0)
> > +systemduserunitdir ?= $(shell pkg-config --variable=systemdsystemunitdir
> systemd)
> > +else
> > +systemduserunitdir ?= /lib/systemd/system
> > +endif
> > +initdir     ?= /etc/init.d
> > +configdir   ?= /etc/dpdk
> >
> >  # The install directories may be staged in DESTDIR
> >
> > @@ -162,6 +170,19 @@ install-sdk:
> >       $(Q)cp -a               $O/app/dpdk-pmdinfogen
>  $(DESTDIR)$(targetdir)/app
> >       $(Q)$(call rte_symlink, $(DESTDIR)$(includedir),
> $(DESTDIR)$(targetdir)/include)
> >       $(Q)$(call rte_symlink, $(DESTDIR)$(libdir),
>  $(DESTDIR)$(targetdir)/lib)
> > +     $(Q)$(call rte_mkdir,
> $(DESTDIR)$(initdir))
> > +     $(Q)sed -e "s|@@configdir@@|$(configdir)|g" -e "s|@@sbindir@@|$(sbindir)|g"
> \
> > +             $(RTE_SDK)/tools/init/dpdk.init.in >
> $(DESTDIR)$(initdir)/dpdk
> > +     $(Q)chmod +x
>  $(DESTDIR)$(initdir)/dpdk
> > +     $(Q)$(call rte_mkdir,                            $(DESTDIR)$(
> systemduserunitdir))
> > +     $(Q)sed "s|@@sbindir@@|$(sbindir)|g" $(RTE_SDK)/tools/init/dpdk.
> service.in > \
> > +             $(DESTDIR)$(systemduserunitdir)/dpdk.service
> > +     $(Q)$(call rte_mkdir,
> $(DESTDIR)$(configdir))
> > +     $(Q)cp -a               $(RTE_SDK)/tools/init/dpdk.conf
> $(DESTDIR)$(configdir)
> > +     $(Q)cp -a               $(RTE_SDK)/tools/init/interfaces
> $(DESTDIR)$(configdir)
> > +     $(Q)sed -e "s|@@configdir@@|$(configdir)|g" -e "s|@@sbindir@@|$(sbindir)|g"
> \
> > +             $(RTE_SDK)/tools/init/dpdk-init.in >
> $(DESTDIR)$(sbindir)/dpdk-init
> > +     $(Q)chmod +x
>  $(DESTDIR)$(sbindir)/dpdk-init
> >
> >  install-doc:
> >  ifneq ($(wildcard $O/doc/html),)
> > diff --git a/tools/init/dpdk-init.in b/tools/init/dpdk-init.in
> > new file mode 100755
> > index 0000000..a1a44f7
> > --- /dev/null
> > +++ b/tools/init/dpdk-init.in
> > @@ -0,0 +1,274 @@
> > +#!/bin/sh
> > +#
> > +# dpdk-init: startup script to initialize a dpdk runtime environment
> > +#
> > +# Author: Stefan Bader <stefan.bader at canonical.com>
> > +# Author: Christian Ehrhardt <christian.ehrhardt at canonical.com>
> > +#
> > +#   BSD LICENSE
> > +#
> > +#   Copyright(c) 2015-2016 Canonical Ltd. All rights reserved.
> > +#   All rights reserved.
> > +#
> > +#   Redistribution and use in source and binary forms, with or without
> > +#   modification, are permitted provided that the following conditions
> > +#   are met:
> > +#
> > +#     * Redistributions of source code must retain the above copyright
> > +#       notice, this list of conditions and the following disclaimer.
> > +#     * Redistributions in binary form must reproduce the above
> copyright
> > +#       notice, this list of conditions and the following disclaimer in
> > +#       the documentation and/or other materials provided with the
> > +#       distribution.
> > +#     * Neither the name of Intel Corporation nor the names of its
> > +#       contributors may be used to endorse or promote products derived
> > +#       from this software without specific prior written permission.
> > +#
> > +#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > +#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > +#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
> FOR
> > +#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > +#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> > +#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > +#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
> USE,
> > +#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
> ANY
> > +#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > +#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
> USE
> > +#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > +#
> > +set -e
> > +
> > +DPDK_BIND="@@sbindir@@/dpdk-devbind"
> > +DPDK_INTERF="@@configdir@@/interfaces"
> > +DPDK_CONF="@@configdir@@/dpdk.conf"
> > +
> > +
> > +# pagesize supports [G|g]/[M|m]/[K|k]
> > +get_kbytes() {
> > +    local unit
> > +    local num
> > +    unit=$(echo "${1}" | sed 's/[0-9]*//g')
> > +    num=$(echo "${1}" | sed 's/[^0-9]*//g')
> > +    case ${unit} in
> > +    *g | *G)
> > +        echo $((num*1024*1024))
> > +        ;;
> > +    *m | *M)
> > +        echo $((num*1024))
> > +        ;;
> > +    *k | *K)
> > +        echo $((num))
> > +        ;;
> > +    *)
> > +        echo $((num/1024))
> > +        ;;
> > +    esac
> > +}
> > +
> > +get_default_hpgsz() {
> > +    default_hpgsz=$(grep "Hugepagesize:" /proc/meminfo \
> > +        | sed 's/^Hugepagesize:\s*//g' | sed 's/\s*kB$//g')
> > +    echo "${default_hpgsz}"
> > +}
> > +
> > +get_hugetlbfs_mountpoint() {
> > +    local requested_hpgsz
> > +    local mp_hpgsz
> > +    requested_hpgsz=$(get_kbytes "${1}")
> > +
> > +    grep hugetlbfs /proc/mounts | while read \
> > +        mntfrom mntpoint mntfstype mntopt mntdump mntfsck; do
> > +
> > +        # check if the current mountpoint is of the requested huge page
> size
> > +        case ${mntopt} in
> > +        *pagesize=*)
> > +            mp_hpgsz=$(echo "${mntopt}" | sed 's/.*pagesize=//g' | sed
> 's/,.*//g')
> > +            mp_hpgsz=$(get_kbytes "${mp_hpgsz}")
> > +            ;;
> > +        *)
> > +            mp_hpgsz=$(get_default_hpgsz)
> > +            ;;
> > +        esac
> > +        if [ "${requested_hpgsz}" -eq "${mp_hpgsz}" ]; then
> > +            echo "${mntpoint}"
> > +            return
> > +        fi
> > +    done
> > +}
> > +
> > +_mount_hugetlbfs() {
> > +    local MNT="/dev/hugepages"
> > +    local MNTOPTS=""
> > +    local requested_hpgsz
> > +    local default_hpgsz
> > +    requested_hpgsz=$(get_kbytes "${1}")
> > +    default_hpgsz=$(get_default_hpgsz)
> > +
> > +    # kernel might not support the requested size
> > +    if [ ! -d "/sys/kernel/mm/hugepages/hugepages-${requested_hpgsz}kB"
> ]; then
> > +        echo "WARNING: requested page size of ${requested_hpgsz}kB " \
> > +             "not supported by the kernel"
> > +        return 0
> > +    fi
> > +
> > +    # special case if this is not the default huge page size
> > +    if [ "${requested_hpgsz}" -ne "${default_hpgsz}" ]; then
> > +        MNT="${MNT}-${requested_hpgsz}"
> > +        MNTOPTS="pagesize=${requested_hpgsz}K"
> > +    fi
> > +
> > +    if [ ! -e "${MNT}" ]; then
> > +        mkdir "${MNT}"
> > +        if [ $? -ne 0 ]; then
> > +            echo "Could not create directory ${MNT}!" >&2
> > +            return 1
> > +        fi
> > +    fi
> > +    mount -thugetlbfs hugetlbfs "${MNT}" -o "${MNTOPTS}"
> > +    return $?
> > +}
> > +
> > +#
> > +# The DPDK library will use the first mounted instance it finds for a
> given
> > +# page size. So if there is already one for a given size there is no
> need to
> > +# create another for the same huge page size.
> > +#
> > +mount_hugetlbfs() {
> > +    if [ ! -r "$DPDK_CONF" ]; then
> > +        return 1
> > +    fi
> > +    . "$DPDK_CONF"
> > +
> > +    # if a page size is requested, there has to be a mountpoint for
> that size
> > +    if [ -n "${NR_2M_PAGES}" -a -z "$(get_hugetlbfs_mountpoint '2M')"
> ]; then
> > +        _mount_hugetlbfs 2M
> > +    fi
> > +    if [ -n "${NR_16M_PAGES}" -a -z "$(get_hugetlbfs_mountpoint '16M')"
> ]; then
> > +        _mount_hugetlbfs 16M
> > +    fi
> > +    if [ -n "${NR_1G_PAGES}" -a -z "$(get_hugetlbfs_mountpoint '1G')"
> ]; then
> > +        _mount_hugetlbfs 1G
> > +    fi
> > +}
> > +
> > +_setup_hugepages() {
> > +    MMDIR="/sys/kernel/mm/hugepages/${1}"
> > +    PAGES=${2}
> > +
> > +    if [ "$PAGES" != "" ]; then
> > +        if [ "$PAGES" -gt 0 ]; then
> > +            if [ -d "$MMDIR" -a -w "$MMDIR/nr_hugepages" ]; then
> > +                # increases the chance to allocate enough huge pages
> > +                # configurable, since it comes at a performance penalty
> > +                if [ "$DROPCACHE_BEFORE_HP_ALLOC" = "1" ]; then
> > +                    echo 3 > /proc/sys/vm/drop_caches
> > +                fi
> > +
> > +                echo "$PAGES" > "$MMDIR/nr_hugepages"
> > +
> > +                GOTPAGES=$(cat "$MMDIR/nr_hugepages")
> > +                if [ "$GOTPAGES" -lt "$PAGES" ]; then
> > +                    echo "WARNING: could not allocate $PAGES at " \
> > +                         "$MMDIR/nr_hugepages (only got $GOTPAGES)."
> > +                fi
> > +            else
> > +                echo "WARNING: $MMDIR/nr_hugepages not found/writable"
> > +            fi
> > +        fi
> > +    fi
> > +}
> > +
> > +#
> > +# Reserve a certain amount of hugepages (defined in @@configdir@@/dpdk.conf)
> > +#
> > +setup_hugepages() {
> > +    if [ ! -r "$DPDK_CONF" ]; then
> > +        return 1
> > +    fi
> > +    . "$DPDK_CONF"
> > +
> > +    _setup_hugepages "hugepages-2048kB" "$NR_2M_PAGES"
> > +    _setup_hugepages "hugepages-16384kB" "$NR_16M_PAGES"
> > +    _setup_hugepages "hugepages-1048576kB" "$NR_1G_PAGES"
> > +
> > +    # dpdk uses 2*#hugepages mappings, increase for huge systems LP
> #1507921
> > +    if [ -d /sys/kernel/mm/hugepages ]; then
> > +        max_map_count=$(awk -v pad=65530 '{tot+=$1}END{print
> tot*2+pad}' \
> > +            /sys/kernel/mm/hugepages/hugepages-*/nr_hugepages)
> > +        sysctl -q vm.max_map_count="${max_map_count:-65530}"
> > +    fi
> > +
> > +    return 0
> > +}
> > +
> > +#
> > +# Allow NICs to be automatically bound to DPDK compatible drivers on
> boot.
> > +#
> > +bind_interfaces() {
> > +    if [ ! -r "$DPDK_INTERF" ]; then
> > +        return 0
> > +    fi
> > +    grep -v '^[ \t]*#' "$DPDK_INTERF" | while read BUS ID MOD; do
> > +        if [ "$BUS" = "" -o "$ID" = "" -o "$MOD" = "" ]; then
> > +            echo "WARNING: incomplete spec in $DPDK_INTERF" \
> > +                " - BUS '$BUS' ID '$ID' MOD '$MOD'"
> > +            continue
> > +        fi
> > +        if [ "$BUS" != "pci" ]; then
> > +            echo "WARNING: incompatible bus '$BUS' in $DPDK_INTERF"
> > +            continue
> > +        fi
> > +
> > +        SYSFSPATH="/sys/bus/$BUS/devices/$ID"
> > +        if [ ! -e "$SYSFSPATH" ]; then
> > +            echo "WARNING: invalid pci ID '$ID' in $DPDK_INTERF" \
> > +                " - '$SYSFSPATH' does not exist"
> > +            continue
> > +        fi
> > +        if [ -L "$SYSFSPATH/driver" ]; then
> > +            CUR=$(readlink "$SYSFSPATH/driver")
> > +            CUR=$(basename "$CUR")
> > +        else
> > +            # device exists, but is currently not bound to any driver
> > +            CUR=""
> > +        fi
> > +        if [ "$MOD" != "$CUR" ]; then
> > +            modprobe -q "$MOD" || true
> > +            # cloud images have no linux-image-extra initially
> (uio_pci_generic)
> > +            # so check if the module is available (loadable/built in)
> > +            if [ -e "/sys/bus/pci/drivers/${MOD}" ]; then
> > +                echo "Reassigning pci:$ID to $MOD"
> > +                $DPDK_BIND -b "$MOD" "$ID"
> > +            else
> > +                echo "Warning: failed assigning pci:$ID," \
> > +                     " module $MOD not available"
> > +            fi
> > +        else
> > +            echo "pci:$ID already assigned to $MOD"
> > +        fi
> > +    done
> > +}
> > +
> > +
> > +
> > +case "$1" in
> > +start)
> > +    mount_hugetlbfs
> > +    setup_hugepages
> > +    bind_interfaces
> > +    ;;
> > +stop)
> > +    ;;
> > +reload|force-reload)
> > +    setup_hugepages
> > +    bind_interfaces
> > +    ;;
> > +status)
> > +    $DPDK_BIND --status
> > +    ;;
> > +*)
> > +    echo "Usage: $0 {start|stop|reload|force-reload|status}"
> > +    exit 1
> > +    ;;
> > +esac
> > +
> > diff --git a/tools/init/dpdk.conf b/tools/init/dpdk.conf
> > new file mode 100644
> > index 0000000..a5aea86
> > --- /dev/null
> > +++ b/tools/init/dpdk.conf
> > @@ -0,0 +1,60 @@
> > +#
> > +# The number of 2M hugepages to reserve on system boot
> > +#
> > +# Default is 0
> > +# To e.g. let it reserve 128M via 64x 2M Hugepages set:
> > +# NR_2M_PAGES=64
> > +
> > +#
> > +# The number of 1G hugepages to reserve on system boot
> > +#
> > +# Default is 0
> > +# To e.g. let it reserve 2G via 2x 1G Hugepages set:
> > +# NR_1G_PAGES=2
> > +
> > +# The number of 16M hugepages to reserve, supported e.g. on ppc64el
> > +#
> > +# Default is 0
> > +# To e.g. let it reserve 512M via 32x 16M Hugepages set:
> > +# NR_16M_PAGES=32
> > +
> > +#
> > +# Dropping slab and pagecache can help to successfully allocate
> hugepages,
> > +# especially later in the lifecycle of a system.
> > +# This comes at the cost of losing all slab and pagecache on (re)start
> > +# of the dpdk service - therefore the default is off.
> > +#
> > +# Default is 0
> > +# Set to 1 to enable it
> > +#DROPCACHE_BEFORE_HP_ALLOC=0
> > +
> > +# The DPDK library will use the first mounted hugetlbfs.
> > +# The init scripts try to ensure there is at least one default hugetlbfs
> > +# mountpoint on start.
> > +# If you have multiple hugetlbfs mountpoints for a complex (e.g.
> specific numa
> > +# policies) setup it should be controlled by the admin instead of this
> init
> > +# script. In that case specific mountpoints can be provided as
> parameters to
> > +# the DPDK library.
> > +
> > +# Hardware may support other granularities of hugepages (like 4M). But
> the
> > +# larger the hugepages the earlier those should be allocated.
> > +# Note: the dpdk init scripts will report warnings, but not fail if
> they could
> > +# not allocate the requested amount of hugepages.
> > +# The more or the larger the hugepages to be allocated are, the more it
> is
> > +# recommended to do the reservation as kernel commandline arguments.
> > +# To do so edit /etc/default/grub: GRUB_CMDLINE_LINUX_DEFAULT
> > +# and add [hugepagesz=xx] hugepages=yy ...
> > +#
> > +# Kernel commandline config:
> > +# hugepagesz sets the size for the next hugepages reservation (default
> 2M)
> > +# hugepages  reserves the given number of hugepages of the size set
> before
> > +#
> > +# After modifying /etc/default/grub, the command "update-grub" has to be
> > +# run in order to re-generate the grub config files. The new values will
> > +# be used after next reboot.
> > +#
> > +# example:
> > +# GRUB_CMDLINE_LINUX_DEFAULT="... hugepages=16 hugepagesz=1G
> hugepages=2"
> > +#
> > +# If the system supports it, this will reserve 16x 2M pages and 2x 1G
> pages.
> > +#
> > diff --git a/tools/init/dpdk.init.in b/tools/init/dpdk.init.in
> > new file mode 100755
> > index 0000000..1e26450
> > --- /dev/null
> > +++ b/tools/init/dpdk.init.in
> > @@ -0,0 +1,57 @@
> > +#!/bin/sh
> > +
> > +### BEGIN INIT INFO
> > +# Provides:          dpdk
> > +# Required-Start:    $remote_fs $local_fs
> > +# Required-Stop:     $remote_fs $local_fs
> > +# Default-Start:     S
> > +# Default-Stop:      0 1 6
> > +# Short-Description: start dpdk runtime environment
> > +### END INIT INFO
> > +
> > +set -e
> > +
> > +PATH="/sbin:/bin:/usr/bin"
> > +
> > +[ -d @@configdir@@ ] || exit 0
> > +
> > +# Define LSB log_* functions.
> > +# Depend on lsb-base (>= 3.2-14) to ensure that this file is present
> > +# and status_of_proc is working.
> > +. /lib/lsb/init-functions
> > +
> > +error=0
> > +case "$1" in
> > +start)
> > +    log_action_begin_msg "Starting DPDK environment" "dpdk"
> > +    output=$(@@sbindir@@/dpdk-init start 2>&1) || error="$?"
> > +    if [ ! -z "$output" ]; then
> > +        echo "$output" | while read line; do
> > +            log_action_cont_msg "$line"
> > +        done
> > +    fi
> > +    log_action_end_msg $error
> > +    exit $error
> > +    ;;
> > +stop)
> > +    ;;
> > +restart|force-reload)
> > +    ;;
> > +status)
> > +    output=$(@@sbindir@@/dpdk-init --status 2>&1) || error="$?"
> > +    if [ ! -z "$output" ]; then
> > +        echo "$output" | while read line; do
> > +            log_action_cont_msg "$line"
> > +        done
> > +    fi
> > +    log_action_end_msg $error
> > +    exit $error
> > +    ;;
> > +*)
> > +    echo "Usage: $0 {start|stop|restart|force-reload|status}"
> > +    exit 1
> > +    ;;
> > +esac
> > +
> > +exit 0
> > +
> > diff --git a/tools/init/dpdk.service.in b/tools/init/dpdk.service.in
> > new file mode 100644
> > index 0000000..1968081
> > --- /dev/null
> > +++ b/tools/init/dpdk.service.in
> > @@ -0,0 +1,12 @@
> > +[Unit]
> > +Description=DPDK runtime environment
> > +DefaultDependencies=false
> > +After=network-pre.target local-fs.target
> > +
> > +[Service]
> > +Type=oneshot
> > +RemainAfterExit=yes
> > +ExecStart=@@sbindir@@/dpdk-init start
> > +
> > +[Install]
> > +WantedBy=multi-user.target
> > diff --git a/tools/init/interfaces b/tools/init/interfaces
> > new file mode 100644
> > index 0000000..73c3fca
> > --- /dev/null
> > +++ b/tools/init/interfaces
> > @@ -0,0 +1,16 @@
> > +#
> > +# <bus>              Currently only "pci" is supported
> > +# <id>               Device ID on the specified bus
> > +# <driver>   Driver to bind against (vfio-pci, uio_pci_generic, igb_uio
> or
> > +#               rte_kni)
> > +#
> > +# Be aware that the two dpdk compatible drivers uio_pci_generic and
> vfio-pci are
> > +# part of linux-image-extra-<VERSION> package on Debian-based
> distributions.
> > +# This package is not always installed by default - for example in
> cloud-images.
> > +# So please install it in case you run into missing module issues.
> > +#
> > +# <bus>      <id>            <driver>
> > +# pci        0000:04:00.0    vfio-pci
> > +# pci        0000:04:00.1    uio_pci_generic
> > +# pci        0000:05:00.0    igb_uio
> > +# pci        0000:06:00.0    rte_kni
> >
>
>


-- 
Christian Ehrhardt
Software Engineer, Ubuntu Server
Canonical Ltd


More information about the dev mailing list