eal: fix ctrl thread affinity with --lcores

Message ID 1564479354-11192-1-git-send-email-david.marchand@redhat.com (mailing list archive)
State Changes Requested, archived
Delegated to: Thomas Monjalon
Headers
Series eal: fix ctrl thread affinity with --lcores |

Checks

Context Check Description
ci/iol-Compile-Testing success Compile Testing PASS
ci/Intel-compilation fail Compilation issues
ci/mellanox-Performance-Testing success Performance Testing PASS
ci/checkpatch success coding style OK

Commit Message

David Marchand July 30, 2019, 9:35 a.m. UTC
  When using -l/-c options, each lcore is mapped to a physical cpu in a
1:1 fashion.
On the contrary, when using --lcores, each lcore has its own cpuset on
which the associated EAL thread runs.

To handle those two situations, rely on the per lcore cpuset.

Introduced macros to manipulate cpusets in both Linux and FreeBSD.

Examples in a 4 cores FreeBSD vm:

$ ./build/app/testpmd --master-lcore 1 --lcores '0@(1,3),1@2' \
 --no-huge --no-pci -m 512 -- -i --total-num-mbufs=2048

  PID    TID COMM                TDNAME              CPU CSID CPU MASK
31733 100155 testpmd             -                     2    1 2
31733 100286 testpmd             eal-intr-thread       0    1 0
31733 100287 testpmd             rte_mp_handle         0    1 0
31733 100288 testpmd             lcore-slave-0         3    1 1,3

$ cpuset -l 1,2,3 \
 ./build/app/testpmd --master-lcore 1 --lcores '0@(1,3),1@2' \
 --no-huge --no-pci -m 512 -- -i --total-num-mbufs=2048

  PID    TID COMM                TDNAME              CPU CSID CPU MASK
31757 100139 testpmd             -                     2    2 2
31757 100292 testpmd             eal-intr-thread       2    2 2
31757 100293 testpmd             rte_mp_handle         2    2 2
31757 100294 testpmd             lcore-slave-0         3    2 1,3

$ cpuset -l 1,2,3 \
 ./build/app/testpmd --master-lcore 1 --lcores '0@1,1@2' \
 --no-huge --no-pci -m 512 -- -i --total-num-mbufs=2048

  PID    TID COMM                TDNAME              CPU CSID CPU MASK
31776 100166 testpmd             -                     2    2 2
31776 100295 testpmd             eal-intr-thread       3    2 3
31776 100296 testpmd             rte_mp_handle         3    2 3
31776 100297 testpmd             lcore-slave-0         1    2 1

Bugzilla ID: 322
Fixes: c3568ea37670 ("eal: restrict control threads to startup CPU affinity")
Cc: stable@dpdk.org

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 lib/librte_eal/common/eal_common_options.c | 16 +++++++++-------
 lib/librte_eal/common/include/rte_lcore.h  | 28 ++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 7 deletions(-)
  

Comments

Jerin Jacob Kollanukkaran July 30, 2019, 9:45 a.m. UTC | #1
> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of David Marchand
> Sent: Tuesday, July 30, 2019 3:06 PM
> To: dev@dpdk.org
> Cc: johan.kallstrom@ericsson.com; anatoly.burakov@intel.com;
> olivier.matz@6wind.com; stable@dpdk.org
> Subject: [dpdk-dev] [PATCH] eal: fix ctrl thread affinity with --lcores
> 
> When using -l/-c options, each lcore is mapped to a physical cpu in a
> 1:1 fashion.
> On the contrary, when using --lcores, each lcore has its own cpuset on which the
> associated EAL thread runs.
> 
> To handle those two situations, rely on the per lcore cpuset.
> 
> Introduced macros to manipulate cpusets in both Linux and FreeBSD.
> 
> Examples in a 4 cores FreeBSD vm:
> 
> $ ./build/app/testpmd --master-lcore 1 --lcores '0@(1,3),1@2' \  --no-huge --no-
> pci -m 512 -- -i --total-num-mbufs=2048
> 
>   PID    TID COMM                TDNAME              CPU CSID CPU MASK
> 31733 100155 testpmd             -                     2    1 2
> 31733 100286 testpmd             eal-intr-thread       0    1 0
> 31733 100287 testpmd             rte_mp_handle         0    1 0
> 31733 100288 testpmd             lcore-slave-0         3    1 1,3
> 
> $ cpuset -l 1,2,3 \
>  ./build/app/testpmd --master-lcore 1 --lcores '0@(1,3),1@2' \  --no-huge --no-
> pci -m 512 -- -i --total-num-mbufs=2048
> 
>   PID    TID COMM                TDNAME              CPU CSID CPU MASK
> 31757 100139 testpmd             -                     2    2 2
> 31757 100292 testpmd             eal-intr-thread       2    2 2
> 31757 100293 testpmd             rte_mp_handle         2    2 2
> 31757 100294 testpmd             lcore-slave-0         3    2 1,3
> 
> $ cpuset -l 1,2,3 \
>  ./build/app/testpmd --master-lcore 1 --lcores '0@1,1@2' \  --no-huge --no-pci -
> m 512 -- -i --total-num-mbufs=2048
> 
>   PID    TID COMM                TDNAME              CPU CSID CPU MASK
> 31776 100166 testpmd             -                     2    2 2
> 31776 100295 testpmd             eal-intr-thread       3    2 3
> 31776 100296 testpmd             rte_mp_handle         3    2 3
> 31776 100297 testpmd             lcore-slave-0         1    2 1
> 
> Bugzilla ID: 322
> Fixes: c3568ea37670 ("eal: restrict control threads to startup CPU affinity")
> Cc: stable@dpdk.org
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> @@ -25,6 +25,19 @@ extern "C" {
>  #if defined(__linux__)

[snip]

>  #elif defined(__FreeBSD__)
>  #include <pthread_np.h>
>  typedef cpuset_t rte_cpuset_t;
> @@ -35,6 +48,21 @@ typedef cpuset_t rte_cpuset_t;
>  	CPU_AND(&tmp, src2); \
>  	CPU_COPY(&tmp, dst); \
>  } while (0)
> +#define RTE_CPU_OR(dst, src1, src2) do \ { \
> +	cpuset_t tmp; \
> +	CPU_COPY(src1, &tmp); \
> +	CPU_OR(&tmp, src2); \
> +	CPU_COPY(&tmp, dst); \
> +} while (0)
> +#define RTE_CPU_FILL(set) CPU_FILL(set) #define RTE_CPU_NOT(dst, src)
> +do \ { \
> +	cpuset_t tmp; \
> +	CPU_FILL(&tmp); \
> +	CPU_NAND(&tmp, src); \
> +	CPU_COPY(&tmp, dst); \
> +} while (0)

Considering windows eal or a new eal in mind, IMO, it is better
to move to lib/librte_eal/freebsd/eal/include/rte_os.h
and it will avoid #ifdef clutter in common code too.
  
David Marchand July 30, 2019, 9:46 a.m. UTC | #2
On Tue, Jul 30, 2019 at 11:45 AM Jerin Jacob Kollanukkaran
<jerinj@marvell.com> wrote:
>
> > -----Original Message-----
> > From: dev <dev-bounces@dpdk.org> On Behalf Of David Marchand
> > Sent: Tuesday, July 30, 2019 3:06 PM
> > To: dev@dpdk.org
> > Cc: johan.kallstrom@ericsson.com; anatoly.burakov@intel.com;
> > olivier.matz@6wind.com; stable@dpdk.org
> > Subject: [dpdk-dev] [PATCH] eal: fix ctrl thread affinity with --lcores
> >
> > When using -l/-c options, each lcore is mapped to a physical cpu in a
> > 1:1 fashion.
> > On the contrary, when using --lcores, each lcore has its own cpuset on which the
> > associated EAL thread runs.
> >
> > To handle those two situations, rely on the per lcore cpuset.
> >
> > Introduced macros to manipulate cpusets in both Linux and FreeBSD.
> >
> > Examples in a 4 cores FreeBSD vm:
> >
> > $ ./build/app/testpmd --master-lcore 1 --lcores '0@(1,3),1@2' \  --no-huge --no-
> > pci -m 512 -- -i --total-num-mbufs=2048
> >
> >   PID    TID COMM                TDNAME              CPU CSID CPU MASK
> > 31733 100155 testpmd             -                     2    1 2
> > 31733 100286 testpmd             eal-intr-thread       0    1 0
> > 31733 100287 testpmd             rte_mp_handle         0    1 0
> > 31733 100288 testpmd             lcore-slave-0         3    1 1,3
> >
> > $ cpuset -l 1,2,3 \
> >  ./build/app/testpmd --master-lcore 1 --lcores '0@(1,3),1@2' \  --no-huge --no-
> > pci -m 512 -- -i --total-num-mbufs=2048
> >
> >   PID    TID COMM                TDNAME              CPU CSID CPU MASK
> > 31757 100139 testpmd             -                     2    2 2
> > 31757 100292 testpmd             eal-intr-thread       2    2 2
> > 31757 100293 testpmd             rte_mp_handle         2    2 2
> > 31757 100294 testpmd             lcore-slave-0         3    2 1,3
> >
> > $ cpuset -l 1,2,3 \
> >  ./build/app/testpmd --master-lcore 1 --lcores '0@1,1@2' \  --no-huge --no-pci -
> > m 512 -- -i --total-num-mbufs=2048
> >
> >   PID    TID COMM                TDNAME              CPU CSID CPU MASK
> > 31776 100166 testpmd             -                     2    2 2
> > 31776 100295 testpmd             eal-intr-thread       3    2 3
> > 31776 100296 testpmd             rte_mp_handle         3    2 3
> > 31776 100297 testpmd             lcore-slave-0         1    2 1
> >
> > Bugzilla ID: 322
> > Fixes: c3568ea37670 ("eal: restrict control threads to startup CPU affinity")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > @@ -25,6 +25,19 @@ extern "C" {
> >  #if defined(__linux__)
>
> [snip]
>
> >  #elif defined(__FreeBSD__)
> >  #include <pthread_np.h>
> >  typedef cpuset_t rte_cpuset_t;
> > @@ -35,6 +48,21 @@ typedef cpuset_t rte_cpuset_t;
> >       CPU_AND(&tmp, src2); \
> >       CPU_COPY(&tmp, dst); \
> >  } while (0)
> > +#define RTE_CPU_OR(dst, src1, src2) do \ { \
> > +     cpuset_t tmp; \
> > +     CPU_COPY(src1, &tmp); \
> > +     CPU_OR(&tmp, src2); \
> > +     CPU_COPY(&tmp, dst); \
> > +} while (0)
> > +#define RTE_CPU_FILL(set) CPU_FILL(set) #define RTE_CPU_NOT(dst, src)
> > +do \ { \
> > +     cpuset_t tmp; \
> > +     CPU_FILL(&tmp); \
> > +     CPU_NAND(&tmp, src); \
> > +     CPU_COPY(&tmp, dst); \
> > +} while (0)
>
> Considering windows eal or a new eal in mind, IMO, it is better
> to move to lib/librte_eal/freebsd/eal/include/rte_os.h
> and it will avoid #ifdef clutter in common code too.
>
>
>


This patch will get backported in 18.11.
I would prefer we do this cleanup later when the windows port needs it.
  
Johan Källström July 30, 2019, 11:38 a.m. UTC | #3
See inline comments about not using cpuset for "thread affinity" and possible online cpu failsafe to detect if the thread affinity mask is not a subset of online cpus.
This feature was present before your suggested change.

The CPU failsafe is nice to have as you could set the thread affinity to offline cpus.

Maybe also add the example I gave you to trigger the bug? https://bugs.dpdk.org/show_bug.cgi?id=322#c12
This also shows how to set the default_affinity mask and proves that the calculation will result in threads inside the cpuset on Linux.

/Johan

On tis, 2019-07-30 at 11:35 +0200, David Marchand wrote:
> When using -l/-c options, each lcore is mapped to a physical cpu in a
> 1:1 fashion.
> On the contrary, when using --lcores, each lcore has its own cpuset 

Use "thread affinity" instead of cpuset when we talk about setting the thread affinity.

I know that the term cpuset is used in the data structure, but it is not a cpuset as described by 'man cpuset' (on Linux). This comment can be seen as cosmetic, but I think that it could be good to have a clear definitions to minimize confusion.

> on
> which the associated EAL thread runs.
> 
> To handle those two situations, rely on the per lcore cpuset.
> 
> Introduced macros to manipulate cpusets in both Linux and FreeBSD.
> 
> Examples in a 4 cores FreeBSD vm:
> 
> $ ./build/app/testpmd --master-lcore 1 --lcores '0@(1,3),mailto:1@2' \
>  --no-huge --no-pci -m 512 -- -i --total-num-mbufs=2048
> 
>   PID    TID COMM                TDNAME              CPU CSID CPU
> MASK
> 31733 100155 testpmd             -                     2    1 2
> 31733 100286 testpmd             eal-intr-thread       0    1 0
> 31733 100287 testpmd             rte_mp_handle         0    1 0
> 31733 100288 testpmd             lcore-slave-0         3    1 1,3
> 
> $ cpuset -l 1,2,3 \
>  ./build/app/testpmd --master-lcore 1 --lcores '0@(1,3),mailto:1@2' \
>  --no-huge --no-pci -m 512 -- -i --total-num-mbufs=2048
> 
>   PID    TID COMM                TDNAME              CPU CSID CPU
> MASK
> 31757 100139 testpmd             -                     2    2 2
> 31757 100292 testpmd             eal-intr-thread       2    2 2
> 31757 100293 testpmd             rte_mp_handle         2    2 2
> 31757 100294 testpmd             lcore-slave-0         3    2 1,3
> 
> $ cpuset -l 1,2,3 \
>  ./build/app/testpmd --master-lcore 1 --lcores mailto:'0@1,mailto:1@2' \
>  --no-huge --no-pci -m 512 -- -i --total-num-mbufs=2048
> 
>   PID    TID COMM                TDNAME              CPU CSID CPU
> MASK
> 31776 100166 testpmd             -                     2    2 2
> 31776 100295 testpmd             eal-intr-thread       3    2 3
> 31776 100296 testpmd             rte_mp_handle         3    2 3
> 31776 100297 testpmd             lcore-slave-0         1    2 1
> 
> Bugzilla ID: 322
> Fixes: c3568ea37670 ("eal: restrict control threads to startup CPU
> affinity")
> Cc: mailto:stable@dpdk.org
> 
> Signed-off-by: David Marchand <mailto:david.marchand@redhat.com>
> ---
>  lib/librte_eal/common/eal_common_options.c | 16 +++++++++-------
>  lib/librte_eal/common/include/rte_lcore.h  | 28
> ++++++++++++++++++++++++++++
>  2 files changed, 37 insertions(+), 7 deletions(-)
> 
> diff --git a/lib/librte_eal/common/eal_common_options.c
> b/lib/librte_eal/common/eal_common_options.c
> index 24e36cf..d828271 100644
> --- a/lib/librte_eal/common/eal_common_options.c
> +++ b/lib/librte_eal/common/eal_common_options.c
> @@ -1455,11 +1455,11 @@ compute_ctrl_threads_cpuset(struct
> internal_config *internal_cfg)
>  	unsigned int lcore_id;
>  CSB B                                                              
>                                                                      
>                              

+       /* Get online cpus */
+       CPU_ZERO(&cset_online);
+       for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+               if (eal_cpu_detected(lcore_id))
+                       CPU_SET(lcore_id, &cset_online);
+       }

>                                                                    
>                                                      
>  	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> -		if (eal_cpu_detected(lcore_id) &&
> -				rte_lcore_has_role(lcore_id,
> ROLE_OFF)) {
> -			CPU_SET(lcore_id, cpuset);
> -		}
> +		if (rte_lcore_has_role(lcore_id, ROLE_OFF))
> +			continue;
> +		RTE_CPU_OR(cpuset, cpuset,
> &lcore_config[lcore_id].cpuset);
>  	}
> +	RTE_CPU_NOT(cpuset, cpuset);
>  
>  	if (pthread_getaffinity_np(pthread_self(),
> sizeof(rte_cpuset_t),
>  				&default_set))
> @@ -1467,9 +1467,11 @@ compute_ctrl_threads_cpuset(struct
> internal_config *internal_cfg)
>  
>  	RTE_CPU_AND(cpuset, cpuset, &default_set);
+ 	RTE_CPU_AND(cpuset, cpuset, &cset_online);
>  
> -	/* if no detected CPU is off, use master core */
> -	if (!CPU_COUNT(cpuset))
> -		CPU_SET(rte_get_master_lcore(), cpuset);
> +	/* if no detected CPU is off, use master lcore cpuset */
> +	if (!CPU_COUNT(cpuset)) {
> +		memcpy(cpuset,
> &lcore_config[rte_get_master_lcore()].cpuset,
> +			sizeof(*cpuset));
> +	}
>  }
>  
>  int
> diff --git a/lib/librte_eal/common/include/rte_lcore.h
> b/lib/librte_eal/common/include/rte_lcore.h
> index 411df30..9520d79 100644
> --- a/lib/librte_eal/common/include/rte_lcore.h
> +++ b/lib/librte_eal/common/include/rte_lcore.h
> @@ -25,6 +25,19 @@ extern "C" {
>  #if defined(__linux__)
>  typedef	cpu_set_t rte_cpuset_t;
>  #define RTE_CPU_AND(dst, src1, src2) CPU_AND(dst, src1, src2)
> +#define RTE_CPU_OR(dst, src1, src2) CPU_OR(dst, src1, src2)
> +#define RTE_CPU_FILL(set) do \
> +{ \
> +	unsigned int i; \
> +	for (i = 0; i < CPU_SETSIZE; i++) \
> +		CPU_SET(i, set); \
> +} while (0)
> +#define RTE_CPU_NOT(dst, src) do \
> +{ \
> +	cpu_set_t tmp; \
> +	RTE_CPU_FILL(&tmp); \
> +	CPU_XOR(dst, &tmp, src); \
> +} while (0)
>  #elif defined(__FreeBSD__)
>  #include <pthread_np.h>
>  typedef cpuset_t rte_cpuset_t;
> @@ -35,6 +48,21 @@ typedef cpuset_t rte_cpuset_t;
>  	CPU_AND(&tmp, src2); \
>  	CPU_COPY(&tmp, dst); \
>  } while (0)
> +#define RTE_CPU_OR(dst, src1, src2) do \
> +{ \
> +	cpuset_t tmp; \
> +	CPU_COPY(src1, &tmp); \
> +	CPU_OR(&tmp, src2); \
> +	CPU_COPY(&tmp, dst); \
> +} while (0)
> +#define RTE_CPU_FILL(set) CPU_FILL(set)
> +#define RTE_CPU_NOT(dst, src) do \
> +{ \
> +	cpuset_t tmp; \
> +	CPU_FILL(&tmp); \
> +	CPU_NAND(&tmp, src); \
> +	CPU_COPY(&tmp, dst); \
> +} while (0)
>  #endif
>  
>  /**
  
David Marchand July 30, 2019, 1:47 p.m. UTC | #4
On Tue, Jul 30, 2019 at 1:38 PM Johan Källström
<johan.kallstrom@ericsson.com> wrote:
> The CPU failsafe is nice to have as you could set the thread affinity to offline cpus.

Created a "dpdk" cpuset and put cpus 4-7 into it (my system is mono
numa with 8 cpus)
# cd /sys/fs/cgroup/cpuset/
# mkdir dpdk
# cd dpdk
# echo 4-7 > cpuset.cpus
# echo 0 > cpuset.mems

Disabled cpu 5.
# echo 0 > /sys/bus/cpu/devices/cpu5/online

Put my shell that starts testpmd in this dpdk cpuset
# echo 4439 > tasks


EAL refuses an offline core when parsing the thread affinities and
this did not change.

$ ./master/app/testpmd --master-lcore 0 --lcores '(0,7)@(7,4,5)'
--log-level *:debug --no-huge  --no-pci -m 512 -- -i
--total-num-mbufs=2048
EAL: Detected lcore 0 as core 0 on socket 0
EAL: Detected lcore 1 as core 1 on socket 0
EAL: Detected lcore 2 as core 2 on socket 0
EAL: Detected lcore 3 as core 3 on socket 0
EAL: Detected lcore 4 as core 0 on socket 0
EAL: Detected lcore 6 as core 2 on socket 0
EAL: Detected lcore 7 as core 3 on socket 0
EAL: Support maximum 128 logical core(s) by configuration.
EAL: Detected 7 lcore(s)
EAL: Detected 1 NUMA nodes
EAL: core 5 unavailable
EAL: invalid parameter for --lcores

What did I miss?


>
> Maybe also add the example I gave you to trigger the bug? https://bugs.dpdk.org/show_bug.cgi?id=322#c12

I managed to reproduce your error with the setup above (without
relying on the cset tool that is not available on rhel afaics), I can
add it to the commitlog yes.


> This also shows how to set the default_affinity mask and proves that the calculation will result in threads inside the cpuset on Linux.
>
> /Johan
>
> On tis, 2019-07-30 at 11:35 +0200, David Marchand wrote:
> > When using -l/-c options, each lcore is mapped to a physical cpu in a
> > 1:1 fashion.
> > On the contrary, when using --lcores, each lcore has its own cpuset
>
> Use "thread affinity" instead of cpuset when we talk about setting the thread affinity.
>
> I know that the term cpuset is used in the data structure, but it is not a cpuset as described by 'man cpuset' (on Linux). This comment can be seen as cosmetic, but I think that it could be good to have a clear definitions to minimize confusion.

Indeed, using cpuset is inappropriate.
I will update the commitlog and the comment.
  
Johan Källström July 30, 2019, 4:32 p.m. UTC | #5
Hi, for the online check I referred to the check of "default_set" via the initial thread affinity.

I see that pthread_getaffinity_np returns an already and:ed mask, was under the impression that pthread_getaffinity_np would return the same mask as was set using pthread_setaffinity_np. 
Looking on the implementation I see that it has been implemented on this line (https://github.com/torvalds/linux/blob/master/kernel/sched/core.c#L5242) for the last decade. Don’t know how this is implemented on FreeBSD or Windows.

Below is some example runs without the online cpu check running inside the exclusive cpuset 1-3,19,79 with cpu 79 offline.
Added a print statements after each consecutive calculation just to verify what the different steps.

Nice that you were able to reproduce the bug, the fix looks good otherwise :) .

= Example runs
echo 0 > /sys/bus/cpu/devices/cpu79/online
== 1. Ctrl threads via fallback
app# LD_LIBRARY_PATH=$PWD/../lib:$LD_LIBRARY_PATH taskset -c 19,79 ./testpmd --master-lcore 0 --lcores "(0,19)@(19,1,2,3)"
EAL: Detected 79 lcore(s)
EAL: Detected 2 NUMA nodes
EAL: default_set: 19
EAL: cset_online: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78
EAL: cset_non_busy: 0,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127
EAL: cpuset: 
EAL: cpuset fallback: 1,2,3,19
...
^Z
app#  grep -HE '^(Cpus_allowed_list|Name):' /proc/48803/task/*/status 
/proc/48803/task/48803/status:Name:     testpmd
/proc/48803/task/48803/status:Cpus_allowed_list:        1-3,19
/proc/48803/task/48804/status:Name:     eal-intr-thread
/proc/48803/task/48804/status:Cpus_allowed_list:        1-3,19
/proc/48803/task/48805/status:Name:     rte_mp_handle
/proc/48803/task/48805/status:Cpus_allowed_list:        1-3,19
/proc/48803/task/48806/status:Name:     lcore-slave-19
/proc/48803/task/48806/status:Cpus_allowed_list:        1-3,19

== 2. Ctrl threads via default_set
app# LD_LIBRARY_PATH=$PWD/../lib:$LD_LIBRARY_PATH taskset -c 3,79 ./testpmd --master-lcore 0 --lcores "(0,19)@(19,1,2)"
EAL: Detected 79 lcore(s)
EAL: Detected 2 NUMA nodes
EAL: default_set: 3
EAL: cset_online: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78
EAL: cset_non_busy: 0,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127
EAL: cpuset: 3
EAL: cpuset fallback: 3
...
^Z
app# grep -HE '^(Cpus_allowed_list|Name):' /proc/54032/task/*/status 
/proc/54032/task/54032/status:Name:     testpmd
/proc/54032/task/54032/status:Cpus_allowed_list:        1-2,19
/proc/54032/task/54033/status:Name:     eal-intr-thread
/proc/54032/task/54033/status:Cpus_allowed_list:        3
/proc/54032/task/54034/status:Name:     rte_mp_handle
/proc/54032/task/54034/status:Cpus_allowed_list:        3
/proc/54032/task/54035/status:Name:     lcore-slave-19
/proc/54032/task/54035/status:Cpus_allowed_list:        1-2,19

BR
Johan

-----Original Message-----
From: David Marchand [mailto:david.marchand@redhat.com] 
Sent: July 30, 2019 15:48
To: Johan Källström <johan.kallstrom@ericsson.com>
Cc: dev@dpdk.org; anatoly.burakov@intel.com; olivier.matz@6wind.com; stable@dpdk.org
Subject: Re: [PATCH] eal: fix ctrl thread affinity with --lcores

On Tue, Jul 30, 2019 at 1:38 PM Johan Källström <johan.kallstrom@ericsson.com> wrote:
> The CPU failsafe is nice to have as you could set the thread affinity to offline cpus.

Created a "dpdk" cpuset and put cpus 4-7 into it (my system is mono numa with 8 cpus) # cd /sys/fs/cgroup/cpuset/ # mkdir dpdk # cd dpdk # echo 4-7 > cpuset.cpus # echo 0 > cpuset.mems

Disabled cpu 5.
# echo 0 > /sys/bus/cpu/devices/cpu5/online

Put my shell that starts testpmd in this dpdk cpuset # echo 4439 > tasks


EAL refuses an offline core when parsing the thread affinities and this did not change.

$ ./master/app/testpmd --master-lcore 0 --lcores '(0,7)@(7,4,5)'
--log-level *:debug --no-huge  --no-pci -m 512 -- -i
--total-num-mbufs=2048
EAL: Detected lcore 0 as core 0 on socket 0
EAL: Detected lcore 1 as core 1 on socket 0
EAL: Detected lcore 2 as core 2 on socket 0
EAL: Detected lcore 3 as core 3 on socket 0
EAL: Detected lcore 4 as core 0 on socket 0
EAL: Detected lcore 6 as core 2 on socket 0
EAL: Detected lcore 7 as core 3 on socket 0
EAL: Support maximum 128 logical core(s) by configuration.
EAL: Detected 7 lcore(s)
EAL: Detected 1 NUMA nodes
EAL: core 5 unavailable
EAL: invalid parameter for --lcores

What did I miss?


>
> Maybe also add the example I gave you to trigger the bug? 
> https://protect2.fireeye.com/url?k=51a8b8b7-0d2163b8-51a8f82c-0cc47ad9
> 3e1a-2e7d7fab24e99be5&q=1&u=https%3A%2F%2Fbugs.dpdk.org%2Fshow_bug.cgi
> %3Fid%3D322%23c12

I managed to reproduce your error with the setup above (without relying on the cset tool that is not available on rhel afaics), I can add it to the commitlog yes.


> This also shows how to set the default_affinity mask and proves that the calculation will result in threads inside the cpuset on Linux.
>
> /Johan
>
> On tis, 2019-07-30 at 11:35 +0200, David Marchand wrote:
> > When using -l/-c options, each lcore is mapped to a physical cpu in 
> > a
> > 1:1 fashion.
> > On the contrary, when using --lcores, each lcore has its own cpuset
>
> Use "thread affinity" instead of cpuset when we talk about setting the thread affinity.
>
> I know that the term cpuset is used in the data structure, but it is not a cpuset as described by 'man cpuset' (on Linux). This comment can be seen as cosmetic, but I think that it could be good to have a clear definitions to minimize confusion.

Indeed, using cpuset is inappropriate.
I will update the commitlog and the comment.



--
David Marchand
  
David Marchand July 30, 2019, 7:21 p.m. UTC | #6
On Tue, Jul 30, 2019 at 6:32 PM Johan Källström
<johan.kallstrom@ericsson.com> wrote:
>
> Hi, for the online check I referred to the check of "default_set" via the initial thread affinity.
>
> I see that pthread_getaffinity_np returns an already and:ed mask, was under the impression that pthread_getaffinity_np would return the same mask as was set using pthread_setaffinity_np.
> Looking on the implementation I see that it has been implemented on this line (https://github.com/torvalds/linux/blob/master/kernel/sched/core.c#L5242) for the last decade. Don’t know how this is implemented on FreeBSD or Windows.

Afaics on FreeBSD, trying to set an unknown core is rejected with the
cpuset tool.
Not sure at which level the refusal is (cpuset, libc, kernel).


> Below is some example runs without the online cpu check running inside the exclusive cpuset 1-3,19,79 with cpu 79 offline.
> Added a print statements after each consecutive calculation just to verify what the different steps.
>
> Nice that you were able to reproduce the bug, the fix looks good otherwise :) .

Ok, I understand your concern now, but afaiu you confirm that there is
no issue with this patch.

We are really close to -rc3.
Can you send a review or test tag?


Thanks.

--
David Marchand
  
Johan Källström July 31, 2019, 8:12 a.m. UTC | #7
On tis, 2019-07-30 at 21:21 +0200, David Marchand wrote:
> 
> We are really close to -rc3.
> Can you send a review or test tag?
> 

Tested-by: Johan Källström <johan.kallstrom@ericsson.com>
Reviewed-by: Johan Källström <johan.kallstrom@ericsson.com>

I might be to late for that. Tested the patch on the v19.08-rc3 tag.


BR
Johan
  

Patch

diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 24e36cf..d828271 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -1455,11 +1455,11 @@  compute_ctrl_threads_cpuset(struct internal_config *internal_cfg)
 	unsigned int lcore_id;
 
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
-		if (eal_cpu_detected(lcore_id) &&
-				rte_lcore_has_role(lcore_id, ROLE_OFF)) {
-			CPU_SET(lcore_id, cpuset);
-		}
+		if (rte_lcore_has_role(lcore_id, ROLE_OFF))
+			continue;
+		RTE_CPU_OR(cpuset, cpuset, &lcore_config[lcore_id].cpuset);
 	}
+	RTE_CPU_NOT(cpuset, cpuset);
 
 	if (pthread_getaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
 				&default_set))
@@ -1467,9 +1467,11 @@  compute_ctrl_threads_cpuset(struct internal_config *internal_cfg)
 
 	RTE_CPU_AND(cpuset, cpuset, &default_set);
 
-	/* if no detected CPU is off, use master core */
-	if (!CPU_COUNT(cpuset))
-		CPU_SET(rte_get_master_lcore(), cpuset);
+	/* if no detected CPU is off, use master lcore cpuset */
+	if (!CPU_COUNT(cpuset)) {
+		memcpy(cpuset, &lcore_config[rte_get_master_lcore()].cpuset,
+			sizeof(*cpuset));
+	}
 }
 
 int
diff --git a/lib/librte_eal/common/include/rte_lcore.h b/lib/librte_eal/common/include/rte_lcore.h
index 411df30..9520d79 100644
--- a/lib/librte_eal/common/include/rte_lcore.h
+++ b/lib/librte_eal/common/include/rte_lcore.h
@@ -25,6 +25,19 @@  extern "C" {
 #if defined(__linux__)
 typedef	cpu_set_t rte_cpuset_t;
 #define RTE_CPU_AND(dst, src1, src2) CPU_AND(dst, src1, src2)
+#define RTE_CPU_OR(dst, src1, src2) CPU_OR(dst, src1, src2)
+#define RTE_CPU_FILL(set) do \
+{ \
+	unsigned int i; \
+	for (i = 0; i < CPU_SETSIZE; i++) \
+		CPU_SET(i, set); \
+} while (0)
+#define RTE_CPU_NOT(dst, src) do \
+{ \
+	cpu_set_t tmp; \
+	RTE_CPU_FILL(&tmp); \
+	CPU_XOR(dst, &tmp, src); \
+} while (0)
 #elif defined(__FreeBSD__)
 #include <pthread_np.h>
 typedef cpuset_t rte_cpuset_t;
@@ -35,6 +48,21 @@  typedef cpuset_t rte_cpuset_t;
 	CPU_AND(&tmp, src2); \
 	CPU_COPY(&tmp, dst); \
 } while (0)
+#define RTE_CPU_OR(dst, src1, src2) do \
+{ \
+	cpuset_t tmp; \
+	CPU_COPY(src1, &tmp); \
+	CPU_OR(&tmp, src2); \
+	CPU_COPY(&tmp, dst); \
+} while (0)
+#define RTE_CPU_FILL(set) CPU_FILL(set)
+#define RTE_CPU_NOT(dst, src) do \
+{ \
+	cpuset_t tmp; \
+	CPU_FILL(&tmp); \
+	CPU_NAND(&tmp, src); \
+	CPU_COPY(&tmp, dst); \
+} while (0)
 #endif
 
 /**