[dpdk-dev] [PATCH v4 10/11] eal: replace rte_panic instances in init sequence
Aaron Conole
aconole at redhat.com
Thu Apr 19 19:48:22 CEST 2018
Arnon Warshavsky <arnon at qwilt.com> writes:
> Local functions to this file,
> changing from void to int are non-abi-breaking.
> For handling the single function that cannot
> change from void to int due to abi,
> where this is the only place it is called in,
> I added a state variable that is being checked
> right after the call to this function.
>
> --
>
> v4 - fix split literal strings in log messages
>
> Signed-off-by: Arnon Warshavsky <arnon at qwilt.com>
> ---
Hi Arnon,
Always happy to see panic calls get removed. I have some comments inline.
> lib/librte_eal/bsdapp/eal/eal.c | 86 ++++++++++++++-------
> lib/librte_eal/bsdapp/eal/eal_thread.c | 65 +++++++++++-----
> lib/librte_eal/common/eal_common_launch.c | 21 ++++++
> lib/librte_eal/common/include/rte_debug.h | 12 +++
> lib/librte_eal/linuxapp/eal/eal.c | 120 ++++++++++++++++++++----------
> lib/librte_eal/linuxapp/eal/eal_thread.c | 65 +++++++++++-----
> 6 files changed, 270 insertions(+), 99 deletions(-)
>
> diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
> index d996190..9c2f6f1 100644
> --- a/lib/librte_eal/bsdapp/eal/eal.c
> +++ b/lib/librte_eal/bsdapp/eal/eal.c
> @@ -151,7 +151,7 @@ enum rte_iova_mode
> * We also don't lock the whole file, so that in future we can use read-locks
> * on other parts, e.g. memzones, to detect if there are running secondary
> * processes. */
> -static void
> +static int
> rte_eal_config_create(void)
> {
> void *rte_mem_cfg_addr;
> @@ -160,60 +160,78 @@ enum rte_iova_mode
> const char *pathname = eal_runtime_config_path();
>
> if (internal_config.no_shconf)
> - return;
> + return 0;
>
> if (mem_cfg_fd < 0){
> mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660);
> - if (mem_cfg_fd < 0)
> - rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
> + if (mem_cfg_fd < 0) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot open '%s' for rte_mem_config\n",
> + __func__, pathname);
> + return -1;
> + }
> }
>
> retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config));
> if (retval < 0){
> close(mem_cfg_fd);
> - rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname);
> + RTE_LOG(CRIT, EAL, "%s(): Cannot resize '%s' for rte_mem_config\n",
> + __func__, pathname);
> + return -1;
Previously, it wasn't possible for mem_cfg_fd to be reused after a
failure. Now it is - please reset it to -1 in these close conditions.
> }
>
> retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
> if (retval < 0){
> close(mem_cfg_fd);
> - rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. Is another primary "
> - "process running?\n", pathname);
> + RTE_LOG(CRIT, EAL, "%s(): Cannot create lock on '%s'. Is another primary process running?\n",
> + __func__, pathname);
> + return -1;
> }
>
> rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config),
> PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
>
> if (rte_mem_cfg_addr == MAP_FAILED){
> - rte_panic("Cannot mmap memory for rte_config\n");
> + RTE_LOG(CRIT, EAL, "%s(): Cannot mmap memory for rte_config\n",
> + __func__);
> + return -1;
> }
> memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config));
> rte_config.mem_config = rte_mem_cfg_addr;
> +
> + return 0;
> }
>
> /* attach to an existing shared memory config */
> -static void
> +static int
> rte_eal_config_attach(void)
> {
> void *rte_mem_cfg_addr;
> const char *pathname = eal_runtime_config_path();
>
> if (internal_config.no_shconf)
> - return;
> + return 0;
>
> if (mem_cfg_fd < 0){
> mem_cfg_fd = open(pathname, O_RDWR);
> - if (mem_cfg_fd < 0)
> - rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
> + if (mem_cfg_fd < 0) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot open '%s' for rte_mem_config\n",
> + __func__, pathname);
> + return -1;
> + }
> }
>
> rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config),
> PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
> close(mem_cfg_fd);
Again, previously this would have aborted on a failure. So it needs to
be reset to a value that allows retry.
> - if (rte_mem_cfg_addr == MAP_FAILED)
> - rte_panic("Cannot mmap memory for rte_config\n");
> + if (rte_mem_cfg_addr == MAP_FAILED) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot mmap memory for rte_config\n",
> + __func__);
> + return -1;
> + }
>
> rte_config.mem_config = rte_mem_cfg_addr;
> +
> + return 0;
> }
>
> /* Detect if we are a primary or a secondary process */
> @@ -237,23 +255,28 @@ enum rte_proc_type_t
> }
>
> /* Sets up rte_config structure with the pointer to shared memory config.*/
> -static void
> +static int
> rte_config_init(void)
> {
> rte_config.process_type = internal_config.process_type;
>
> switch (rte_config.process_type){
> case RTE_PROC_PRIMARY:
> - rte_eal_config_create();
> + if (rte_eal_config_create())
> + return -1;
> break;
> case RTE_PROC_SECONDARY:
> - rte_eal_config_attach();
> + if (rte_eal_config_attach())
> + return -1;
> rte_eal_mcfg_wait_complete(rte_config.mem_config);
> break;
> case RTE_PROC_AUTO:
> case RTE_PROC_INVALID:
Not for this patch, but I just noticed that this should probably use a
'default' case.
> - rte_panic("Invalid process type\n");
> + RTE_LOG(CRIT, EAL, "%s(): Invalid process type %d\n",
> + __func__, rte_config.process_type);
> + return -1;
> }
> + return 0;
> }
>
> /* display usage */
> @@ -595,7 +618,8 @@ static void rte_eal_init_alert(const char *msg)
>
> rte_srand(rte_rdtsc());
>
> - rte_config_init();
> + if (rte_config_init() != 0)
> + return -1;
Use rte_eal_init_alert to indicate why you are failing the init.
> if (rte_mp_channel_init() < 0) {
> rte_eal_init_alert("failed to init mp channel\n");
> @@ -652,7 +676,8 @@ static void rte_eal_init_alert(const char *msg)
>
> eal_check_mem_on_local_socket();
>
> - eal_thread_init_master(rte_config.master_lcore);
> + if (eal_thread_init_master(rte_config.master_lcore) != 0)
> + return -1;
Is it ever possible to recover from this? Still needs
rte_eal_init_alert() call.
>
> ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
>
> @@ -666,18 +691,27 @@ static void rte_eal_init_alert(const char *msg)
> * create communication pipes between master thread
> * and children
> */
> - if (pipe(lcore_config[i].pipe_master2slave) < 0)
> - rte_panic("Cannot create pipe\n");
> - if (pipe(lcore_config[i].pipe_slave2master) < 0)
> - rte_panic("Cannot create pipe\n");
> + if (pipe(lcore_config[i].pipe_master2slave) < 0) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot create pipe\n",
> + __func__);
> + return -1;
> + }
> + if (pipe(lcore_config[i].pipe_slave2master) < 0) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot create pipe\n",
> + __func__);
> + return -1;
> + }
How are you cleaning up the threads that were spawned? Let's say this
loop will execute 5 times, and on the 3rd entry, these errors happen.
You now leave DPDK 'half-initialized' - you've spun up threads and
allocated memory.
Also, again use rte_eal_init_alert(). It was added for a reason :)
>
> lcore_config[i].state = WAIT;
>
> /* create a thread for each lcore */
> ret = pthread_create(&lcore_config[i].thread_id, NULL,
> eal_thread_loop, NULL);
> - if (ret != 0)
> - rte_panic("Cannot create thread\n");
> + if (ret != 0) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot create thread\n",
> + __func__);
> + return -1;
> + }
Same question as before. If pthread_create is failing, there are worse
problems than aborting.
> /* Set thread_name for aid in debugging. */
> snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
> diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c b/lib/librte_eal/bsdapp/eal/eal_thread.c
> index d602daf..5c3947c 100644
> --- a/lib/librte_eal/bsdapp/eal/eal_thread.c
> +++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
> @@ -51,16 +51,22 @@
> n = 0;
> while (n == 0 || (n < 0 && errno == EINTR))
> n = write(m2s, &c, 1);
> - if (n < 0)
> - rte_panic("cannot write on configuration pipe\n");
> + if (n < 0) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot write on configuration pipe\n",
> + __func__);
> + return -1;
> + }
>
> /* wait ack */
> do {
> n = read(s2m, &c, 1);
> } while (n < 0 && errno == EINTR);
>
> - if (n <= 0)
> - rte_panic("cannot read on configuration pipe\n");
> + if (n <= 0) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot read on configuration pipe\n",
> + __func__);
> + return -1;
> + }
>
> return 0;
> }
> @@ -84,8 +90,19 @@ void eal_thread_init_master(unsigned lcore_id)
> RTE_PER_LCORE(_lcore_id) = lcore_id;
>
> /* set CPU affinity */
> - if (eal_thread_set_affinity() < 0)
> - rte_panic("cannot set affinity\n");
> + if (eal_thread_set_affinity() < 0) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot set affinity\n", __func__);
> + rte_move_to_panic_state();
> + }
> +}
> +
> +/* move to panic state and do not return */
> +static __attribute__((noreturn)) void
> +defunct_and_remain_in_endless_loop(void)
> +{
> + rte_move_to_panic_state();
> + while (1)
> + sleep(1);
> }
This is worse than a panic. Users will blame applications for appearing
to freeze. Please leave the panics in place rather than do this.
> /* main loop of threads */
> @@ -106,8 +123,11 @@ void eal_thread_init_master(unsigned lcore_id)
> if (thread_id == lcore_config[lcore_id].thread_id)
> break;
> }
> - if (lcore_id == RTE_MAX_LCORE)
> - rte_panic("cannot retrieve lcore id\n");
> + if (lcore_id == RTE_MAX_LCORE) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot retrieve lcore id\n",
> + __func__);
> + defunct_and_remain_in_endless_loop();
> + }
I'm not even sure this check has merit, tbh. Is there ever a chance for
an lcore thread to be spawned like this? Probably a better patch would
just remove all the code you've inserted, but keep the check you
removed.
> m2s = lcore_config[lcore_id].pipe_master2slave[0];
> s2m = lcore_config[lcore_id].pipe_slave2master[1];
> @@ -116,8 +136,10 @@ void eal_thread_init_master(unsigned lcore_id)
> RTE_PER_LCORE(_lcore_id) = lcore_id;
>
> /* set CPU affinity */
> - if (eal_thread_set_affinity() < 0)
> - rte_panic("cannot set affinity\n");
> + if (eal_thread_set_affinity() < 0) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot set affinity\n", __func__);
> + defunct_and_remain_in_endless_loop();
How does this improve the user experience?
> + }
>
> ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
>
> @@ -133,8 +155,11 @@ void eal_thread_init_master(unsigned lcore_id)
> n = read(m2s, &c, 1);
> } while (n < 0 && errno == EINTR);
>
> - if (n <= 0)
> - rte_panic("cannot read on configuration pipe\n");
> + if (n <= 0) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot read on configuration pipe\n",
> + __func__);
> + defunct_and_remain_in_endless_loop();
Same question. Actually this could happen on shutdown, I think? If
there's a race where the pipe is torn down before the thread? Not sure
if there are any ordering guarantees around that.
> + }
>
> lcore_config[lcore_id].state = RUNNING;
>
> @@ -142,11 +167,17 @@ void eal_thread_init_master(unsigned lcore_id)
> n = 0;
> while (n == 0 || (n < 0 && errno == EINTR))
> n = write(s2m, &c, 1);
> - if (n < 0)
> - rte_panic("cannot write on configuration pipe\n");
> -
> - if (lcore_config[lcore_id].f == NULL)
> - rte_panic("NULL function pointer\n");
> + if (n < 0) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot write on configuration pipe\n",
> + __func__);
> + defunct_and_remain_in_endless_loop();
> + }
> +
> + if (lcore_config[lcore_id].f == NULL) {
> + RTE_LOG(CRIT, EAL, "%s(): NULL function pointer\n",
> + __func__);
> + defunct_and_remain_in_endless_loop();
> + }
I don't see how any of this is better for the user. In fact, I think
this is worse because it will make portions of the application stop
working without any way to move forward. rte_panic() will at least give
the process a chance to recover from a potentially ephemeral condition.
> /* call the function and store the return value */
> fct_arg = lcore_config[lcore_id].arg;
> diff --git a/lib/librte_eal/common/eal_common_launch.c b/lib/librte_eal/common/eal_common_launch.c
> index fe0ba3f..6f8bd46 100644
> --- a/lib/librte_eal/common/eal_common_launch.c
> +++ b/lib/librte_eal/common/eal_common_launch.c
> @@ -14,6 +14,7 @@
> #include <rte_pause.h>
> #include <rte_per_lcore.h>
> #include <rte_lcore.h>
> +#include <rte_debug.h>
>
> /*
> * Wait until a lcore finished its job.
> @@ -88,3 +89,23 @@ enum rte_lcore_state_t
> rte_eal_wait_lcore(lcore_id);
> }
> }
> +
> +/* panic state */
> +static int _panic_state;
> +
> +/**
> + * Check if the system is in panic state
> + * @return int
> + */
> +int rte_get_panic_state(void)
> +{
> + return _panic_state;
> +}
> +
> +/**
> + * Move the system to be in panic state
> + */
> +void rte_move_to_panic_state(void)
> +{
> + _panic_state = 1;
> +}
> diff --git a/lib/librte_eal/common/include/rte_debug.h b/lib/librte_eal/common/include/rte_debug.h
> index 272df49..b421d33 100644
> --- a/lib/librte_eal/common/include/rte_debug.h
> +++ b/lib/librte_eal/common/include/rte_debug.h
> @@ -79,4 +79,16 @@ void __rte_panic(const char *funcname , const char *format, ...)
> }
> #endif
>
> +/**
> + * Check if the system is in panic state
> + * @return int
> + */
> +int rte_get_panic_state(void);
> +
> +/**
> + * Move the system to be in panic state
> + */
> +void rte_move_to_panic_state(void);
This seems to only exist as a way of triggering the run_once check in
the eal_init. It doesn't add anything except one more state variable to
check against. What is the purpose?
Further, it seems unrelated to removing panics.
> +
> #endif /* _RTE_DEBUG_H_ */
> diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
> index 21afa73..393441a 100644
> --- a/lib/librte_eal/linuxapp/eal/eal.c
> +++ b/lib/librte_eal/linuxapp/eal/eal.c
> @@ -160,7 +160,7 @@ enum rte_iova_mode
> * We also don't lock the whole file, so that in future we can use read-locks
> * on other parts, e.g. memzones, to detect if there are running secondary
> * processes. */
> -static void
> +static int
> rte_eal_config_create(void)
> {
> void *rte_mem_cfg_addr;
> @@ -169,7 +169,7 @@ enum rte_iova_mode
> const char *pathname = eal_runtime_config_path();
>
> if (internal_config.no_shconf)
> - return;
> + return 0;
>
> /* map the config before hugepage address so that we don't waste a page */
> if (internal_config.base_virtaddr != 0)
> @@ -179,30 +179,39 @@ enum rte_iova_mode
> else
> rte_mem_cfg_addr = NULL;
>
> - if (mem_cfg_fd < 0){
> + if (mem_cfg_fd < 0) {
> mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660);
> - if (mem_cfg_fd < 0)
> - rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
> + if (mem_cfg_fd < 0) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot open '%s' for rte_mem_config\n",
> + __func__, pathname);
> + return -1;
> + }
> }
>
> retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config));
> - if (retval < 0){
> + if (retval < 0) {
> close(mem_cfg_fd);
> - rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname);
> + RTE_LOG(CRIT, EAL, "%s(): Cannot resize '%s' for rte_mem_config\n",
> + __func__, pathname);
> + return -1;
> }
>
> retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
> - if (retval < 0){
> + if (retval < 0) {
> close(mem_cfg_fd);
> - rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. Is another primary "
> - "process running?\n", pathname);
> + RTE_LOG(CRIT, EAL, "%s(): Cannot create lock on '%s'."
> + " Is another primary process running?\n",
> + __func__, pathname);
> + return -1;
> }
>
> rte_mem_cfg_addr = mmap(rte_mem_cfg_addr, sizeof(*rte_config.mem_config),
> PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
>
> - if (rte_mem_cfg_addr == MAP_FAILED){
> - rte_panic("Cannot mmap memory for rte_config\n");
> + if (rte_mem_cfg_addr == MAP_FAILED) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot mmap memory for rte_config\n",
> + __func__);
> + return -1;
> }
> memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config));
> rte_config.mem_config = rte_mem_cfg_addr;
> @@ -211,10 +220,11 @@ enum rte_iova_mode
> * processes could later map the config into this exact location */
> rte_config.mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr;
>
> + return 0;
> }
>
> /* attach to an existing shared memory config */
> -static void
> +static int
> rte_eal_config_attach(void)
> {
> struct rte_mem_config *mem_config;
> @@ -222,33 +232,40 @@ enum rte_iova_mode
> const char *pathname = eal_runtime_config_path();
>
> if (internal_config.no_shconf)
> - return;
> + return 0;
>
> - if (mem_cfg_fd < 0){
> + if (mem_cfg_fd < 0) {
> mem_cfg_fd = open(pathname, O_RDWR);
> - if (mem_cfg_fd < 0)
> - rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
> + if (mem_cfg_fd < 0) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot open '%s' for rte_mem_config\n",
> + __func__, pathname);
> + return -1;
> + }
> }
>
> /* map it as read-only first */
> mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config),
> PROT_READ, MAP_SHARED, mem_cfg_fd, 0);
> - if (mem_config == MAP_FAILED)
> - rte_panic("Cannot mmap memory for rte_config! error %i (%s)\n",
> - errno, strerror(errno));
> + if (mem_config == MAP_FAILED) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot mmap memory for rte_config! error %i (%s)\n",
> + __func__, errno, strerror(errno));
> + return -1;
> + }
>
> rte_config.mem_config = mem_config;
> +
> + return 0;
> }
>
> /* reattach the shared config at exact memory location primary process has it */
> -static void
> +static int
> rte_eal_config_reattach(void)
> {
> struct rte_mem_config *mem_config;
> void *rte_mem_cfg_addr;
>
> if (internal_config.no_shconf)
> - return;
> + return 0;
>
> /* save the address primary process has mapped shared config to */
> rte_mem_cfg_addr = (void *) (uintptr_t) rte_config.mem_config->mem_cfg_addr;
> @@ -263,16 +280,21 @@ enum rte_iova_mode
> if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) {
> if (mem_config != MAP_FAILED)
> /* errno is stale, don't use */
> - rte_panic("Cannot mmap memory for rte_config at [%p], got [%p]"
> - " - please use '--base-virtaddr' option\n",
> - rte_mem_cfg_addr, mem_config);
> + RTE_LOG(CRIT, EAL, "%s(): Cannot mmap memory for "
> + "rte_config at [%p], got [%p] - please use "
> + "'--base-virtaddr' option\n",
> + __func__, rte_mem_cfg_addr, mem_config);
> else
> - rte_panic("Cannot mmap memory for rte_config! error %i (%s)\n",
> - errno, strerror(errno));
> + RTE_LOG(CRIT, EAL, "%s(): Cannot mmap memory for "
> + "rte_config! error %i (%s)\n",
> + __func__, errno, strerror(errno));
> + return -1;
> }
> close(mem_cfg_fd);
>
> rte_config.mem_config = mem_config;
> +
> + return 0;
> }
>
> /* Detect if we are a primary or a secondary process */
> @@ -296,24 +318,31 @@ enum rte_proc_type_t
> }
>
> /* Sets up rte_config structure with the pointer to shared memory config.*/
> -static void
> +static int
> rte_config_init(void)
> {
> rte_config.process_type = internal_config.process_type;
>
> switch (rte_config.process_type){
> case RTE_PROC_PRIMARY:
> - rte_eal_config_create();
> + if (rte_eal_config_create() != 0)
> + return -1;
> break;
> case RTE_PROC_SECONDARY:
> - rte_eal_config_attach();
> + if (rte_eal_config_attach() != 0)
> + return -1;
> rte_eal_mcfg_wait_complete(rte_config.mem_config);
> - rte_eal_config_reattach();
> + if (rte_eal_config_reattach() != 0)
> + return -1;
> break;
> case RTE_PROC_AUTO:
> case RTE_PROC_INVALID:
> - rte_panic("Invalid process type\n");
> + RTE_LOG(CRIT, EAL, "%s(): Invalid process type %d\n",
> + __func__, rte_config.process_type);
> + return -1;
> }
> +
> + return 0;
> }
>
> /* Unlocks hugepage directories that were locked by eal_hugepage_info_init */
> @@ -820,7 +849,8 @@ static void rte_eal_init_alert(const char *msg)
>
> rte_srand(rte_rdtsc());
>
> - rte_config_init();
> + if (rte_config_init() != 0)
> + return -1;
>
> if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) {
> rte_eal_init_alert("Cannot init logging.");
> @@ -892,6 +922,9 @@ static void rte_eal_init_alert(const char *msg)
>
> eal_thread_init_master(rte_config.master_lcore);
>
> + if (rte_get_panic_state())
> + return -1;
> +
Please just use run_once. That's a better way of preventing this.
> ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
>
> RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%x;cpuset=[%s%s])\n",
> @@ -909,18 +942,27 @@ static void rte_eal_init_alert(const char *msg)
> * create communication pipes between master thread
> * and children
> */
> - if (pipe(lcore_config[i].pipe_master2slave) < 0)
> - rte_panic("Cannot create pipe\n");
> - if (pipe(lcore_config[i].pipe_slave2master) < 0)
> - rte_panic("Cannot create pipe\n");
> + if (pipe(lcore_config[i].pipe_master2slave) < 0) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot create pipe\n",
> + __func__);
> + return -1;
> + }
> + if (pipe(lcore_config[i].pipe_slave2master) < 0) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot create pipe\n",
> + __func__);
> + return -1;
> + }
>
> lcore_config[i].state = WAIT;
>
> /* create a thread for each lcore */
> ret = pthread_create(&lcore_config[i].thread_id, NULL,
> eal_thread_loop, NULL);
> - if (ret != 0)
> - rte_panic("Cannot create thread\n");
> + if (ret != 0) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot create thread\n",
> + __func__);
> + return -1;
> + }
>
> /* Set thread_name for aid in debugging. */
> snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
> diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c
> index 08e150b..3afcee5 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_thread.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_thread.c
All of the comments from the bsd side apply here.
> @@ -51,16 +51,22 @@
> n = 0;
> while (n == 0 || (n < 0 && errno == EINTR))
> n = write(m2s, &c, 1);
> - if (n < 0)
> - rte_panic("cannot write on configuration pipe\n");
> + if (n < 0) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot write on configuration pipe\n",
> + __func__);
> + return -1;
> + }
>
> /* wait ack */
> do {
> n = read(s2m, &c, 1);
> } while (n < 0 && errno == EINTR);
>
> - if (n <= 0)
> - rte_panic("cannot read on configuration pipe\n");
> + if (n <= 0) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot read on configuration pipe\n",
> + __func__);
> + return -1;
> + }
>
> return 0;
> }
> @@ -84,8 +90,19 @@ void eal_thread_init_master(unsigned lcore_id)
> RTE_PER_LCORE(_lcore_id) = lcore_id;
>
> /* set CPU affinity */
> - if (eal_thread_set_affinity() < 0)
> - rte_panic("cannot set affinity\n");
> + if (eal_thread_set_affinity() < 0) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot set affinity\n", __func__);
> + rte_move_to_panic_state();
> + }
> +}
> +
> +/* move to panic state and do not return */
> +static __attribute__((noreturn)) void
> +defunct_and_remain_in_endless_loop(void)
> +{
> + rte_move_to_panic_state();
> + while (1)
> + sleep(1);
> }
>
> /* main loop of threads */
> @@ -106,8 +123,11 @@ void eal_thread_init_master(unsigned lcore_id)
> if (thread_id == lcore_config[lcore_id].thread_id)
> break;
> }
> - if (lcore_id == RTE_MAX_LCORE)
> - rte_panic("cannot retrieve lcore id\n");
> + if (lcore_id == RTE_MAX_LCORE) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot retrieve lcore id\n",
> + __func__);
> + defunct_and_remain_in_endless_loop();
> + }
>
> m2s = lcore_config[lcore_id].pipe_master2slave[0];
> s2m = lcore_config[lcore_id].pipe_slave2master[1];
> @@ -116,8 +136,10 @@ void eal_thread_init_master(unsigned lcore_id)
> RTE_PER_LCORE(_lcore_id) = lcore_id;
>
> /* set CPU affinity */
> - if (eal_thread_set_affinity() < 0)
> - rte_panic("cannot set affinity\n");
> + if (eal_thread_set_affinity() < 0) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot set affinity\n", __func__);
> + defunct_and_remain_in_endless_loop();
> + }
>
> ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
>
> @@ -133,8 +155,11 @@ void eal_thread_init_master(unsigned lcore_id)
> n = read(m2s, &c, 1);
> } while (n < 0 && errno == EINTR);
>
> - if (n <= 0)
> - rte_panic("cannot read on configuration pipe\n");
> + if (n <= 0) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot read on configuration pipe\n",
> + __func__);
> + defunct_and_remain_in_endless_loop();
> + }
>
> lcore_config[lcore_id].state = RUNNING;
>
> @@ -142,11 +167,17 @@ void eal_thread_init_master(unsigned lcore_id)
> n = 0;
> while (n == 0 || (n < 0 && errno == EINTR))
> n = write(s2m, &c, 1);
> - if (n < 0)
> - rte_panic("cannot write on configuration pipe\n");
> -
> - if (lcore_config[lcore_id].f == NULL)
> - rte_panic("NULL function pointer\n");
> + if (n < 0) {
> + RTE_LOG(CRIT, EAL, "%s(): Cannot write on configuration pipe\n",
> + __func__);
> + defunct_and_remain_in_endless_loop();
> + }
> +
> + if (lcore_config[lcore_id].f == NULL) {
> + RTE_LOG(CRIT, EAL, "%s(): NULL function pointer\n",
> + __func__);
> + defunct_and_remain_in_endless_loop();
> + }
>
> /* call the function and store the return value */
> fct_arg = lcore_config[lcore_id].arg;
More information about the dev
mailing list