[dpdk-dev] [PATCH v4 10/11] eal: replace rte_panic instances in init sequence

Aaron Conole aconole at redhat.com
Thu Apr 19 19:48:22 CEST 2018


Arnon Warshavsky <arnon at qwilt.com> writes:

> Local functions to this file,
> changing from void to int are non-abi-breaking.
> For handling the single function that cannot
> change from void to int due to abi,
> where this is the only place it is called in,
> I added a state variable that is being checked
> right after the call to this function.
>
> --
>
> v4 - fix split literal strings in log messages
>
> Signed-off-by: Arnon Warshavsky <arnon at qwilt.com>
> ---

Hi Arnon,

Always happy to see panic calls get removed.  I have some comments inline.

>  lib/librte_eal/bsdapp/eal/eal.c           |  86 ++++++++++++++-------
>  lib/librte_eal/bsdapp/eal/eal_thread.c    |  65 +++++++++++-----
>  lib/librte_eal/common/eal_common_launch.c |  21 ++++++
>  lib/librte_eal/common/include/rte_debug.h |  12 +++
>  lib/librte_eal/linuxapp/eal/eal.c         | 120 ++++++++++++++++++++----------
>  lib/librte_eal/linuxapp/eal/eal_thread.c  |  65 +++++++++++-----
>  6 files changed, 270 insertions(+), 99 deletions(-)
>
> diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
> index d996190..9c2f6f1 100644
> --- a/lib/librte_eal/bsdapp/eal/eal.c
> +++ b/lib/librte_eal/bsdapp/eal/eal.c
> @@ -151,7 +151,7 @@ enum rte_iova_mode
>   * We also don't lock the whole file, so that in future we can use read-locks
>   * on other parts, e.g. memzones, to detect if there are running secondary
>   * processes. */
> -static void
> +static int
>  rte_eal_config_create(void)
>  {
>  	void *rte_mem_cfg_addr;
> @@ -160,60 +160,78 @@ enum rte_iova_mode
>  	const char *pathname = eal_runtime_config_path();
>  
>  	if (internal_config.no_shconf)
> -		return;
> +		return 0;
>  
>  	if (mem_cfg_fd < 0){
>  		mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660);
> -		if (mem_cfg_fd < 0)
> -			rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
> +		if (mem_cfg_fd < 0) {
> +			RTE_LOG(CRIT, EAL, "%s(): Cannot open '%s' for rte_mem_config\n",
> +					__func__, pathname);
> +			return -1;
> +		}
>  	}
>  
>  	retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config));
>  	if (retval < 0){
>  		close(mem_cfg_fd);
> -		rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname);
> +		RTE_LOG(CRIT, EAL, "%s(): Cannot resize '%s' for rte_mem_config\n",
> +				__func__, pathname);
> +		return -1;

Previously, it wasn't possible for mem_cfg_fd to be reused after a
failure.  Now it is - please reset it to -1 in these close conditions.

>  	}
>  
>  	retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
>  	if (retval < 0){
>  		close(mem_cfg_fd);
> -		rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. Is another primary "
> -				"process running?\n", pathname);
> +		RTE_LOG(CRIT, EAL, "%s(): Cannot create lock on '%s'. Is another primary process running?\n",
> +				__func__, pathname);
> +		return -1;
>  	}
>  
>  	rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config),
>  				PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
>  
>  	if (rte_mem_cfg_addr == MAP_FAILED){
> -		rte_panic("Cannot mmap memory for rte_config\n");
> +		RTE_LOG(CRIT, EAL, "%s(): Cannot mmap memory for rte_config\n",
> +				__func__);
> +		return -1;
>  	}
>  	memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config));
>  	rte_config.mem_config = rte_mem_cfg_addr;
> +
> +	return 0;
>  }
>  
>  /* attach to an existing shared memory config */
> -static void
> +static int
>  rte_eal_config_attach(void)
>  {
>  	void *rte_mem_cfg_addr;
>  	const char *pathname = eal_runtime_config_path();
>  
>  	if (internal_config.no_shconf)
> -		return;
> +		return 0;
>  
>  	if (mem_cfg_fd < 0){
>  		mem_cfg_fd = open(pathname, O_RDWR);
> -		if (mem_cfg_fd < 0)
> -			rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
> +		if (mem_cfg_fd < 0) {
> +			RTE_LOG(CRIT, EAL, "%s(): Cannot open '%s' for rte_mem_config\n",
> +					__func__, pathname);
> +			return -1;
> +		}
>  	}
>  
>  	rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config),
>  				PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
>  	close(mem_cfg_fd);

Again, previously this would have aborted on a failure.  So mem_cfg_fd
needs to be reset to a value that allows retry.

> -	if (rte_mem_cfg_addr == MAP_FAILED)
> -		rte_panic("Cannot mmap memory for rte_config\n");
> +	if (rte_mem_cfg_addr == MAP_FAILED) {
> +		RTE_LOG(CRIT, EAL, "%s(): Cannot mmap memory for rte_config\n",
> +				__func__);
> +		return -1;
> +	}
>  
>  	rte_config.mem_config = rte_mem_cfg_addr;
> +
> +	return 0;
>  }
>  
>  /* Detect if we are a primary or a secondary process */
> @@ -237,23 +255,28 @@ enum rte_proc_type_t
>  }
>  
>  /* Sets up rte_config structure with the pointer to shared memory config.*/
> -static void
> +static int
>  rte_config_init(void)
>  {
>  	rte_config.process_type = internal_config.process_type;
>  
>  	switch (rte_config.process_type){
>  	case RTE_PROC_PRIMARY:
> -		rte_eal_config_create();
> +		if (rte_eal_config_create())
> +			return -1;
>  		break;
>  	case RTE_PROC_SECONDARY:
> -		rte_eal_config_attach();
> +		if (rte_eal_config_attach())
> +			return -1;
>  		rte_eal_mcfg_wait_complete(rte_config.mem_config);
>  		break;
>  	case RTE_PROC_AUTO:
>  	case RTE_PROC_INVALID:

Not for this patch, but I just noticed that this should probably use a
'default' case.

> -		rte_panic("Invalid process type\n");
> +		RTE_LOG(CRIT, EAL, "%s(): Invalid process type %d\n",
> +				__func__, rte_config.process_type);
> +		return -1;
>  	}
> +	return 0;
>  }
>  
>  /* display usage */
> @@ -595,7 +618,8 @@ static void rte_eal_init_alert(const char *msg)
>  
>  	rte_srand(rte_rdtsc());
>  
> -	rte_config_init();
> +	if (rte_config_init() != 0)
> +		return -1;

Use rte_eal_init_alert to indicate why you are failing the init.

>  	if (rte_mp_channel_init() < 0) {
>  		rte_eal_init_alert("failed to init mp channel\n");
> @@ -652,7 +676,8 @@ static void rte_eal_init_alert(const char *msg)
>  
>  	eal_check_mem_on_local_socket();
>  
> -	eal_thread_init_master(rte_config.master_lcore);
> +	if (eal_thread_init_master(rte_config.master_lcore) != 0)
> +		return -1;

Is it ever possible to recover from this?  This still needs an
rte_eal_init_alert() call.

>  
>  	ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
>  
> @@ -666,18 +691,27 @@ static void rte_eal_init_alert(const char *msg)
>  		 * create communication pipes between master thread
>  		 * and children
>  		 */
> -		if (pipe(lcore_config[i].pipe_master2slave) < 0)
> -			rte_panic("Cannot create pipe\n");
> -		if (pipe(lcore_config[i].pipe_slave2master) < 0)
> -			rte_panic("Cannot create pipe\n");
> +		if (pipe(lcore_config[i].pipe_master2slave) < 0) {
> +			RTE_LOG(CRIT, EAL, "%s(): Cannot create pipe\n",
> +					__func__);
> +			return -1;
> +		}
> +		if (pipe(lcore_config[i].pipe_slave2master) < 0) {
> +			RTE_LOG(CRIT, EAL, "%s(): Cannot create pipe\n",
> +					__func__);
> +			return -1;
> +		}

How are you cleaning up the threads that were spawned?  Let's say this
loop will execute 5 times, and on the 3rd entry, these errors happen.
You now leave DPDK 'half-initialized' - you've spun up threads and
allocated memory.

Also, again use rte_eal_init_alert().  It was added for a reason :)

>  
>  		lcore_config[i].state = WAIT;
>  
>  		/* create a thread for each lcore */
>  		ret = pthread_create(&lcore_config[i].thread_id, NULL,
>  				     eal_thread_loop, NULL);
> -		if (ret != 0)
> -			rte_panic("Cannot create thread\n");
> +		if (ret != 0) {
> +			RTE_LOG(CRIT, EAL, "%s(): Cannot create thread\n",
> +					__func__);
> +			return -1;
> +		}

Same question as before.  If pthread_create is failing, there are worse
problems than aborting.

>  		/* Set thread_name for aid in debugging. */
>  		snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
> diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c b/lib/librte_eal/bsdapp/eal/eal_thread.c
> index d602daf..5c3947c 100644
> --- a/lib/librte_eal/bsdapp/eal/eal_thread.c
> +++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
> @@ -51,16 +51,22 @@
>  	n = 0;
>  	while (n == 0 || (n < 0 && errno == EINTR))
>  		n = write(m2s, &c, 1);
> -	if (n < 0)
> -		rte_panic("cannot write on configuration pipe\n");
> +	if (n < 0) {
> +		RTE_LOG(CRIT, EAL, "%s(): Cannot write on configuration pipe\n",
> +				__func__);
> +		return -1;
> +	}
>  
>  	/* wait ack */
>  	do {
>  		n = read(s2m, &c, 1);
>  	} while (n < 0 && errno == EINTR);
>  
> -	if (n <= 0)
> -		rte_panic("cannot read on configuration pipe\n");
> +	if (n <= 0) {
> +		RTE_LOG(CRIT, EAL, "%s(): Cannot read on configuration pipe\n",
> +				__func__);
> +		return -1;
> +	}
>  
>  	return 0;
>  }
> @@ -84,8 +90,19 @@ void eal_thread_init_master(unsigned lcore_id)
>  	RTE_PER_LCORE(_lcore_id) = lcore_id;
>  
>  	/* set CPU affinity */
> -	if (eal_thread_set_affinity() < 0)
> -		rte_panic("cannot set affinity\n");
> +	if (eal_thread_set_affinity() < 0) {
> +		RTE_LOG(CRIT, EAL, "%s(): Cannot set affinity\n", __func__);
> +		rte_move_to_panic_state();
> +	}
> +}
> +
> +/* move to panic state and do not return */
> +static __attribute__((noreturn)) void
> +defunct_and_remain_in_endless_loop(void)
> +{
> +	rte_move_to_panic_state();
> +	while (1)
> +		sleep(1);
>  }

This is worse than a panic.  Users will blame applications for appearing
to freeze.  Please leave the panics in place rather than do this.

>  /* main loop of threads */
> @@ -106,8 +123,11 @@ void eal_thread_init_master(unsigned lcore_id)
>  		if (thread_id == lcore_config[lcore_id].thread_id)
>  			break;
>  	}
> -	if (lcore_id == RTE_MAX_LCORE)
> -		rte_panic("cannot retrieve lcore id\n");
> +	if (lcore_id == RTE_MAX_LCORE) {
> +		RTE_LOG(CRIT, EAL, "%s(): Cannot retrieve lcore id\n",
> +				__func__);
> +		defunct_and_remain_in_endless_loop();
> +	}

I'm not even sure this check has merit, tbh.  Is there ever a chance for
an lcore thread to be spawned like this?  Probably a better patch would
just remove all the code you've inserted, but keep the check you
removed.

>  	m2s = lcore_config[lcore_id].pipe_master2slave[0];
>  	s2m = lcore_config[lcore_id].pipe_slave2master[1];
> @@ -116,8 +136,10 @@ void eal_thread_init_master(unsigned lcore_id)
>  	RTE_PER_LCORE(_lcore_id) = lcore_id;
>  
>  	/* set CPU affinity */
> -	if (eal_thread_set_affinity() < 0)
> -		rte_panic("cannot set affinity\n");
> +	if (eal_thread_set_affinity() < 0) {
> +		RTE_LOG(CRIT, EAL, "%s(): Cannot set affinity\n", __func__);
> +		defunct_and_remain_in_endless_loop();

How does this improve the user experience?

> +	}
>  
>  	ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
>  
> @@ -133,8 +155,11 @@ void eal_thread_init_master(unsigned lcore_id)
>  			n = read(m2s, &c, 1);
>  		} while (n < 0 && errno == EINTR);
>  
> -		if (n <= 0)
> -			rte_panic("cannot read on configuration pipe\n");
> +		if (n <= 0) {
> +			RTE_LOG(CRIT, EAL, "%s(): Cannot read on configuration pipe\n",
> +					__func__);
> +			defunct_and_remain_in_endless_loop();

Same question.  Actually this could happen on shutdown, I think?  Is
there a race where the pipe is torn down before the thread?  Not sure
if there are any ordering guarantees around that.

> +		}
>  
>  		lcore_config[lcore_id].state = RUNNING;
>  
> @@ -142,11 +167,17 @@ void eal_thread_init_master(unsigned lcore_id)
>  		n = 0;
>  		while (n == 0 || (n < 0 && errno == EINTR))
>  			n = write(s2m, &c, 1);
> -		if (n < 0)
> -			rte_panic("cannot write on configuration pipe\n");
> -
> -		if (lcore_config[lcore_id].f == NULL)
> -			rte_panic("NULL function pointer\n");
> +		if (n < 0) {
> +			RTE_LOG(CRIT, EAL, "%s(): Cannot write on configuration pipe\n",
> +					__func__);
> +			defunct_and_remain_in_endless_loop();
> +		}
> +
> +		if (lcore_config[lcore_id].f == NULL) {
> +			RTE_LOG(CRIT, EAL, "%s(): NULL function pointer\n",
> +					__func__);
> +			defunct_and_remain_in_endless_loop();
> +		}

I don't see how any of this is better for the user.  In fact, I think
this is worse because it will make portions of the application stop
working without any way to move forward.  rte_panic() will at least give
the process a chance to recover from a potentially ephemeral condition.

>  		/* call the function and store the return value */
>  		fct_arg = lcore_config[lcore_id].arg;
> diff --git a/lib/librte_eal/common/eal_common_launch.c b/lib/librte_eal/common/eal_common_launch.c
> index fe0ba3f..6f8bd46 100644
> --- a/lib/librte_eal/common/eal_common_launch.c
> +++ b/lib/librte_eal/common/eal_common_launch.c
> @@ -14,6 +14,7 @@
>  #include <rte_pause.h>
>  #include <rte_per_lcore.h>
>  #include <rte_lcore.h>
> +#include <rte_debug.h>
>  
>  /*
>   * Wait until a lcore finished its job.
> @@ -88,3 +89,23 @@ enum rte_lcore_state_t
>  		rte_eal_wait_lcore(lcore_id);
>  	}
>  }
> +
> +/* panic state */
> +static int _panic_state;
> +
> +/**
> + * Check if the system is in panic state
> + * @return int
> + */
> +int rte_get_panic_state(void)
> +{
> +	return _panic_state;
> +}
> +
> +/**
> + * Move the system to be in panic state
> + */
> +void rte_move_to_panic_state(void)
> +{
> +	_panic_state = 1;
> +}
> diff --git a/lib/librte_eal/common/include/rte_debug.h b/lib/librte_eal/common/include/rte_debug.h
> index 272df49..b421d33 100644
> --- a/lib/librte_eal/common/include/rte_debug.h
> +++ b/lib/librte_eal/common/include/rte_debug.h
> @@ -79,4 +79,16 @@ void __rte_panic(const char *funcname , const char *format, ...)
>  }
>  #endif
>  
> +/**
> + * Check if the system is in panic state
> + * @return int
> + */
> +int rte_get_panic_state(void);
> +
> +/**
> + * Move the system to be in panic state
> + */
> +void rte_move_to_panic_state(void);

This seems to only exist as a way of triggering the run_once check in
the eal_init.  It doesn't add anything except one more state variable to
check against.  What is the purpose?

Further, it seems unrelated to removing panics.

> +
>  #endif /* _RTE_DEBUG_H_ */
> diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
> index 21afa73..393441a 100644
> --- a/lib/librte_eal/linuxapp/eal/eal.c
> +++ b/lib/librte_eal/linuxapp/eal/eal.c
> @@ -160,7 +160,7 @@ enum rte_iova_mode
>   * We also don't lock the whole file, so that in future we can use read-locks
>   * on other parts, e.g. memzones, to detect if there are running secondary
>   * processes. */
> -static void
> +static int
>  rte_eal_config_create(void)
>  {
>  	void *rte_mem_cfg_addr;
> @@ -169,7 +169,7 @@ enum rte_iova_mode
>  	const char *pathname = eal_runtime_config_path();
>  
>  	if (internal_config.no_shconf)
> -		return;
> +		return 0;
>  
>  	/* map the config before hugepage address so that we don't waste a page */
>  	if (internal_config.base_virtaddr != 0)
> @@ -179,30 +179,39 @@ enum rte_iova_mode
>  	else
>  		rte_mem_cfg_addr = NULL;
>  
> -	if (mem_cfg_fd < 0){
> +	if (mem_cfg_fd < 0) {
>  		mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660);
> -		if (mem_cfg_fd < 0)
> -			rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
> +		if (mem_cfg_fd < 0) {
> +			RTE_LOG(CRIT, EAL, "%s(): Cannot open '%s' for rte_mem_config\n",
> +				__func__, pathname);
> +			return -1;
> +		}
>  	}
>  
>  	retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config));
> -	if (retval < 0){
> +	if (retval < 0) {
>  		close(mem_cfg_fd);
> -		rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname);
> +		RTE_LOG(CRIT, EAL, "%s(): Cannot resize '%s' for rte_mem_config\n",
> +				__func__, pathname);
> +		return -1;
>  	}
>  
>  	retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
> -	if (retval < 0){
> +	if (retval < 0) {
>  		close(mem_cfg_fd);
> -		rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. Is another primary "
> -				"process running?\n", pathname);
> +		RTE_LOG(CRIT, EAL, "%s(): Cannot create lock on '%s'."
> +				" Is another primary process running?\n",
> +				__func__, pathname);
> +		return -1;
>  	}
>  
>  	rte_mem_cfg_addr = mmap(rte_mem_cfg_addr, sizeof(*rte_config.mem_config),
>  				PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
>  
> -	if (rte_mem_cfg_addr == MAP_FAILED){
> -		rte_panic("Cannot mmap memory for rte_config\n");
> +	if (rte_mem_cfg_addr == MAP_FAILED) {
> +		RTE_LOG(CRIT, EAL, "%s(): Cannot mmap memory for rte_config\n",
> +			__func__);
> +		return -1;
>  	}
>  	memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config));
>  	rte_config.mem_config = rte_mem_cfg_addr;
> @@ -211,10 +220,11 @@ enum rte_iova_mode
>  	 * processes could later map the config into this exact location */
>  	rte_config.mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr;
>  
> +	return 0;
>  }
>  
>  /* attach to an existing shared memory config */
> -static void
> +static int
>  rte_eal_config_attach(void)
>  {
>  	struct rte_mem_config *mem_config;
> @@ -222,33 +232,40 @@ enum rte_iova_mode
>  	const char *pathname = eal_runtime_config_path();
>  
>  	if (internal_config.no_shconf)
> -		return;
> +		return 0;
>  
> -	if (mem_cfg_fd < 0){
> +	if (mem_cfg_fd < 0) {
>  		mem_cfg_fd = open(pathname, O_RDWR);
> -		if (mem_cfg_fd < 0)
> -			rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
> +		if (mem_cfg_fd < 0) {
> +			RTE_LOG(CRIT, EAL, "%s(): Cannot open '%s' for rte_mem_config\n",
> +						__func__, pathname);
> +			return -1;
> +		}
>  	}
>  
>  	/* map it as read-only first */
>  	mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config),
>  			PROT_READ, MAP_SHARED, mem_cfg_fd, 0);
> -	if (mem_config == MAP_FAILED)
> -		rte_panic("Cannot mmap memory for rte_config! error %i (%s)\n",
> -			  errno, strerror(errno));
> +	if (mem_config == MAP_FAILED) {
> +		RTE_LOG(CRIT, EAL, "%s(): Cannot mmap memory for rte_config! error %i (%s)\n",
> +				__func__, errno, strerror(errno));
> +		return -1;
> +	}
>  
>  	rte_config.mem_config = mem_config;
> +
> +	return 0;
>  }
>  
>  /* reattach the shared config at exact memory location primary process has it */
> -static void
> +static int
>  rte_eal_config_reattach(void)
>  {
>  	struct rte_mem_config *mem_config;
>  	void *rte_mem_cfg_addr;
>  
>  	if (internal_config.no_shconf)
> -		return;
> +		return 0;
>  
>  	/* save the address primary process has mapped shared config to */
>  	rte_mem_cfg_addr = (void *) (uintptr_t) rte_config.mem_config->mem_cfg_addr;
> @@ -263,16 +280,21 @@ enum rte_iova_mode
>  	if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) {
>  		if (mem_config != MAP_FAILED)
>  			/* errno is stale, don't use */
> -			rte_panic("Cannot mmap memory for rte_config at [%p], got [%p]"
> -				  " - please use '--base-virtaddr' option\n",
> -				  rte_mem_cfg_addr, mem_config);
> +			RTE_LOG(CRIT, EAL, "%s(): Cannot mmap memory for "
> +					"rte_config at [%p], got [%p] - please use "
> +					"'--base-virtaddr' option\n",
> +					__func__, rte_mem_cfg_addr, mem_config);
>  		else
> -			rte_panic("Cannot mmap memory for rte_config! error %i (%s)\n",
> -				  errno, strerror(errno));
> +			RTE_LOG(CRIT, EAL, "%s(): Cannot mmap memory for "
> +					"rte_config! error %i (%s)\n",
> +					__func__, errno, strerror(errno));
> +		return -1;
>  	}
>  	close(mem_cfg_fd);
>  
>  	rte_config.mem_config = mem_config;
> +
> +	return 0;
>  }
>  
>  /* Detect if we are a primary or a secondary process */
> @@ -296,24 +318,31 @@ enum rte_proc_type_t
>  }
>  
>  /* Sets up rte_config structure with the pointer to shared memory config.*/
> -static void
> +static int
>  rte_config_init(void)
>  {
>  	rte_config.process_type = internal_config.process_type;
>  
>  	switch (rte_config.process_type){
>  	case RTE_PROC_PRIMARY:
> -		rte_eal_config_create();
> +		if (rte_eal_config_create() != 0)
> +			return -1;
>  		break;
>  	case RTE_PROC_SECONDARY:
> -		rte_eal_config_attach();
> +		if (rte_eal_config_attach() != 0)
> +			return -1;
>  		rte_eal_mcfg_wait_complete(rte_config.mem_config);
> -		rte_eal_config_reattach();
> +		if (rte_eal_config_reattach() != 0)
> +			return -1;
>  		break;
>  	case RTE_PROC_AUTO:
>  	case RTE_PROC_INVALID:
> -		rte_panic("Invalid process type\n");
> +		RTE_LOG(CRIT, EAL, "%s(): Invalid process type %d\n",
> +				__func__, rte_config.process_type);
> +		return -1;
>  	}
> +
> +	return 0;
>  }
>  
>  /* Unlocks hugepage directories that were locked by eal_hugepage_info_init */
> @@ -820,7 +849,8 @@ static void rte_eal_init_alert(const char *msg)
>  
>  	rte_srand(rte_rdtsc());
>  
> -	rte_config_init();
> +	if (rte_config_init() != 0)
> +		return -1;
>  
>  	if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) {
>  		rte_eal_init_alert("Cannot init logging.");
> @@ -892,6 +922,9 @@ static void rte_eal_init_alert(const char *msg)
>  
>  	eal_thread_init_master(rte_config.master_lcore);
>  
> +	if (rte_get_panic_state())
> +		return -1;
> +

Please just use run_once.  That's a better way of preventing this.

>  	ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
>  
>  	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%x;cpuset=[%s%s])\n",
> @@ -909,18 +942,27 @@ static void rte_eal_init_alert(const char *msg)
>  		 * create communication pipes between master thread
>  		 * and children
>  		 */
> -		if (pipe(lcore_config[i].pipe_master2slave) < 0)
> -			rte_panic("Cannot create pipe\n");
> -		if (pipe(lcore_config[i].pipe_slave2master) < 0)
> -			rte_panic("Cannot create pipe\n");
> +		if (pipe(lcore_config[i].pipe_master2slave) < 0) {
> +			RTE_LOG(CRIT, EAL, "%s(): Cannot create pipe\n",
> +					__func__);
> +			return -1;
> +		}
> +		if (pipe(lcore_config[i].pipe_slave2master) < 0) {
> +			RTE_LOG(CRIT, EAL, "%s(): Cannot create pipe\n",
> +					__func__);
> +			return -1;
> +		}
>  
>  		lcore_config[i].state = WAIT;
>  
>  		/* create a thread for each lcore */
>  		ret = pthread_create(&lcore_config[i].thread_id, NULL,
>  				     eal_thread_loop, NULL);
> -		if (ret != 0)
> -			rte_panic("Cannot create thread\n");
> +		if (ret != 0) {
> +			RTE_LOG(CRIT, EAL, "%s(): Cannot create thread\n",
> +					__func__);
> +			return -1;
> +		}
>  
>  		/* Set thread_name for aid in debugging. */
>  		snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
> diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c
> index 08e150b..3afcee5 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_thread.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_thread.c

All of the comments from the bsd side apply here.

> @@ -51,16 +51,22 @@
>  	n = 0;
>  	while (n == 0 || (n < 0 && errno == EINTR))
>  		n = write(m2s, &c, 1);
> -	if (n < 0)
> -		rte_panic("cannot write on configuration pipe\n");
> +	if (n < 0) {
> +		RTE_LOG(CRIT, EAL, "%s(): Cannot write on configuration pipe\n",
> +				__func__);
> +		return -1;
> +	}
>  
>  	/* wait ack */
>  	do {
>  		n = read(s2m, &c, 1);
>  	} while (n < 0 && errno == EINTR);
>  
> -	if (n <= 0)
> -		rte_panic("cannot read on configuration pipe\n");
> +	if (n <= 0) {
> +		RTE_LOG(CRIT, EAL, "%s(): Cannot read on configuration pipe\n",
> +				__func__);
> +		return -1;
> +	}
>  
>  	return 0;
>  }
> @@ -84,8 +90,19 @@ void eal_thread_init_master(unsigned lcore_id)
>  	RTE_PER_LCORE(_lcore_id) = lcore_id;
>  
>  	/* set CPU affinity */
> -	if (eal_thread_set_affinity() < 0)
> -		rte_panic("cannot set affinity\n");
> +	if (eal_thread_set_affinity() < 0) {
> +		RTE_LOG(CRIT, EAL, "%s(): Cannot set affinity\n", __func__);
> +		rte_move_to_panic_state();
> +	}
> +}
> +
> +/* move to panic state and do not return */
> +static __attribute__((noreturn)) void
> +defunct_and_remain_in_endless_loop(void)
> +{
> +	rte_move_to_panic_state();
> +	while (1)
> +		sleep(1);
>  }
>  
>  /* main loop of threads */
> @@ -106,8 +123,11 @@ void eal_thread_init_master(unsigned lcore_id)
>  		if (thread_id == lcore_config[lcore_id].thread_id)
>  			break;
>  	}
> -	if (lcore_id == RTE_MAX_LCORE)
> -		rte_panic("cannot retrieve lcore id\n");
> +	if (lcore_id == RTE_MAX_LCORE) {
> +		RTE_LOG(CRIT, EAL, "%s(): Cannot retrieve lcore id\n",
> +				__func__);
> +		defunct_and_remain_in_endless_loop();
> +	}
>  
>  	m2s = lcore_config[lcore_id].pipe_master2slave[0];
>  	s2m = lcore_config[lcore_id].pipe_slave2master[1];
> @@ -116,8 +136,10 @@ void eal_thread_init_master(unsigned lcore_id)
>  	RTE_PER_LCORE(_lcore_id) = lcore_id;
>  
>  	/* set CPU affinity */
> -	if (eal_thread_set_affinity() < 0)
> -		rte_panic("cannot set affinity\n");
> +	if (eal_thread_set_affinity() < 0) {
> +		RTE_LOG(CRIT, EAL, "%s(): Cannot set affinity\n", __func__);
> +		defunct_and_remain_in_endless_loop();
> +	}
>  
>  	ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
>  
> @@ -133,8 +155,11 @@ void eal_thread_init_master(unsigned lcore_id)
>  			n = read(m2s, &c, 1);
>  		} while (n < 0 && errno == EINTR);
>  
> -		if (n <= 0)
> -			rte_panic("cannot read on configuration pipe\n");
> +		if (n <= 0) {
> +			RTE_LOG(CRIT, EAL, "%s(): Cannot read on configuration pipe\n",
> +					__func__);
> +			defunct_and_remain_in_endless_loop();
> +		}
>  
>  		lcore_config[lcore_id].state = RUNNING;
>  
> @@ -142,11 +167,17 @@ void eal_thread_init_master(unsigned lcore_id)
>  		n = 0;
>  		while (n == 0 || (n < 0 && errno == EINTR))
>  			n = write(s2m, &c, 1);
> -		if (n < 0)
> -			rte_panic("cannot write on configuration pipe\n");
> -
> -		if (lcore_config[lcore_id].f == NULL)
> -			rte_panic("NULL function pointer\n");
> +		if (n < 0) {
> +			RTE_LOG(CRIT, EAL, "%s(): Cannot write on configuration pipe\n",
> +					__func__);
> +			defunct_and_remain_in_endless_loop();
> +		}
> +
> +		if (lcore_config[lcore_id].f == NULL) {
> +			RTE_LOG(CRIT, EAL, "%s(): NULL function pointer\n",
> +					__func__);
> +			defunct_and_remain_in_endless_loop();
> +		}
>  
>  		/* call the function and store the return value */
>  		fct_arg = lcore_config[lcore_id].arg;


More information about the dev mailing list