[dpdk-dev,v3,4/5] eal: prevent secondary process init while sending messages

Message ID 4be9dbc2f5751e9584f69997d4ef0077992eae52.1519740527.git.anatoly.burakov@intel.com (mailing list archive)
State Superseded, archived
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail apply patch file failure

Commit Message

Anatoly Burakov Feb. 27, 2018, 2:35 p.m. UTC
  Currently, it is possible to spin up a secondary process while
either sendmsg or request is in progress. Fix this by adding
directory locks during init, sendmsg and requests.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3: no changes
    
    v2: no changes

 lib/librte_eal/common/eal_common_proc.c | 47 ++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)
  

Comments

Jianfeng Tan Feb. 28, 2018, 1:58 a.m. UTC | #1
Hi Anatoly,

> -----Original Message-----
> From: Burakov, Anatoly
> Sent: Tuesday, February 27, 2018 10:36 PM
> To: dev@dpdk.org
> Cc: Tan, Jianfeng
> Subject: [PATCH v3 4/5] eal: prevent secondary process init while sending
> messages
> 
> Currently, it is possible to spin up a secondary process while
> either sendmsg or request is in progress. Fix this by adding
> directory locks during init, sendmsg and requests.

Could you give a more detailed example for this issue?

And why locking the directory can help?

Thanks,
Jianfeng

> 
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> ---
> 
> Notes:
>     v3: no changes
> 
>     v2: no changes
> 
>  lib/librte_eal/common/eal_common_proc.c | 47
> ++++++++++++++++++++++++++++++++-
>  1 file changed, 46 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/librte_eal/common/eal_common_proc.c
> b/lib/librte_eal/common/eal_common_proc.c
> index 7c87971..7856a7b 100644
> --- a/lib/librte_eal/common/eal_common_proc.c
> +++ b/lib/librte_eal/common/eal_common_proc.c
> @@ -507,6 +507,7 @@ rte_mp_channel_init(void)
>  {
>  	char thread_name[RTE_MAX_THREAD_NAME_LEN];
>  	char *path;
> +	int dir_fd;
>  	pthread_t tid;
> 
>  	snprintf(mp_filter, PATH_MAX, ".%s_unix_*",
> @@ -516,14 +517,32 @@ rte_mp_channel_init(void)
>  	snprintf(mp_dir_path, PATH_MAX, "%s", dirname(path));
>  	free(path);
> 
> +	/* lock the directory */
> +	dir_fd = open(mp_dir_path, O_RDONLY);
> +	if (dir_fd < 0) {
> +		RTE_LOG(ERR, EAL, "failed to open %s: %s\n",
> +			mp_dir_path, strerror(errno));
> +		return -1;
> +	}
> +
> +	if (flock(dir_fd, LOCK_EX)) {
> +		RTE_LOG(ERR, EAL, "failed to lock %s: %s\n",
> +			mp_dir_path, strerror(errno));
> +		close(dir_fd);
> +		return -1;
> +	}
> +
>  	if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
>  	    unlink_sockets(mp_filter)) {
>  		RTE_LOG(ERR, EAL, "failed to unlink mp sockets\n");
> +		close(dir_fd);
>  		return -1;
>  	}
> 
> -	if (open_socket_fd() < 0)
> +	if (open_socket_fd() < 0) {
> +		close(dir_fd);
>  		return -1;
> +	}
> 
>  	if (pthread_create(&tid, NULL, mp_handle, NULL) < 0) {
>  		RTE_LOG(ERR, EAL, "failed to create mp thead: %s\n",
> @@ -536,6 +555,11 @@ rte_mp_channel_init(void)
>  	/* try best to set thread name */
>  	snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
> "rte_mp_handle");
>  	rte_thread_setname(tid, thread_name);
> +
> +	/* unlock the directory */
> +	flock(dir_fd, LOCK_UN);
> +	close(dir_fd);
> +
>  	return 0;
>  }
> 
> @@ -650,6 +674,14 @@ mp_send(struct rte_mp_msg *msg, const char
> *peer, int type)
>  		return -1;
>  	}
>  	dir_fd = dirfd(mp_dir);
> +	/* lock the directory to prevent processes spinning up while we send
> */
> +	if (flock(dir_fd, LOCK_EX)) {
> +		RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
> +			mp_dir_path);
> +		rte_errno = errno;
> +		closedir(mp_dir);
> +		return -1;
> +	}
>  	while ((ent = readdir(mp_dir))) {
>  		char path[PATH_MAX];
>  		const char *peer_name;
> @@ -673,6 +705,8 @@ mp_send(struct rte_mp_msg *msg, const char *peer,
> int type)
>  		else if (active > 0 && send_msg(path, msg, type) < 0)
>  			ret = -1;
>  	}
> +	/* unlock the dir */
> +	flock(dir_fd, LOCK_UN);
> 
>  	closedir(mp_dir);
>  	return ret;
> @@ -832,6 +866,15 @@ rte_mp_request(struct rte_mp_msg *req, struct
> rte_mp_reply *reply,
>  	}
>  	dir_fd = dirfd(mp_dir);
> 
> +	/* lock the directory to prevent processes spinning up while we send
> */
> +	if (flock(dir_fd, LOCK_EX)) {
> +		RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
> +			mp_dir_path);
> +		closedir(mp_dir);
> +		rte_errno = errno;
> +		return -1;
> +	}
> +
>  	while ((ent = readdir(mp_dir))) {
>  		const char *peer_name;
>  		char path[PATH_MAX];
> @@ -857,6 +900,8 @@ rte_mp_request(struct rte_mp_msg *req, struct
> rte_mp_reply *reply,
>  		if (mp_request_one(path, req, reply, &end))
>  			ret = -1;
>  	}
> +	/* unlock the directory */
> +	flock(dir_fd, LOCK_UN);
> 
>  	closedir(mp_dir);
>  	return ret;
> --
> 2.7.4
  
Anatoly Burakov Feb. 28, 2018, 10:19 a.m. UTC | #2
On 28-Feb-18 1:58 AM, Tan, Jianfeng wrote:
> Hi Anatoly,
> 
>> -----Original Message-----
>> From: Burakov, Anatoly
>> Sent: Tuesday, February 27, 2018 10:36 PM
>> To: dev@dpdk.org
>> Cc: Tan, Jianfeng
>> Subject: [PATCH v3 4/5] eal: prevent secondary process init while sending
>> messages
>>
>> Currently, it is possible to spin up a secondary process while
>> either sendmsg or request is in progress. Fix this by adding
>> directory locks during init, sendmsg and requests.
> 
> Could you give a more detailed example for this issue?
> 
> And why locking the directory can help?
> 
> Thanks,
> Jianfeng
> 

Consider this. You start a request. Since sending this out takes a
non-zero amount of time, and you're waiting for the process to reply each
time you send a message, there's a non-zero chance that the contents of
/var/run may change and another socket file may appear that wasn't there
when we started sending out those messages.

This is simply making sending requests atomic, if you will. Honestly, I
can't think of a situation where this might be a problem, but it just
doesn't feel right, so I fixed it :)
  
Jianfeng Tan Feb. 28, 2018, 3:49 p.m. UTC | #3
On 2/28/2018 6:19 PM, Burakov, Anatoly wrote:
> On 28-Feb-18 1:58 AM, Tan, Jianfeng wrote:
>> Hi Anatoly,
>>
>>> -----Original Message-----
>>> From: Burakov, Anatoly
>>> Sent: Tuesday, February 27, 2018 10:36 PM
>>> To: dev@dpdk.org
>>> Cc: Tan, Jianfeng
>>> Subject: [PATCH v3 4/5] eal: prevent secondary process init while 
>>> sending
>>> messages
>>>
>>> Currently, it is possible to spin up a secondary process while
>>> either sendmsg or request is in progress. Fix this by adding
>>> directory locks during init, sendmsg and requests.
>>
>> Could you give a more detailed example for this issue?
>>
>> And why locking the directory can help?
>>
>> Thanks,
>> Jianfeng
>>
>
> Consider this. You start a request. Since sending this out takes 
> non-zero amount of time, and you're waiting for process to reply each 
> time you send a message, there's a non-zero chance where contents of 
> /var/run may change and another socket file may appear that wasn't 
> there when we started sending out those messages.

OK, I see the issue now. When the primary broadcasts a request and another
secondary joins in the meantime, whether that request gets delivered to
the new secondary is undefined behavior.

>
> This is simply making sending requests atomic, if you will. Honestly, 
> i can't think of a situation where this might be a problem, but it 
> just doesn't feel right, so i fixed it :)
>

This approach seems a little overkill to me. But I did not find a better way :-)

Thanks,
Jianfeng
  

Patch

diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index 7c87971..7856a7b 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -507,6 +507,7 @@  rte_mp_channel_init(void)
 {
 	char thread_name[RTE_MAX_THREAD_NAME_LEN];
 	char *path;
+	int dir_fd;
 	pthread_t tid;
 
 	snprintf(mp_filter, PATH_MAX, ".%s_unix_*",
@@ -516,14 +517,32 @@  rte_mp_channel_init(void)
 	snprintf(mp_dir_path, PATH_MAX, "%s", dirname(path));
 	free(path);
 
+	/* lock the directory */
+	dir_fd = open(mp_dir_path, O_RDONLY);
+	if (dir_fd < 0) {
+		RTE_LOG(ERR, EAL, "failed to open %s: %s\n",
+			mp_dir_path, strerror(errno));
+		return -1;
+	}
+
+	if (flock(dir_fd, LOCK_EX)) {
+		RTE_LOG(ERR, EAL, "failed to lock %s: %s\n",
+			mp_dir_path, strerror(errno));
+		close(dir_fd);
+		return -1;
+	}
+
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
 	    unlink_sockets(mp_filter)) {
 		RTE_LOG(ERR, EAL, "failed to unlink mp sockets\n");
+		close(dir_fd);
 		return -1;
 	}
 
-	if (open_socket_fd() < 0)
+	if (open_socket_fd() < 0) {
+		close(dir_fd);
 		return -1;
+	}
 
 	if (pthread_create(&tid, NULL, mp_handle, NULL) < 0) {
 		RTE_LOG(ERR, EAL, "failed to create mp thead: %s\n",
@@ -536,6 +555,11 @@  rte_mp_channel_init(void)
 	/* try best to set thread name */
 	snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "rte_mp_handle");
 	rte_thread_setname(tid, thread_name);
+
+	/* unlock the directory */
+	flock(dir_fd, LOCK_UN);
+	close(dir_fd);
+
 	return 0;
 }
 
@@ -650,6 +674,14 @@  mp_send(struct rte_mp_msg *msg, const char *peer, int type)
 		return -1;
 	}
 	dir_fd = dirfd(mp_dir);
+	/* lock the directory to prevent processes spinning up while we send */
+	if (flock(dir_fd, LOCK_EX)) {
+		RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+			mp_dir_path);
+		rte_errno = errno;
+		closedir(mp_dir);
+		return -1;
+	}
 	while ((ent = readdir(mp_dir))) {
 		char path[PATH_MAX];
 		const char *peer_name;
@@ -673,6 +705,8 @@  mp_send(struct rte_mp_msg *msg, const char *peer, int type)
 		else if (active > 0 && send_msg(path, msg, type) < 0)
 			ret = -1;
 	}
+	/* unlock the dir */
+	flock(dir_fd, LOCK_UN);
 
 	closedir(mp_dir);
 	return ret;
@@ -832,6 +866,15 @@  rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 	}
 	dir_fd = dirfd(mp_dir);
 
+	/* lock the directory to prevent processes spinning up while we send */
+	if (flock(dir_fd, LOCK_EX)) {
+		RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+			mp_dir_path);
+		closedir(mp_dir);
+		rte_errno = errno;
+		return -1;
+	}
+
 	while ((ent = readdir(mp_dir))) {
 		const char *peer_name;
 		char path[PATH_MAX];
@@ -857,6 +900,8 @@  rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 		if (mp_request_one(path, req, reply, &end))
 			ret = -1;
 	}
+	/* unlock the directory */
+	flock(dir_fd, LOCK_UN);
 
 	closedir(mp_dir);
 	return ret;