[dpdk-dev,v3,4/5] eal: prevent secondary process init while sending messages
Checks
Commit Message
Currently, it is possible to spin up a secondary process while
either sendmsg or request is in progress. Fix this by adding
directory locks during init, sendmsg and requests.
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
Notes:
v3: no changes
v2: no changes
lib/librte_eal/common/eal_common_proc.c | 47 ++++++++++++++++++++++++++++++++-
1 file changed, 46 insertions(+), 1 deletion(-)
Comments
Hi Anatoly,
> -----Original Message-----
> From: Burakov, Anatoly
> Sent: Tuesday, February 27, 2018 10:36 PM
> To: dev@dpdk.org
> Cc: Tan, Jianfeng
> Subject: [PATCH v3 4/5] eal: prevent secondary process init while sending
> messages
>
> Currently, it is possible to spin up a secondary process while
> either sendmsg or request is in progress. Fix this by adding
> directory locks during init, sendmsg and requests.
Could you give a more detailed example of this issue?
And why does locking the directory help?
Thanks,
Jianfeng
>
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> ---
>
> Notes:
> v3: no changes
>
> v2: no changes
>
> lib/librte_eal/common/eal_common_proc.c | 47
> ++++++++++++++++++++++++++++++++-
> 1 file changed, 46 insertions(+), 1 deletion(-)
>
> diff --git a/lib/librte_eal/common/eal_common_proc.c
> b/lib/librte_eal/common/eal_common_proc.c
> index 7c87971..7856a7b 100644
> --- a/lib/librte_eal/common/eal_common_proc.c
> +++ b/lib/librte_eal/common/eal_common_proc.c
> @@ -507,6 +507,7 @@ rte_mp_channel_init(void)
> {
> char thread_name[RTE_MAX_THREAD_NAME_LEN];
> char *path;
> + int dir_fd;
> pthread_t tid;
>
> snprintf(mp_filter, PATH_MAX, ".%s_unix_*",
> @@ -516,14 +517,32 @@ rte_mp_channel_init(void)
> snprintf(mp_dir_path, PATH_MAX, "%s", dirname(path));
> free(path);
>
> + /* lock the directory */
> + dir_fd = open(mp_dir_path, O_RDONLY);
> + if (dir_fd < 0) {
> + RTE_LOG(ERR, EAL, "failed to open %s: %s\n",
> + mp_dir_path, strerror(errno));
> + return -1;
> + }
> +
> + if (flock(dir_fd, LOCK_EX)) {
> + RTE_LOG(ERR, EAL, "failed to lock %s: %s\n",
> + mp_dir_path, strerror(errno));
> + close(dir_fd);
> + return -1;
> + }
> +
> if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
> unlink_sockets(mp_filter)) {
> RTE_LOG(ERR, EAL, "failed to unlink mp sockets\n");
> + close(dir_fd);
> return -1;
> }
>
> - if (open_socket_fd() < 0)
> + if (open_socket_fd() < 0) {
> + close(dir_fd);
> return -1;
> + }
>
> if (pthread_create(&tid, NULL, mp_handle, NULL) < 0) {
> RTE_LOG(ERR, EAL, "failed to create mp thead: %s\n",
> @@ -536,6 +555,11 @@ rte_mp_channel_init(void)
> /* try best to set thread name */
> snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
> "rte_mp_handle");
> rte_thread_setname(tid, thread_name);
> +
> + /* unlock the directory */
> + flock(dir_fd, LOCK_UN);
> + close(dir_fd);
> +
> return 0;
> }
>
> @@ -650,6 +674,14 @@ mp_send(struct rte_mp_msg *msg, const char
> *peer, int type)
> return -1;
> }
> dir_fd = dirfd(mp_dir);
> + /* lock the directory to prevent processes spinning up while we send
> */
> + if (flock(dir_fd, LOCK_EX)) {
> + RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
> + mp_dir_path);
> + rte_errno = errno;
> + closedir(mp_dir);
> + return -1;
> + }
> while ((ent = readdir(mp_dir))) {
> char path[PATH_MAX];
> const char *peer_name;
> @@ -673,6 +705,8 @@ mp_send(struct rte_mp_msg *msg, const char *peer,
> int type)
> else if (active > 0 && send_msg(path, msg, type) < 0)
> ret = -1;
> }
> + /* unlock the dir */
> + flock(dir_fd, LOCK_UN);
>
> closedir(mp_dir);
> return ret;
> @@ -832,6 +866,15 @@ rte_mp_request(struct rte_mp_msg *req, struct
> rte_mp_reply *reply,
> }
> dir_fd = dirfd(mp_dir);
>
> + /* lock the directory to prevent processes spinning up while we send
> */
> + if (flock(dir_fd, LOCK_EX)) {
> + RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
> + mp_dir_path);
> + closedir(mp_dir);
> + rte_errno = errno;
> + return -1;
> + }
> +
> while ((ent = readdir(mp_dir))) {
> const char *peer_name;
> char path[PATH_MAX];
> @@ -857,6 +900,8 @@ rte_mp_request(struct rte_mp_msg *req, struct
> rte_mp_reply *reply,
> if (mp_request_one(path, req, reply, &end))
> ret = -1;
> }
> + /* unlock the directory */
> + flock(dir_fd, LOCK_UN);
>
> closedir(mp_dir);
> return ret;
> --
> 2.7.4
On 28-Feb-18 1:58 AM, Tan, Jianfeng wrote:
> Hi Anatoly,
>
>> -----Original Message-----
>> From: Burakov, Anatoly
>> Sent: Tuesday, February 27, 2018 10:36 PM
>> To: dev@dpdk.org
>> Cc: Tan, Jianfeng
>> Subject: [PATCH v3 4/5] eal: prevent secondary process init while sending
>> messages
>>
>> Currently, it is possible to spin up a secondary process while
>> either sendmsg or request is in progress. Fix this by adding
>> directory locks during init, sendmsg and requests.
>
> Could you give a more detailed example for this issue?
>
> And why locking the directory can help?
>
> Thanks,
> Jianfeng
>
Consider this. You start a request. Since sending it out takes a
non-zero amount of time, and you're waiting for the process to reply each
time you send a message, there's a non-zero chance that the contents of
/var/run may change and another socket file may appear that wasn't there
when we started sending out those messages.
This is simply making sending requests atomic, if you will. Honestly, I
can't think of a situation where this might be a problem, but it just
doesn't feel right, so I fixed it :)
On 2/28/2018 6:19 PM, Burakov, Anatoly wrote:
> On 28-Feb-18 1:58 AM, Tan, Jianfeng wrote:
>> Hi Anatoly,
>>
>>> -----Original Message-----
>>> From: Burakov, Anatoly
>>> Sent: Tuesday, February 27, 2018 10:36 PM
>>> To: dev@dpdk.org
>>> Cc: Tan, Jianfeng
>>> Subject: [PATCH v3 4/5] eal: prevent secondary process init while
>>> sending
>>> messages
>>>
>>> Currently, it is possible to spin up a secondary process while
>>> either sendmsg or request is in progress. Fix this by adding
>>> directory locks during init, sendmsg and requests.
>>
>> Could you give a more detailed example for this issue?
>>
>> And why locking the directory can help?
>>
>> Thanks,
>> Jianfeng
>>
>
> Consider this. You start a request. Since sending this out takes
> non-zero amount of time, and you're waiting for process to reply each
> time you send a message, there's a non-zero chance where contents of
> /var/run may change and another socket file may appear that wasn't
> there when we started sending out those messages.
OK, I see the issue now. When the primary broadcasts a request and another
secondary joins in the meantime, whether that request will be delivered to
the new secondary is undefined behavior.
>
> This is simply making sending requests atomic, if you will. Honestly,
> i can't think of a situation where this might be a problem, but it
> just doesn't feel right, so i fixed it :)
>
This approach seems like a bit of overkill to me. But I could not find a better way :-)
Thanks,
Jianfeng
@@ -507,6 +507,7 @@ rte_mp_channel_init(void)
{
char thread_name[RTE_MAX_THREAD_NAME_LEN];
char *path;
+ int dir_fd;
pthread_t tid;
snprintf(mp_filter, PATH_MAX, ".%s_unix_*",
@@ -516,14 +517,32 @@ rte_mp_channel_init(void)
snprintf(mp_dir_path, PATH_MAX, "%s", dirname(path));
free(path);
+ /* lock the directory */
+ dir_fd = open(mp_dir_path, O_RDONLY);
+ if (dir_fd < 0) {
+ RTE_LOG(ERR, EAL, "failed to open %s: %s\n",
+ mp_dir_path, strerror(errno));
+ return -1;
+ }
+
+ if (flock(dir_fd, LOCK_EX)) {
+ RTE_LOG(ERR, EAL, "failed to lock %s: %s\n",
+ mp_dir_path, strerror(errno));
+ close(dir_fd);
+ return -1;
+ }
+
if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
unlink_sockets(mp_filter)) {
RTE_LOG(ERR, EAL, "failed to unlink mp sockets\n");
+ close(dir_fd);
return -1;
}
- if (open_socket_fd() < 0)
+ if (open_socket_fd() < 0) {
+ close(dir_fd);
return -1;
+ }
if (pthread_create(&tid, NULL, mp_handle, NULL) < 0) {
RTE_LOG(ERR, EAL, "failed to create mp thead: %s\n",
@@ -536,6 +555,11 @@ rte_mp_channel_init(void)
/* try best to set thread name */
snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "rte_mp_handle");
rte_thread_setname(tid, thread_name);
+
+ /* unlock the directory */
+ flock(dir_fd, LOCK_UN);
+ close(dir_fd);
+
return 0;
}
@@ -650,6 +674,14 @@ mp_send(struct rte_mp_msg *msg, const char *peer, int type)
return -1;
}
dir_fd = dirfd(mp_dir);
+ /* lock the directory to prevent processes spinning up while we send */
+ if (flock(dir_fd, LOCK_EX)) {
+ RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+ mp_dir_path);
+ rte_errno = errno;
+ closedir(mp_dir);
+ return -1;
+ }
while ((ent = readdir(mp_dir))) {
char path[PATH_MAX];
const char *peer_name;
@@ -673,6 +705,8 @@ mp_send(struct rte_mp_msg *msg, const char *peer, int type)
else if (active > 0 && send_msg(path, msg, type) < 0)
ret = -1;
}
+ /* unlock the dir */
+ flock(dir_fd, LOCK_UN);
closedir(mp_dir);
return ret;
@@ -832,6 +866,15 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
}
dir_fd = dirfd(mp_dir);
+ /* lock the directory to prevent processes spinning up while we send */
+ if (flock(dir_fd, LOCK_EX)) {
+ RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+ mp_dir_path);
+ closedir(mp_dir);
+ rte_errno = errno;
+ return -1;
+ }
+
while ((ent = readdir(mp_dir))) {
const char *peer_name;
char path[PATH_MAX];
@@ -857,6 +900,8 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
if (mp_request_one(path, req, reply, &end))
ret = -1;
}
+ /* unlock the directory */
+ flock(dir_fd, LOCK_UN);
closedir(mp_dir);
return ret;