[dpdk-dev] [PATCH v2] vhost: fix connect hang in client mode
Ilya Maximets
i.maximets at samsung.com
Thu Jul 21 13:12:58 CEST 2016
If something abnormal happened to QEMU, 'connect()' can block calling
thread (e.g. main thread of OVS) forever or for a really long time.
This can break whole application or block the reconnection thread.
Example with OVS:
ovs_rcu(urcu2)|WARN|blocked 512000 ms waiting for main to quiesce
(gdb) bt
#0 connect () from /lib64/libpthread.so.0
#1 vhost_user_create_client (vsocket=0xa816e0)
#2 rte_vhost_driver_register
#3 netdev_dpdk_vhost_user_construct
#4 netdev_open (name=0xa664b0 "vhost1")
[...]
#11 main
Fix that by setting non-blocking mode for client sockets for connection.
Fixes: 64ab701c3d1e ("vhost: add vhost-user client mode")
Signed-off-by: Ilya Maximets <i.maximets at samsung.com>
---
This was reproduced with current QEMU master branch
(commit 1ecfb24da987b862f) + patch-set "vhost-user reconnect fixes"
(https://lists.nongnu.org/archive/html/qemu-devel/2016-07/msg01547.html).
OVS was patched to support client mode:
http://openvswitch.org/pipermail/dev/2016-July/074972.html
Following script forces QEMU to fail to initialize vhost because
disconnection occures while device not fully configured:
while true
do
ovs-vsctl set Interface vhost1 ofport_request=125
ovs-vsctl set Interface vhost1 ofport_request=126
done
As a result: QEMU still works, network interface broken and OVS main
thread stalled inside 'connect()'.
Version 2:
* EINPROGRESS not checked. EISCONN checked instead on
the next iteration of reconnection loop.
lib/librte_vhost/vhost_user/vhost-net-user.c | 62 ++++++++++++++++++++++++++--
1 file changed, 58 insertions(+), 4 deletions(-)
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
index 8c6a096..63e0840 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -43,6 +43,7 @@
#include <sys/un.h>
#include <sys/queue.h>
#include <errno.h>
+#include <fcntl.h>
#include <pthread.h>
#include <rte_log.h>
@@ -449,6 +450,14 @@ create_unix_socket(const char *path, struct sockaddr_un *un, bool is_server)
RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n",
is_server ? "server" : "client", fd);
+ if (!is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "vhost-user: can't set nonblocking mode for socket, fd: "
+ "%d (%s)\n", fd, strerror(errno));
+ close(fd);
+ return -1;
+ }
+
memset(un, 0, sizeof(*un));
un->sun_family = AF_UNIX;
strncpy(un->sun_path, path, sizeof(un->sun_path));
@@ -516,9 +525,43 @@ struct vhost_user_reconnect_list {
static struct vhost_user_reconnect_list reconn_list;
static pthread_t reconn_tid;
+static int
+vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
+{
+ int ret, flags, so_error;
+ socklen_t len = sizeof(so_error);
+
+ errno = EINVAL;
+
+ ret = connect(fd, un, sz);
+ if (ret < 0 && errno != EISCONN)
+ return -1;
+
+ ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, &so_error, &len);
+ if (ret < 0 || so_error) {
+ if (!ret)
+ errno = so_error;
+ return -1;
+ }
+
+ flags = fcntl(fd, F_GETFL, 0);
+ if (flags < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "can't get flags for connfd %d\n", fd);
+ return -2;
+ }
+ if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "can't disable nonblocking on fd %d\n", fd);
+ return -2;
+ }
+ return 0;
+}
+
static void *
vhost_user_client_reconnect(void *arg __rte_unused)
{
+ int ret;
struct vhost_user_reconnect *reconn, *next;
while (1) {
@@ -532,13 +575,23 @@ vhost_user_client_reconnect(void *arg __rte_unused)
reconn != NULL; reconn = next) {
next = TAILQ_NEXT(reconn, next);
- if (connect(reconn->fd, (struct sockaddr *)&reconn->un,
- sizeof(reconn->un)) < 0)
+ ret = vhost_user_connect_nonblock(reconn->fd,
+ (struct sockaddr *)&reconn->un,
+ sizeof(reconn->un));
+ if (ret == -2) {
+ close(reconn->fd);
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "reconnection for fd %d failed\n",
+ reconn->fd);
+ goto remove_fd;
+ }
+ if (ret == -1)
continue;
RTE_LOG(INFO, VHOST_CONFIG,
"%s: connected\n", reconn->vsocket->path);
vhost_user_add_connection(reconn->fd, reconn->vsocket);
+remove_fd:
TAILQ_REMOVE(&reconn_list.head, reconn, next);
free(reconn);
}
@@ -579,7 +632,8 @@ vhost_user_create_client(struct vhost_user_socket *vsocket)
if (fd < 0)
return -1;
- ret = connect(fd, (struct sockaddr *)&un, sizeof(un));
+ ret = vhost_user_connect_nonblock(fd, (struct sockaddr *)&un,
+ sizeof(un));
if (ret == 0) {
vhost_user_add_connection(fd, vsocket);
return 0;
@@ -589,7 +643,7 @@ vhost_user_create_client(struct vhost_user_socket *vsocket)
"failed to connect to %s: %s\n",
path, strerror(errno));
- if (!vsocket->reconnect) {
+ if (ret == -2 || !vsocket->reconnect) {
close(fd);
return -1;
}
--
2.7.4
More information about the dev
mailing list