Hi Ferruh, I am Clark, a security researcher from the Tencent Blade Team. I recently discovered several security vulnerabilities in DPDK, as follows: qemu attacks DPDK, causing DoS or RCE [background] struct vhost_virtqueue { struct vring_desc *desc; struct vring_avail *avail; struct vring_used *used; uint32_t size; uint16_t last_avail_idx; uint16_t last_used_idx; ... }; "size", "last_avail_idx", "last_used_idx" — all of the above are passed to DPDK by qemu through control commands. "desc", "avail", "used" are dynamically allocated by qemu sending control commands, and their length is "size". The following qemu command sequence may cause "desc", "avail" and "used" to be read and written out of bounds: 1. qemu sets "size" to value X 2. qemu dynamically allocates the length of "desc", "avail" and "used" to "size", that is value X 3. qemu sets "size" to value Y, and Y > X 4. if (index < size) //At this time "size" is Y > X desc[index] = xxxx; //Write out of bounds [code 1] int rte_vdpa_relay_vring_used(int vid, uint16_t qid, void *vring_m) { ... vq->used->ring[idx & (vq->size - 1)] = s_vring->used->ring[idx & (vq->size - 1)]; //idx Out-of-bounds read and write desc_id = vq->used->ring[idx & (vq->size - 1)].id; //idx Out-of-bounds read ... } [code 2] int rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx, uint16_t head, uint16_t last, uint16_t *inflight_entry) { if (unlikely(head >= vq->size)) return -1; if (unlikely(old_free_head >= vq->size)) return -1; ... 
inflight_info->desc[old_free_head].num++; //old_free_head head Out-of-bounds read and write inflight_info->desc[free_head].addr = desc[head].addr; inflight_info->desc[free_head].len = desc[head].len; inflight_info->desc[free_head].flags = desc[head].flags; inflight_info->desc[free_head].id = desc[head].id; inflight_info->desc[old_free_head].last = free_head; } [code 3] int rte_vhost_set_last_inflight_io_packed(int vid, uint16_t vring_idx, uint16_t head) { if (unlikely(head >= vq->size)) return -1; last = inflight_info->desc[head].last; if (unlikely(last >= vq->size)) return -1; inflight_info->desc[last].next = inflight_info->free_head; //head last Out-of-bounds read and write inflight_info->free_head = head; inflight_info->used_idx += inflight_info->desc[head].num; } [code 4] uint16_t rte_vhost_crypto_fetch_requests(int vid, uint32_t qid, struct rte_crypto_op **ops, uint16_t nb_ops) { uint16_t used_idx = (start_idx + i) & (vq->size - 1); uint16_t desc_idx = vq->avail->ring[used_idx]; //used_idx Out-of-bounds read and write } [code 5] static __rte_always_inline void vhost_flush_enqueue_shadow_packed(struct virtio_net *dev, struct vhost_virtqueue *vq) { for (i = 0; i < vq->shadow_used_idx; i++) { vq->desc_packed[used_idx].id = vq->shadow_used_packed[i].id; //used_idx Out-of-bounds read and write vq->desc_packed[used_idx].len = vq->shadow_used_packed[i].len; used_idx += vq->shadow_used_packed[i].count; if (used_idx >= vq->size) used_idx -= vq->size; } } [code 6] static __rte_always_inline int fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, uint32_t avail_idx, uint16_t *vec_idx, struct buf_vector *buf_vec, uint16_t *desc_chain_head, uint32_t *desc_chain_len, uint8_t perm) { uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)]; //avail_idx Out-of-bounds read and write if (unlikely(idx >= vq->size)) return -1; if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) { //idx Out-of-bounds read and write dlen = vq->desc[idx].len; ... 
} } [code 7] static __rte_always_inline int fill_vec_buf_packed_indirect(struct virtio_net *dev, struct vhost_virtqueue *vq, struct vring_packed_desc *desc, uint16_t *vec_idx, struct buf_vector *buf_vec, uint32_t *len, uint8_t perm) { struct vring_packed_desc *descs = (struct vring_packed_desc *)(uintptr_t) vhost_iova_to_vva(dev, vq, desc->addr, &dlen, VHOST_ACCESS_RO); nr_descs = desc->len / sizeof(struct vring_packed_desc); if (unlikely(nr_descs >= vq->size)) { free_ind_table(idescs); return -1; } for (i = 0; i < nr_descs; i++) { //nr_descs Out-of-bounds read and write if (unlikely(vec_id >= BUF_VECTOR_MAX)) { free_ind_table(idescs); return -1; } *len += descs[i].len; if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, descs[i].addr, descs[i].len, perm))) return -1; } } [code 8] static __rte_always_inline int fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, uint16_t avail_idx, uint16_t *desc_count, struct buf_vector *buf_vec, uint16_t *vec_idx, uint16_t *buf_id, uint32_t *len, uint8_t perm) { while (1) { if (unlikely(vec_id >= BUF_VECTOR_MAX)) return -1; if (unlikely(*desc_count >= vq->size)) return -1; *desc_count += 1; *buf_id = descs[avail_idx].id; if (descs[avail_idx].flags & VRING_DESC_F_INDIRECT) { if (unlikely(fill_vec_buf_packed_indirect(dev, vq, &descs[avail_idx], &vec_id, buf_vec, len, perm) < 0)) return -1; } else { *len += descs[avail_idx].len; if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, descs[avail_idx].addr, descs[avail_idx].len, perm))) return -1; } if ((descs[avail_idx].flags & VRING_DESC_F_NEXT) == 0) break; if (++avail_idx >= vq->size) { //avail_idx Out-of-bounds read and write avail_idx -= vq->size; wrap_counter ^= 1; } } } [code 9] static __rte_always_inline int virtio_dev_rx_batch_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mbuf **pkts) { if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) //avail_idx Out-of-bounds read and write return -1; ... 
lens[i] = descs[avail_idx + i].len; } [code 10] static __rte_always_inline int vhost_reserve_avail_batch_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t avail_idx, uintptr_t *desc_addrs, uint16_t *ids) { if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) // return -1; vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { flags = descs[avail_idx + i].flags; } } [harm] Malicious qemu process may cause DPDK DoS or RCE
cc'ed Maxime and Chenbo for comments.
Reply from Maxime: > As for this issue. > > Is it the same? we usually consider the Qemu process to be trusted, so > we would say no CVE is required here. > > We should consider it as a regular issue? I would say yes, let's consider it as a regular issue. Maybe propose that Clark send fixes, and if he is not willing to proceed, file a regular Bz so that someone can pick up the task. Thanks, Maxime
No reply from the reporter; maybe we can convert this defect to a regular defect.