[dpdk-dev] [PATCH v3 04/11] baseband/acc100: add queue configuration
Xu, Rosen
rosen.xu at intel.com
Fri Sep 4 04:01:50 CEST 2020
Hi,
> -----Original Message-----
> From: Chautru, Nicolas <nicolas.chautru at intel.com>
> Sent: Friday, September 04, 2020 6:49
> To: Xu, Rosen <rosen.xu at intel.com>; dev at dpdk.org; akhil.goyal at nxp.com
> Cc: Richardson, Bruce <bruce.richardson at intel.com>
> Subject: RE: [dpdk-dev] [PATCH v3 04/11] baseband/acc100: add queue
> configuration
>
> > From: Xu, Rosen <rosen.xu at intel.com>
> >
> > Hi,
> >
> > > -----Original Message-----
> > > From: Chautru, Nicolas <nicolas.chautru at intel.com>
> > > Sent: Sunday, August 30, 2020 1:48
> > > To: Xu, Rosen <rosen.xu at intel.com>; dev at dpdk.org;
> > > akhil.goyal at nxp.com
> > > Cc: Richardson, Bruce <bruce.richardson at intel.com>
> > > Subject: RE: [dpdk-dev] [PATCH v3 04/11] baseband/acc100: add queue
> > > configuration
> > >
> > > Hi,
> > >
> > > > From: Xu, Rosen <rosen.xu at intel.com>
> > > >
> > > > Hi,
> > > >
> > > > > -----Original Message-----
> > > > > From: dev <dev-bounces at dpdk.org> On Behalf Of Nicolas Chautru
> > > > > Sent: Wednesday, August 19, 2020 8:25
> > > > > To: dev at dpdk.org; akhil.goyal at nxp.com
> > > > > Cc: Richardson, Bruce <bruce.richardson at intel.com>; Chautru,
> > > > > Nicolas <nicolas.chautru at intel.com>
> > > > > Subject: [dpdk-dev] [PATCH v3 04/11] baseband/acc100: add queue
> > > > > configuration
> > > > >
> > > > > Adding function to create and configure queues for the device.
> > > > > Still no capability.
> > > > >
> > > > > Signed-off-by: Nicolas Chautru <nicolas.chautru at intel.com>
> > > > > ---
> > > > > drivers/baseband/acc100/rte_acc100_pmd.c | 420
> > > > > ++++++++++++++++++++++++++++++-
> > > > > drivers/baseband/acc100/rte_acc100_pmd.h | 45 ++++
> > > > > 2 files changed, 464 insertions(+), 1 deletion(-)
> > > > >
> > > > > diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c
> > > > > b/drivers/baseband/acc100/rte_acc100_pmd.c
> > > > > index 7807a30..7a21c57 100644
> > > > > --- a/drivers/baseband/acc100/rte_acc100_pmd.c
> > > > > +++ b/drivers/baseband/acc100/rte_acc100_pmd.c
> > > > > @@ -26,6 +26,22 @@
> > > > > RTE_LOG_REGISTER(acc100_logtype, pmd.bb.acc100, NOTICE);
> > > > > #endif
> > > > >
> > > > > +/* Write to MMIO register address */ static inline void
> > > > > +mmio_write(void *addr, uint32_t value) {
> > > > > + *((volatile uint32_t *)(addr)) = rte_cpu_to_le_32(value); }
> > > > > +
> > > > > +/* Write a register of a ACC100 device */ static inline void
> > > > > +acc100_reg_write(struct acc100_device *d, uint32_t offset,
> > > > > +uint32_t
> > > > > +payload) {
> > > > > + void *reg_addr = RTE_PTR_ADD(d->mmio_base, offset);
> > > > > + mmio_write(reg_addr, payload);
> > > > > + usleep(1000);
> > > > > +}
> > > > > +
> > > > > /* Read a register of a ACC100 device */ static inline
> > > > > uint32_t acc100_reg_read(struct acc100_device *d, uint32_t
> > > > > offset) @@ -36,6
> > > > > +52,22 @@
> > > > > return rte_le_to_cpu_32(ret);
> > > > > }
> > > > >
> > > > > +/* Basic Implementation of Log2 for exact 2^N */ static inline
> > > > > +uint32_t log2_basic(uint32_t value) {
> > > > > + return (value == 0) ? 0 : __builtin_ctz(value); }
> > > > > +
> > > > > +/* Calculate memory alignment offset assuming alignment is 2^N
> > > > > +*/ static inline uint32_t calc_mem_alignment_offset(void
> > > > > +*unaligned_virt_mem, uint32_t alignment) {
> > > > > + rte_iova_t unaligned_phy_mem =
> > > > > rte_malloc_virt2iova(unaligned_virt_mem);
> > > > > + return (uint32_t)(alignment -
> > > > > + (unaligned_phy_mem & (alignment-1))); }
> > > > > +
> > > > > /* Calculate the offset of the enqueue register */ static
> > > > > inline uint32_t queue_offset(bool pf_device, uint8_t vf_id,
> > > > > uint8_t qgrp_id, uint16_t aq_id) @@ -204,10 +236,393 @@
> > > > > acc100_conf->q_dl_5g.aq_depth_log2);
> > > > > }
> > > > >
> > > > > +static void
> > > > > +free_base_addresses(void **base_addrs, int size) {
> > > > > + int i;
> > > > > + for (i = 0; i < size; i++)
> > > > > + rte_free(base_addrs[i]);
> > > > > +}
> > > > > +
> > > > > +static inline uint32_t
> > > > > +get_desc_len(void)
> > > > > +{
> > > > > + return sizeof(union acc100_dma_desc); }
> > > > > +
> > > > > +/* Allocate the 2 * 64MB block for the sw rings */ static int
> > > > > +alloc_2x64mb_sw_rings_mem(struct rte_bbdev *dev, struct
> > > > > +acc100_device
> > > > > *d,
> > > > > + int socket)
> > > > > +{
> > > > > + uint32_t sw_ring_size = ACC100_SIZE_64MBYTE;
> > > > > + d->sw_rings_base = rte_zmalloc_socket(dev->device-
> >driver-
> > > > > >name,
> > > > > + 2 * sw_ring_size, RTE_CACHE_LINE_SIZE,
> socket);
> > > > > + if (d->sw_rings_base == NULL) {
> > > > > + rte_bbdev_log(ERR, "Failed to allocate memory
> for %s:%u",
> > > > > + dev->device->driver->name,
> > > > > + dev->data->dev_id);
> > > > > + return -ENOMEM;
> > > > > + }
> > > > > + memset(d->sw_rings_base, 0, ACC100_SIZE_64MBYTE);
> > > > > + uint32_t next_64mb_align_offset =
> calc_mem_alignment_offset(
> > > > > + d->sw_rings_base, ACC100_SIZE_64MBYTE);
> > > > > + d->sw_rings = RTE_PTR_ADD(d->sw_rings_base,
> > > > > next_64mb_align_offset);
> > > > > + d->sw_rings_phys = rte_malloc_virt2iova(d->sw_rings_base)
> +
> > > > > + next_64mb_align_offset;
> > > > > + d->sw_ring_size = MAX_QUEUE_DEPTH * get_desc_len();
> > > > > + d->sw_ring_max_depth = d->sw_ring_size / get_desc_len();
> > > > > +
> > > > > + return 0;
> > > > > +}
> > > >
> > > > Why not a common alloc memory function but special function for
> > > > different memory size?
> > >
> > > This is a bit convoluted but due to the fact the first attempt
> > > method which is optimal (minimum) may not always find aligned memory.
> >
> > What's convoluted? Can you explain?
> > For packet processing, in most scenarios, aren't we aligned memory
> > when we alloc memory?
>
> Hi Rosen,
> This is related to both the alignment and the size of the contiguous amount
> of data in pinned down memory = 64MB contiguous block aligned on 64MB
> boundary of physical address (not linear).
> The first method can potentially fail hence is run incrementally while the 2nd
> version may be used as safe fall through and is more wasteful in term of
> footprint (hence not used as default).
> That is the part that I considered "convoluted" in this way to reliably allocate
> memory. It is possible to only use the 2nd version which would look cleaner
> in term of code but more wasteful in memory usage.
As you mentioned, it's not cleaner; looking forward to your next version of the patch.
>
>
> > >
> > > >
> > > > > +/* Attempt to allocate minimised memory space for sw rings */
> > > > > +static void alloc_sw_rings_min_mem(struct rte_bbdev *dev, struct
> > > > > acc100_device
> > > > > +*d,
> > > > > + uint16_t num_queues, int socket) {
> > > > > + rte_iova_t sw_rings_base_phy, next_64mb_align_addr_phy;
> > > > > + uint32_t next_64mb_align_offset;
> > > > > + rte_iova_t sw_ring_phys_end_addr;
> > > > > + void *base_addrs[SW_RING_MEM_ALLOC_ATTEMPTS];
> > > > > + void *sw_rings_base;
> > > > > + int i = 0;
> > > > > + uint32_t q_sw_ring_size = MAX_QUEUE_DEPTH *
> get_desc_len();
> > > > > + uint32_t dev_sw_ring_size = q_sw_ring_size * num_queues;
> > > > > +
> > > > > + /* Find an aligned block of memory to store sw rings */
> > > > > + while (i < SW_RING_MEM_ALLOC_ATTEMPTS) {
> > > > > + /*
> > > > > + * sw_ring allocated memory is guaranteed to be
> aligned to
> > > > > + * q_sw_ring_size at the condition that the
> requested size is
> > > > > + * less than the page size
> > > > > + */
> > > > > + sw_rings_base = rte_zmalloc_socket(
> > > > > + dev->device->driver->name,
> > > > > + dev_sw_ring_size, q_sw_ring_size,
> socket);
> > > > > +
> > > > > + if (sw_rings_base == NULL) {
> > > > > + rte_bbdev_log(ERR,
> > > > > + "Failed to allocate memory
> > > > > for %s:%u",
> > > > > + dev->device->driver->name,
> > > > > + dev->data->dev_id);
> > > > > + break;
> > > > > + }
> > > > > +
> > > > > + sw_rings_base_phy =
> rte_malloc_virt2iova(sw_rings_base);
> > > > > + next_64mb_align_offset =
> calc_mem_alignment_offset(
> > > > > + sw_rings_base,
> ACC100_SIZE_64MBYTE);
> > > > > + next_64mb_align_addr_phy = sw_rings_base_phy +
> > > > > + next_64mb_align_offset;
> > > > > + sw_ring_phys_end_addr = sw_rings_base_phy +
> > > > > dev_sw_ring_size;
> > > > > +
> > > > > + /* Check if the end of the sw ring memory block is
> before the
> > > > > + * start of next 64MB aligned mem address
> > > > > + */
> > > > > + if (sw_ring_phys_end_addr <
> next_64mb_align_addr_phy) {
> > > > > + d->sw_rings_phys = sw_rings_base_phy;
> > > > > + d->sw_rings = sw_rings_base;
> > > > > + d->sw_rings_base = sw_rings_base;
> > > > > + d->sw_ring_size = q_sw_ring_size;
> > > > > + d->sw_ring_max_depth =
> MAX_QUEUE_DEPTH;
> > > > > + break;
> > > > > + }
> > > > > + /* Store the address of the unaligned mem block */
> > > > > + base_addrs[i] = sw_rings_base;
> > > > > + i++;
> > > > > + }
> > > > > +
> > > > > + /* Free all unaligned blocks of mem allocated in the loop */
> > > > > + free_base_addresses(base_addrs, i); }
> > > >
> > > > It's strange to firstly alloc memory and then free memory but on
> > > > operations on this memory.
> > >
> > > I may miss your point. We are freeing the exact same mem we did get
> > > from rte_zmalloc.
> > > Note that the base_addrs array refers to multiple attempts of mallocs,
> > > not multiple operations in a ring.
> >
> > You alloc memory sw_rings_base, after some translate, assign this memory
> to
> > cc100_device *d, and before the function return, this memory has been
> freed.
>
> If you follow the logic, this actually only frees the memory from attempts
> which were not successfully well aligned, not the one which ends up being in
> fact used for sw rings.
> The actually memory for sw rings is obviously used and actually gets freed
> when closing the device below => ie. rte_free(d->sw_rings_base);
> Let me know if unclear. I could add more comments if this not obvious from
> the code. Ie. /* Free all _unaligned_ blocks of mem allocated in the loop */*
>
> Thanks for your review. I can see how it can look a bit odd initially.
Please make sure your code works well in each branch.
> >
> > > >
> > > > > +
> > > > > +/* Allocate 64MB memory used for all software rings */ static int
> > > > > +acc100_setup_queues(struct rte_bbdev *dev, uint16_t
> num_queues,
> > > int
> > > > > +socket_id) {
> > > > > + uint32_t phys_low, phys_high, payload;
> > > > > + struct acc100_device *d = dev->data->dev_private;
> > > > > + const struct acc100_registry_addr *reg_addr;
> > > > > +
> > > > > + if (d->pf_device && !d->acc100_conf.pf_mode_en) {
> > > > > + rte_bbdev_log(NOTICE,
> > > > > + "%s has PF mode disabled. This PF
> can't be
> > > > > used.",
> > > > > + dev->data->name);
> > > > > + return -ENODEV;
> > > > > + }
> > > > > +
> > > > > + alloc_sw_rings_min_mem(dev, d, num_queues, socket_id);
> > > > > +
> > > > > + /* If minimal memory space approach failed, then allocate
> > > > > + * the 2 * 64MB block for the sw rings
> > > > > + */
> > > > > + if (d->sw_rings == NULL)
> > > > > + alloc_2x64mb_sw_rings_mem(dev, d, socket_id);
> > > > > +
> > > > > + /* Configure ACC100 with the base address for DMA
> descriptor rings
> > > > > + * Same descriptor rings used for UL and DL DMA Engines
> > > > > + * Note : Assuming only VF0 bundle is used for PF mode
> > > > > + */
> > > > > + phys_high = (uint32_t)(d->sw_rings_phys >> 32);
> > > > > + phys_low = (uint32_t)(d->sw_rings_phys &
> > > > > ~(ACC100_SIZE_64MBYTE-1));
> > > > > +
> > > > > + /* Choose correct registry addresses for the device type */
> > > > > + if (d->pf_device)
> > > > > + reg_addr = &pf_reg_addr;
> > > > > + else
> > > > > + reg_addr = &vf_reg_addr;
> > > > > +
> > > > > + /* Read the populated cfg from ACC100 registers */
> > > > > + fetch_acc100_config(dev);
> > > > > +
> > > > > + /* Mark as configured properly */
> > > > > + d->configured = true;
> > > > > +
> > > > > + /* Release AXI from PF */
> > > > > + if (d->pf_device)
> > > > > + acc100_reg_write(d, HWPfDmaAxiControl, 1);
> > > > > +
> > > > > + acc100_reg_write(d, reg_addr->dma_ring_ul5g_hi,
> phys_high);
> > > > > + acc100_reg_write(d, reg_addr->dma_ring_ul5g_lo,
> phys_low);
> > > > > + acc100_reg_write(d, reg_addr->dma_ring_dl5g_hi,
> phys_high);
> > > > > + acc100_reg_write(d, reg_addr->dma_ring_dl5g_lo,
> phys_low);
> > > > > + acc100_reg_write(d, reg_addr->dma_ring_ul4g_hi,
> phys_high);
> > > > > + acc100_reg_write(d, reg_addr->dma_ring_ul4g_lo,
> phys_low);
> > > > > + acc100_reg_write(d, reg_addr->dma_ring_dl4g_hi,
> phys_high);
> > > > > + acc100_reg_write(d, reg_addr->dma_ring_dl4g_lo,
> phys_low);
> > > > > +
> > > > > + /*
> > > > > + * Configure Ring Size to the max queue ring size
> > > > > + * (used for wrapping purpose)
> > > > > + */
> > > > > + payload = log2_basic(d->sw_ring_size / 64);
> > > > > + acc100_reg_write(d, reg_addr->ring_size, payload);
> > > > > +
> > > > > + /* Configure tail pointer for use when SDONE enabled */
> > > > > + d->tail_ptrs = rte_zmalloc_socket(
> > > > > + dev->device->driver->name,
> > > > > + ACC100_NUM_QGRPS * ACC100_NUM_AQS
> *
> > > > > sizeof(uint32_t),
> > > > > + RTE_CACHE_LINE_SIZE, socket_id);
> > > > > + if (d->tail_ptrs == NULL) {
> > > > > + rte_bbdev_log(ERR, "Failed to allocate tail ptr
> for %s:%u",
> > > > > + dev->device->driver->name,
> > > > > + dev->data->dev_id);
> > > > > + rte_free(d->sw_rings);
> > > > > + return -ENOMEM;
> > > > > + }
> > > > > + d->tail_ptr_phys = rte_malloc_virt2iova(d->tail_ptrs);
> > > > > +
> > > > > + phys_high = (uint32_t)(d->tail_ptr_phys >> 32);
> > > > > + phys_low = (uint32_t)(d->tail_ptr_phys);
> > > > > + acc100_reg_write(d, reg_addr->tail_ptrs_ul5g_hi, phys_high);
> > > > > + acc100_reg_write(d, reg_addr->tail_ptrs_ul5g_lo, phys_low);
> > > > > + acc100_reg_write(d, reg_addr->tail_ptrs_dl5g_hi, phys_high);
> > > > > + acc100_reg_write(d, reg_addr->tail_ptrs_dl5g_lo, phys_low);
> > > > > + acc100_reg_write(d, reg_addr->tail_ptrs_ul4g_hi, phys_high);
> > > > > + acc100_reg_write(d, reg_addr->tail_ptrs_ul4g_lo, phys_low);
> > > > > + acc100_reg_write(d, reg_addr->tail_ptrs_dl4g_hi, phys_high);
> > > > > + acc100_reg_write(d, reg_addr->tail_ptrs_dl4g_lo, phys_low);
> > > > > +
> > > > > + d->harq_layout = rte_zmalloc_socket("HARQ Layout",
> > > > > + ACC100_HARQ_LAYOUT * sizeof(*d-
> >harq_layout),
> > > > > + RTE_CACHE_LINE_SIZE, dev->data-
> >socket_id);
> > > > > +
> > > > > + rte_bbdev_log_debug(
> > > > > + "ACC100 (%s) configured sw_rings = %p,
> > > > > sw_rings_phys = %#"
> > > > > + PRIx64, dev->data->name, d->sw_rings, d-
> > > > > >sw_rings_phys);
> > > > > +
> > > > > + return 0;
> > > > > +}
> > > > > +
> > > > > /* Free 64MB memory used for software rings */ static int -
> > > > > acc100_dev_close(struct rte_bbdev *dev __rte_unused)
> > > > > +acc100_dev_close(struct rte_bbdev *dev)
> > > > > {
> > > > > + struct acc100_device *d = dev->data->dev_private;
> > > > > + if (d->sw_rings_base != NULL) {
> > > > > + rte_free(d->tail_ptrs);
> > > > > + rte_free(d->sw_rings_base);
> > > > > + d->sw_rings_base = NULL;
> > > > > + }
> > > > > + usleep(1000);
> > > > > + return 0;
> > > > > +}
> > > > > +
> > > > > +
> > > > > +/**
> > > > > + * Report a ACC100 queue index which is free
> > > > > + * Return 0 to 16k for a valid queue_idx or -1 when no queue is
> > > > > +available
> > > > > + * Note : Only supporting VF0 Bundle for PF mode */ static int
> > > > > +acc100_find_free_queue_idx(struct rte_bbdev *dev,
> > > > > + const struct rte_bbdev_queue_conf *conf) {
> > > > > + struct acc100_device *d = dev->data->dev_private;
> > > > > + int op_2_acc[5] = {0, UL_4G, DL_4G, UL_5G, DL_5G};
> > > > > + int acc = op_2_acc[conf->op_type];
> > > > > + struct rte_q_topology_t *qtop = NULL;
> > > > > + qtopFromAcc(&qtop, acc, &(d->acc100_conf));
> > > > > + if (qtop == NULL)
> > > > > + return -1;
> > > > > + /* Identify matching QGroup Index which are sorted in
> priority
> > > > > +order
> > > > > */
> > > > > + uint16_t group_idx = qtop->first_qgroup_index;
> > > > > + group_idx += conf->priority;
> > > > > + if (group_idx >= ACC100_NUM_QGRPS ||
> > > > > + conf->priority >= qtop->num_qgroups) {
> > > > > + rte_bbdev_log(INFO, "Invalid Priority on %s,
> priority %u",
> > > > > + dev->data->name, conf->priority);
> > > > > + return -1;
> > > > > + }
> > > > > + /* Find a free AQ_idx */
> > > > > + uint16_t aq_idx;
> > > > > + for (aq_idx = 0; aq_idx < qtop->num_aqs_per_groups;
> aq_idx++) {
> > > > > + if (((d->q_assigned_bit_map[group_idx] >> aq_idx)
> & 0x1)
> > > > > == 0) {
> > > > > + /* Mark the Queue as assigned */
> > > > > + d->q_assigned_bit_map[group_idx] |= (1 <<
> aq_idx);
> > > > > + /* Report the AQ Index */
> > > > > + return (group_idx << GRP_ID_SHIFT) +
> aq_idx;
> > > > > + }
> > > > > + }
> > > > > + rte_bbdev_log(INFO, "Failed to find free queue on %s,
> priority %u",
> > > > > + dev->data->name, conf->priority);
> > > > > + return -1;
> > > > > +}
> > > > > +
> > > > > +/* Setup ACC100 queue */
> > > > > +static int
> > > > > +acc100_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
> > > > > + const struct rte_bbdev_queue_conf *conf) {
> > > > > + struct acc100_device *d = dev->data->dev_private;
> > > > > + struct acc100_queue *q;
> > > > > + int16_t q_idx;
> > > > > +
> > > > > + /* Allocate the queue data structure. */
> > > > > + q = rte_zmalloc_socket(dev->device->driver->name,
> sizeof(*q),
> > > > > + RTE_CACHE_LINE_SIZE, conf->socket);
> > > > > + if (q == NULL) {
> > > > > + rte_bbdev_log(ERR, "Failed to allocate queue
> memory");
> > > > > + return -ENOMEM;
> > > > > + }
> > > > > +
> > > > > + q->d = d;
> > > > > + q->ring_addr = RTE_PTR_ADD(d->sw_rings, (d->sw_ring_size
> *
> > > > > queue_id));
> > > > > + q->ring_addr_phys = d->sw_rings_phys + (d->sw_ring_size *
> > > > > queue_id);
> > > > > +
> > > > > + /* Prepare the Ring with default descriptor format */
> > > > > + union acc100_dma_desc *desc = NULL;
> > > > > + unsigned int desc_idx, b_idx;
> > > > > + int fcw_len = (conf->op_type == RTE_BBDEV_OP_LDPC_ENC ?
> > > > > + ACC100_FCW_LE_BLEN : (conf->op_type ==
> > > > > RTE_BBDEV_OP_TURBO_DEC ?
> > > > > + ACC100_FCW_TD_BLEN : ACC100_FCW_LD_BLEN));
> > > > > +
> > > > > + for (desc_idx = 0; desc_idx < d->sw_ring_max_depth;
> desc_idx++) {
> > > > > + desc = q->ring_addr + desc_idx;
> > > > > + desc->req.word0 = ACC100_DMA_DESC_TYPE;
> > > > > + desc->req.word1 = 0; /**< Timestamp */
> > > > > + desc->req.word2 = 0;
> > > > > + desc->req.word3 = 0;
> > > > > + uint64_t fcw_offset = (desc_idx << 8) +
> > > > > ACC100_DESC_FCW_OFFSET;
> > > > > + desc->req.data_ptrs[0].address = q->ring_addr_phys
> +
> > > > > fcw_offset;
> > > > > + desc->req.data_ptrs[0].blen = fcw_len;
> > > > > + desc->req.data_ptrs[0].blkid =
> ACC100_DMA_BLKID_FCW;
> > > > > + desc->req.data_ptrs[0].last = 0;
> > > > > + desc->req.data_ptrs[0].dma_ext = 0;
> > > > > + for (b_idx = 1; b_idx <
> ACC100_DMA_MAX_NUM_POINTERS
> > > > > - 1;
> > > > > + b_idx++) {
> > > > > + desc->req.data_ptrs[b_idx].blkid =
> > > > > ACC100_DMA_BLKID_IN;
> > > > > + desc->req.data_ptrs[b_idx].last = 1;
> > > > > + desc->req.data_ptrs[b_idx].dma_ext = 0;
> > > > > + b_idx++;
> > > > > + desc->req.data_ptrs[b_idx].blkid =
> > > > > +
> ACC100_DMA_BLKID_OUT_ENC;
> > > > > + desc->req.data_ptrs[b_idx].last = 1;
> > > > > + desc->req.data_ptrs[b_idx].dma_ext = 0;
> > > > > + }
> > > > > + /* Preset some fields of LDPC FCW */
> > > > > + desc->req.fcw_ld.FCWversion = ACC100_FCW_VER;
> > > > > + desc->req.fcw_ld.gain_i = 1;
> > > > > + desc->req.fcw_ld.gain_h = 1;
> > > > > + }
> > > > > +
> > > > > + q->lb_in = rte_zmalloc_socket(dev->device->driver->name,
> > > > > + RTE_CACHE_LINE_SIZE,
> > > > > + RTE_CACHE_LINE_SIZE, conf->socket);
> > > > > + if (q->lb_in == NULL) {
> > > > > + rte_bbdev_log(ERR, "Failed to allocate lb_in
> memory");
> > > > > + return -ENOMEM;
> > > > > + }
> > > > > + q->lb_in_addr_phys = rte_malloc_virt2iova(q->lb_in);
> > > > > + q->lb_out = rte_zmalloc_socket(dev->device->driver->name,
> > > > > + RTE_CACHE_LINE_SIZE,
> > > > > + RTE_CACHE_LINE_SIZE, conf->socket);
> > > > > + if (q->lb_out == NULL) {
> > > > > + rte_bbdev_log(ERR, "Failed to allocate lb_out
> memory");
> > > > > + return -ENOMEM;
> > > > > + }
> > > > > + q->lb_out_addr_phys = rte_malloc_virt2iova(q->lb_out);
> > > > > +
> > > > > + /*
> > > > > + * Software queue ring wraps synchronously with the HW
> when it
> > > > > reaches
> > > > > + * the boundary of the maximum allocated queue size, no
> matter
> > > > > what the
> > > > > + * sw queue size is. This wrapping is guarded by setting the
> > > > > wrap_mask
> > > > > + * to represent the maximum queue size as allocated at the
> time
> > > > > when
> > > > > + * the device has been setup (in configure()).
> > > > > + *
> > > > > + * The queue depth is set to the queue size value (conf-
> > > > > >queue_size).
> > > > > + * This limits the occupancy of the queue at any point of time,
> so that
> > > > > + * the queue does not get swamped with enqueue requests.
> > > > > + */
> > > > > + q->sw_ring_depth = conf->queue_size;
> > > > > + q->sw_ring_wrap_mask = d->sw_ring_max_depth - 1;
> > > > > +
> > > > > + q->op_type = conf->op_type;
> > > > > +
> > > > > + q_idx = acc100_find_free_queue_idx(dev, conf);
> > > > > + if (q_idx == -1) {
> > > > > + rte_free(q);
> > > > > + return -1;
> > > > > + }
> > > > > +
> > > > > + q->qgrp_id = (q_idx >> GRP_ID_SHIFT) & 0xF;
> > > > > + q->vf_id = (q_idx >> VF_ID_SHIFT) & 0x3F;
> > > > > + q->aq_id = q_idx & 0xF;
> > > > > + q->aq_depth = (conf->op_type ==
> RTE_BBDEV_OP_TURBO_DEC) ?
> > > > > + (1 << d-
> >acc100_conf.q_ul_4g.aq_depth_log2) :
> > > > > + (1 << d-
> >acc100_conf.q_dl_4g.aq_depth_log2);
> > > > > +
> > > > > + q->mmio_reg_enqueue = RTE_PTR_ADD(d->mmio_base,
> > > > > + queue_offset(d->pf_device,
> > > > > + q->vf_id, q->qgrp_id, q-
> >aq_id));
> > > > > +
> > > > > + rte_bbdev_log_debug(
> > > > > + "Setup dev%u q%u: qgrp_id=%u, vf_id=%u,
> > > > > aq_id=%u, aq_depth=%u, mmio_reg_enqueue=%p",
> > > > > + dev->data->dev_id, queue_id, q->qgrp_id,
> q->vf_id,
> > > > > + q->aq_id, q->aq_depth, q-
> >mmio_reg_enqueue);
> > > > > +
> > > > > + dev->data->queues[queue_id].queue_private = q;
> > > > > + return 0;
> > > > > +}
> > > > > +
> > > > > +/* Release ACC100 queue */
> > > > > +static int
> > > > > +acc100_queue_release(struct rte_bbdev *dev, uint16_t q_id) {
> > > > > + struct acc100_device *d = dev->data->dev_private;
> > > > > + struct acc100_queue *q = dev->data-
> >queues[q_id].queue_private;
> > > > > +
> > > > > + if (q != NULL) {
> > > > > + /* Mark the Queue as un-assigned */
> > > > > + d->q_assigned_bit_map[q->qgrp_id] &= (0xFFFFFFFF
> -
> > > > > + (1 << q->aq_id));
> > > > > + rte_free(q->lb_in);
> > > > > + rte_free(q->lb_out);
> > > > > + rte_free(q);
> > > > > + dev->data->queues[q_id].queue_private = NULL;
> > > > > + }
> > > > > +
> > > > > return 0;
> > > > > }
> > > > >
> > > > > @@ -258,8 +673,11 @@
> > > > > }
> > > > >
> > > > > static const struct rte_bbdev_ops acc100_bbdev_ops = {
> > > > > + .setup_queues = acc100_setup_queues,
> > > > > .close = acc100_dev_close,
> > > > > .info_get = acc100_dev_info_get,
> > > > > + .queue_setup = acc100_queue_setup,
> > > > > + .queue_release = acc100_queue_release,
> > > > > };
> > > > >
> > > > > /* ACC100 PCI PF address map */
> > > > > diff --git a/drivers/baseband/acc100/rte_acc100_pmd.h
> > > > > b/drivers/baseband/acc100/rte_acc100_pmd.h
> > > > > index 662e2c8..0e2b79c 100644
> > > > > --- a/drivers/baseband/acc100/rte_acc100_pmd.h
> > > > > +++ b/drivers/baseband/acc100/rte_acc100_pmd.h
> > > > > @@ -518,11 +518,56 @@ struct acc100_registry_addr {
> > > > > .ddr_range = HWVfDmaDdrBaseRangeRoVf, };
> > > > >
> > > > > +/* Structure associated with each queue. */ struct
> > > > > +__rte_cache_aligned acc100_queue {
> > > > > + union acc100_dma_desc *ring_addr; /* Virtual address of sw
> ring */
> > > > > + rte_iova_t ring_addr_phys; /* Physical address of software
> ring */
> > > > > + uint32_t sw_ring_head; /* software ring head */
> > > > > + uint32_t sw_ring_tail; /* software ring tail */
> > > > > + /* software ring size (descriptors, not bytes) */
> > > > > + uint32_t sw_ring_depth;
> > > > > + /* mask used to wrap enqueued descriptors on the sw ring
> */
> > > > > + uint32_t sw_ring_wrap_mask;
> > > > > + /* MMIO register used to enqueue descriptors */
> > > > > + void *mmio_reg_enqueue;
> > > > > + uint8_t vf_id; /* VF ID (max = 63) */
> > > > > + uint8_t qgrp_id; /* Queue Group ID */
> > > > > + uint16_t aq_id; /* Atomic Queue ID */
> > > > > + uint16_t aq_depth; /* Depth of atomic queue */
> > > > > + uint32_t aq_enqueued; /* Count how many "batches" have
> been
> > > > > enqueued */
> > > > > + uint32_t aq_dequeued; /* Count how many "batches" have
> been
> > > > > dequeued */
> > > > > + uint32_t irq_enable; /* Enable ops dequeue interrupts if set
> to 1 */
> > > > > + struct rte_mempool *fcw_mempool; /* FCW mempool */
> > > > > + enum rte_bbdev_op_type op_type; /* Type of this Queue:
> TE or TD
> > > > > */
> > > > > + /* Internal Buffers for loopback input */
> > > > > + uint8_t *lb_in;
> > > > > + uint8_t *lb_out;
> > > > > + rte_iova_t lb_in_addr_phys;
> > > > > + rte_iova_t lb_out_addr_phys;
> > > > > + struct acc100_device *d;
> > > > > +};
> > > > > +
> > > > > /* Private data structure for each ACC100 device */ struct
> > > > > acc100_device
> > > {
> > > > > void *mmio_base; /**< Base address of MMIO registers
> (BAR0) */
> > > > > + void *sw_rings_base; /* Base addr of un-aligned memory
> for sw
> > > > > rings */
> > > > > + void *sw_rings; /* 64MBs of 64MB aligned memory for sw
> rings */
> > > > > + rte_iova_t sw_rings_phys; /* Physical address of sw_rings */
> > > > > + /* Virtual address of the info memory routed to the this
> > > > > +function
> > > > > under
> > > > > + * operation, whether it is PF or VF.
> > > > > + */
> > > > > + union acc100_harq_layout_data *harq_layout;
> > > > > + uint32_t sw_ring_size;
> > > > > uint32_t ddr_size; /* Size in kB */
> > > > > + uint32_t *tail_ptrs; /* Base address of response tail pointer
> buffer */
> > > > > + rte_iova_t tail_ptr_phys; /* Physical address of tail pointers
> */
> > > > > + /* Max number of entries available for each queue in device,
> > > > > depending
> > > > > + * on how many queues are enabled with configure()
> > > > > + */
> > > > > + uint32_t sw_ring_max_depth;
> > > > > struct acc100_conf acc100_conf; /* ACC100 Initial
> configuration
> > > > > */
> > > > > + /* Bitmap capturing which Queues have already been
> assigned */
> > > > > + uint16_t q_assigned_bit_map[ACC100_NUM_QGRPS];
> > > > > bool pf_device; /**< True if this is a PF ACC100 device */
> > > > > bool configured; /**< True if this ACC100 device is configured
> > > > > */ };
> > > > > --
> > > > > 1.8.3.1
More information about the dev
mailing list