[v9,07/10] baseband/acc100: add support for 4G processing

Message ID 1601339385-117424-8-git-send-email-nicolas.chautru@intel.com (mailing list archive)
State Superseded, archived
Delegated to: akhil goyal
Headers
Series bbdev PMD ACC100 |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Chautru, Nicolas Sept. 29, 2020, 12:29 a.m. UTC
  Adding capability for 4G encoder and decoder processing

Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
Acked-by: Liu Tianjiao <Tianjiao.liu@intel.com>
---
 doc/guides/bbdevs/features/acc100.ini    |    4 +-
 drivers/baseband/acc100/rte_acc100_pmd.c | 1010 ++++++++++++++++++++++++++++--
 2 files changed, 945 insertions(+), 69 deletions(-)
  

Comments

Tom Rix Sept. 30, 2020, 6:37 p.m. UTC | #1
On 9/28/20 5:29 PM, Nicolas Chautru wrote:
> Adding capability for 4G encode and decoder processing
>
> Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
> Acked-by: Liu Tianjiao <Tianjiao.liu@intel.com>
> ---
>  doc/guides/bbdevs/features/acc100.ini    |    4 +-
>  drivers/baseband/acc100/rte_acc100_pmd.c | 1010 ++++++++++++++++++++++++++++--
>  2 files changed, 945 insertions(+), 69 deletions(-)
>
> diff --git a/doc/guides/bbdevs/features/acc100.ini b/doc/guides/bbdevs/features/acc100.ini
> index 40c7adc..642cd48 100644
> --- a/doc/guides/bbdevs/features/acc100.ini
> +++ b/doc/guides/bbdevs/features/acc100.ini
> @@ -4,8 +4,8 @@
>  ; Refer to default.ini for the full list of available PMD features.
>  ;
>  [Features]
> -Turbo Decoder (4G)     = N
> -Turbo Encoder (4G)     = N
> +Turbo Decoder (4G)     = Y
> +Turbo Encoder (4G)     = Y
>  LDPC Decoder (5G)      = Y
>  LDPC Encoder (5G)      = Y
>  LLR/HARQ Compression   = Y
> diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c b/drivers/baseband/acc100/rte_acc100_pmd.c
> index e484c0a..7d4c3df 100644
> --- a/drivers/baseband/acc100/rte_acc100_pmd.c
> +++ b/drivers/baseband/acc100/rte_acc100_pmd.c
> @@ -339,7 +339,6 @@
>  	free_base_addresses(base_addrs, i);
>  }
>  
> -
>  /* Allocate 64MB memory used for all software rings */
>  static int
>  acc100_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
> @@ -637,6 +636,41 @@
>  
>  	static const struct rte_bbdev_op_cap bbdev_capabilities[] = {
>  		{
> +			.type = RTE_BBDEV_OP_TURBO_DEC,
> +			.cap.turbo_dec = {
> +				.capability_flags =
> +					RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
> +					RTE_BBDEV_TURBO_CRC_TYPE_24B |
> +					RTE_BBDEV_TURBO_HALF_ITERATION_EVEN |
> +					RTE_BBDEV_TURBO_EARLY_TERMINATION |
> +					RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
> +					RTE_BBDEV_TURBO_MAP_DEC |
> +					RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
> +					RTE_BBDEV_TURBO_DEC_SCATTER_GATHER,
> +				.max_llr_modulus = INT8_MAX,
> +				.num_buffers_src =
> +						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> +				.num_buffers_hard_out =
> +						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> +				.num_buffers_soft_out =
> +						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> +			}
> +		},
> +		{
> +			.type = RTE_BBDEV_OP_TURBO_ENC,
> +			.cap.turbo_enc = {
> +				.capability_flags =
> +					RTE_BBDEV_TURBO_CRC_24B_ATTACH |
> +					RTE_BBDEV_TURBO_RV_INDEX_BYPASS |
> +					RTE_BBDEV_TURBO_RATE_MATCH |
> +					RTE_BBDEV_TURBO_ENC_SCATTER_GATHER,
> +				.num_buffers_src =
> +						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> +				.num_buffers_dst =
> +						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> +			}
> +		},
> +		{
>  			.type   = RTE_BBDEV_OP_LDPC_ENC,
>  			.cap.ldpc_enc = {
>  				.capability_flags =
> @@ -719,7 +753,6 @@
>  #endif
>  }
>  
> -
>  static const struct rte_bbdev_ops acc100_bbdev_ops = {
>  	.setup_queues = acc100_setup_queues,
>  	.close = acc100_dev_close,
> @@ -763,6 +796,58 @@
>  	return tail;
>  }
>  
> +/* Fill in a frame control word for turbo encoding. */
> +static inline void
> +acc100_fcw_te_fill(const struct rte_bbdev_enc_op *op, struct acc100_fcw_te *fcw)
> +{
> +	fcw->code_block_mode = op->turbo_enc.code_block_mode;
> +	if (fcw->code_block_mode == 0) { /* For TB mode */
> +		fcw->k_neg = op->turbo_enc.tb_params.k_neg;
> +		fcw->k_pos = op->turbo_enc.tb_params.k_pos;
> +		fcw->c_neg = op->turbo_enc.tb_params.c_neg;
> +		fcw->c = op->turbo_enc.tb_params.c;
> +		fcw->ncb_neg = op->turbo_enc.tb_params.ncb_neg;
> +		fcw->ncb_pos = op->turbo_enc.tb_params.ncb_pos;
> +
> +		if (check_bit(op->turbo_enc.op_flags,
> +				RTE_BBDEV_TURBO_RATE_MATCH)) {
> +			fcw->bypass_rm = 0;
> +			fcw->cab = op->turbo_enc.tb_params.cab;
> +			fcw->ea = op->turbo_enc.tb_params.ea;
> +			fcw->eb = op->turbo_enc.tb_params.eb;
> +		} else {
> +			/* E is set to the encoding output size when RM is
> +			 * bypassed.
> +			 */
> +			fcw->bypass_rm = 1;
> +			fcw->cab = fcw->c_neg;
> +			fcw->ea = 3 * fcw->k_neg + 12;
> +			fcw->eb = 3 * fcw->k_pos + 12;
> +		}
> +	} else { /* For CB mode */
> +		fcw->k_pos = op->turbo_enc.cb_params.k;
> +		fcw->ncb_pos = op->turbo_enc.cb_params.ncb;
> +
> +		if (check_bit(op->turbo_enc.op_flags,
> +				RTE_BBDEV_TURBO_RATE_MATCH)) {
> +			fcw->bypass_rm = 0;
> +			fcw->eb = op->turbo_enc.cb_params.e;
> +		} else {
> +			/* E is set to the encoding output size when RM is
> +			 * bypassed.
> +			 */
> +			fcw->bypass_rm = 1;
> +			fcw->eb = 3 * fcw->k_pos + 12;
> +		}
> +	}
> +
> +	fcw->bypass_rv_idx1 = check_bit(op->turbo_enc.op_flags,
> +			RTE_BBDEV_TURBO_RV_INDEX_BYPASS);
> +	fcw->code_block_crc = check_bit(op->turbo_enc.op_flags,
> +			RTE_BBDEV_TURBO_CRC_24B_ATTACH);
> +	fcw->rv_idx1 = op->turbo_enc.rv_index;
> +}
> +
>  /* Compute value of k0.
>   * Based on 3GPP 38.212 Table 5.4.2.1-2
>   * Starting position of different redundancy versions, k0
> @@ -813,6 +898,25 @@
>  	fcw->mcb_count = num_cb;
>  }
>  
> +/* Fill in a frame control word for turbo decoding. */
> +static inline void
> +acc100_fcw_td_fill(const struct rte_bbdev_dec_op *op, struct acc100_fcw_td *fcw)
> +{
> +	/* Note : Early termination is always enabled for 4GUL */
> +	fcw->fcw_ver = 1;
> +	if (op->turbo_dec.code_block_mode == 0)
> +		fcw->k_pos = op->turbo_dec.tb_params.k_pos;
> +	else
> +		fcw->k_pos = op->turbo_dec.cb_params.k;
> +	fcw->turbo_crc_type = check_bit(op->turbo_dec.op_flags,
> +			RTE_BBDEV_TURBO_CRC_TYPE_24B);
> +	fcw->bypass_sb_deint = 0;
> +	fcw->raw_decoder_input_on = 0;
> +	fcw->max_iter = op->turbo_dec.iter_max;
> +	fcw->half_iter_on = !check_bit(op->turbo_dec.op_flags,
> +			RTE_BBDEV_TURBO_HALF_ITERATION_EVEN);
> +}
> +
>  /* Fill in a frame control word for LDPC decoding. */
>  static inline void
>  acc100_fcw_ld_fill(const struct rte_bbdev_dec_op *op, struct acc100_fcw_ld *fcw,
> @@ -1042,6 +1146,87 @@
>  }
>  
>  static inline int
> +acc100_dma_desc_te_fill(struct rte_bbdev_enc_op *op,
> +		struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
> +		struct rte_mbuf *output, uint32_t *in_offset,
> +		uint32_t *out_offset, uint32_t *out_length,
> +		uint32_t *mbuf_total_left, uint32_t *seg_total_left, uint8_t r)
> +{
> +	int next_triplet = 1; /* FCW already done */
> +	uint32_t e, ea, eb, length;
> +	uint16_t k, k_neg, k_pos;
> +	uint8_t cab, c_neg;
> +
> +	desc->word0 = ACC100_DMA_DESC_TYPE;
> +	desc->word1 = 0; /**< Timestamp could be disabled */
> +	desc->word2 = 0;
> +	desc->word3 = 0;
> +	desc->numCBs = 1;
> +
> +	if (op->turbo_enc.code_block_mode == 0) {
> +		ea = op->turbo_enc.tb_params.ea;
> +		eb = op->turbo_enc.tb_params.eb;
> +		cab = op->turbo_enc.tb_params.cab;
> +		k_neg = op->turbo_enc.tb_params.k_neg;
> +		k_pos = op->turbo_enc.tb_params.k_pos;
> +		c_neg = op->turbo_enc.tb_params.c_neg;
> +		e = (r < cab) ? ea : eb;
> +		k = (r < c_neg) ? k_neg : k_pos;
> +	} else {
> +		e = op->turbo_enc.cb_params.e;
> +		k = op->turbo_enc.cb_params.k;
> +	}
> +
> +	if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_CRC_24B_ATTACH))
> +		length = (k - 24) >> 3;
> +	else
> +		length = k >> 3;
> +
> +	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < length))) {

similar to other patches, the two conditions in this check can be combined into a single <= comparison

change this generally across the series

> +		rte_bbdev_log(ERR,
> +				"Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
> +				*mbuf_total_left, length);
> +		return -1;
> +	}
> +
> +	next_triplet = acc100_dma_fill_blk_type_in(desc, input, in_offset,
> +			length, seg_total_left, next_triplet);
> +	if (unlikely(next_triplet < 0)) {
> +		rte_bbdev_log(ERR,
> +				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
> +				op);
> +		return -1;
> +	}
> +	desc->data_ptrs[next_triplet - 1].last = 1;
> +	desc->m2dlen = next_triplet;
> +	*mbuf_total_left -= length;
> +
> +	/* Set output length */
> +	if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_RATE_MATCH))
> +		/* Integer round up division by 8 */
> +		*out_length = (e + 7) >> 3;
> +	else
> +		*out_length = (k >> 3) * 3 + 2;
> +
> +	next_triplet = acc100_dma_fill_blk_type_out(desc, output, *out_offset,
> +			*out_length, next_triplet, ACC100_DMA_BLKID_OUT_ENC);
> +	if (unlikely(next_triplet < 0)) {
> +		rte_bbdev_log(ERR,
> +				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
> +				op);
> +		return -1;
> +	}
> +	op->turbo_enc.output.length += *out_length;
> +	*out_offset += *out_length;
> +	desc->data_ptrs[next_triplet - 1].last = 1;
> +	desc->d2mlen = next_triplet - desc->m2dlen;
> +
> +	desc->op_addr = op;
> +
> +	return 0;
> +}
> +
> +static inline int
>  acc100_dma_desc_le_fill(struct rte_bbdev_enc_op *op,
>  		struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
>  		struct rte_mbuf *output, uint32_t *in_offset,
> @@ -1110,6 +1295,117 @@
>  }
>  
>  static inline int
> +acc100_dma_desc_td_fill(struct rte_bbdev_dec_op *op,
> +		struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
> +		struct rte_mbuf *h_output, struct rte_mbuf *s_output,
> +		uint32_t *in_offset, uint32_t *h_out_offset,
> +		uint32_t *s_out_offset, uint32_t *h_out_length,
> +		uint32_t *s_out_length, uint32_t *mbuf_total_left,
> +		uint32_t *seg_total_left, uint8_t r)
> +{
> +	int next_triplet = 1; /* FCW already done */
> +	uint16_t k;
> +	uint16_t crc24_overlap = 0;
> +	uint32_t e, kw;
> +
> +	desc->word0 = ACC100_DMA_DESC_TYPE;
> +	desc->word1 = 0; /**< Timestamp could be disabled */
> +	desc->word2 = 0;
> +	desc->word3 = 0;
> +	desc->numCBs = 1;
> +
> +	if (op->turbo_dec.code_block_mode == 0) {
> +		k = (r < op->turbo_dec.tb_params.c_neg)
> +			? op->turbo_dec.tb_params.k_neg
> +			: op->turbo_dec.tb_params.k_pos;
> +		e = (r < op->turbo_dec.tb_params.cab)
> +			? op->turbo_dec.tb_params.ea
> +			: op->turbo_dec.tb_params.eb;
> +	} else {
> +		k = op->turbo_dec.cb_params.k;
> +		e = op->turbo_dec.cb_params.e;
> +	}
> +
> +	if ((op->turbo_dec.code_block_mode == 0)
> +		&& !check_bit(op->turbo_dec.op_flags,
> +		RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
> +		crc24_overlap = 24;
> +
> +	/* Calculates circular buffer size.
> +	 * According to 3gpp 36.212 section 5.1.4.2
> +	 *   Kw = 3 * Kpi,
> +	 * where:
> +	 *   Kpi = nCol * nRow
> +	 * where nCol is 32 and nRow can be calculated from:
> +	 *   D =< nCol * nRow
> +	 * where D is the size of each output from turbo encoder block (k + 4).
> +	 */
> +	kw = RTE_ALIGN_CEIL(k + 4, 32) * 3;
> +
> +	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < kw))) {
> +		rte_bbdev_log(ERR,
> +				"Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
> +				*mbuf_total_left, kw);
> +		return -1;
> +	}
> +
> +	next_triplet = acc100_dma_fill_blk_type_in(desc, input, in_offset, kw,
> +			seg_total_left, next_triplet);
> +	if (unlikely(next_triplet < 0)) {
> +		rte_bbdev_log(ERR,
> +				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
> +				op);
> +		return -1;
> +	}
> +	desc->data_ptrs[next_triplet - 1].last = 1;
> +	desc->m2dlen = next_triplet;
> +	*mbuf_total_left -= kw;
> +
> +	next_triplet = acc100_dma_fill_blk_type_out(
> +			desc, h_output, *h_out_offset,
> +			k >> 3, next_triplet, ACC100_DMA_BLKID_OUT_HARD);
> +	if (unlikely(next_triplet < 0)) {
> +		rte_bbdev_log(ERR,
> +				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
> +				op);
> +		return -1;
> +	}
> +
> +	*h_out_length = ((k - crc24_overlap) >> 3);
> +	op->turbo_dec.hard_output.length += *h_out_length;
> +	*h_out_offset += *h_out_length;
> +
> +	/* Soft output */
> +	if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
> +		if (check_bit(op->turbo_dec.op_flags,
> +				RTE_BBDEV_TURBO_EQUALIZER))
> +			*s_out_length = e;
> +		else
> +			*s_out_length = (k * 3) + 12;
> +
> +		next_triplet = acc100_dma_fill_blk_type_out(desc, s_output,
> +				*s_out_offset, *s_out_length, next_triplet,
> +				ACC100_DMA_BLKID_OUT_SOFT);
> +		if (unlikely(next_triplet < 0)) {
> +			rte_bbdev_log(ERR,
> +					"Mismatch between data to process and mbuf data length in bbdev_op: %p",
> +					op);
> +			return -1;
> +		}
> +
> +		op->turbo_dec.soft_output.length += *s_out_length;
> +		*s_out_offset += *s_out_length;
> +	}
> +
> +	desc->data_ptrs[next_triplet - 1].last = 1;
> +	desc->d2mlen = next_triplet - desc->m2dlen;
> +
> +	desc->op_addr = op;
> +
> +	return 0;
> +}
> +
> +static inline int
>  acc100_dma_desc_ld_fill(struct rte_bbdev_dec_op *op,
>  		struct acc100_dma_req_desc *desc,
>  		struct rte_mbuf **input, struct rte_mbuf *h_output,
> @@ -1374,6 +1670,57 @@
>  
>  /* Enqueue one encode operations for ACC100 device in CB mode */
>  static inline int
> +enqueue_enc_one_op_cb(struct acc100_queue *q, struct rte_bbdev_enc_op *op,
> +		uint16_t total_enqueued_cbs)
> +{
> +	union acc100_dma_desc *desc = NULL;
> +	int ret;
> +	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
> +		seg_total_left;
> +	struct rte_mbuf *input, *output_head, *output;
> +
> +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> +			& q->sw_ring_wrap_mask);
> +	desc = q->ring_addr + desc_idx;
> +	acc100_fcw_te_fill(op, &desc->req.fcw_te);
> +
> +	input = op->turbo_enc.input.data;
> +	output_head = output = op->turbo_enc.output.data;
> +	in_offset = op->turbo_enc.input.offset;
> +	out_offset = op->turbo_enc.output.offset;
> +	out_length = 0;
> +	mbuf_total_left = op->turbo_enc.input.length;
> +	seg_total_left = rte_pktmbuf_data_len(op->turbo_enc.input.data)
> +			- in_offset;
> +
> +	ret = acc100_dma_desc_te_fill(op, &desc->req, &input, output,
> +			&in_offset, &out_offset, &out_length, &mbuf_total_left,
> +			&seg_total_left, 0);
> +
> +	if (unlikely(ret < 0))
> +		return ret;
> +
> +	mbuf_append(output_head, output, out_length);
> +
> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> +	rte_memdump(stderr, "FCW", &desc->req.fcw_te,
> +			sizeof(desc->req.fcw_te) - 8);
> +	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> +
> +	/* Check if any data left after processing one CB */
> +	if (mbuf_total_left != 0) {
> +		rte_bbdev_log(ERR,
> +				"Some date still left after processing one CB: mbuf_total_left = %u",
> +				mbuf_total_left);
> +		return -EINVAL;
> +	}
> +#endif
> +	/* One CB (one op) was successfully prepared to enqueue */
> +	return 1;
> +}
> +
> +/* Enqueue one encode operations for ACC100 device in CB mode */
> +static inline int
>  enqueue_ldpc_enc_n_op_cb(struct acc100_queue *q, struct rte_bbdev_enc_op **ops,
>  		uint16_t total_enqueued_cbs, int16_t num)
>  {
> @@ -1481,78 +1828,235 @@
>  	return 1;
>  }
>  
> -static inline int
> -harq_loopback(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
> -		uint16_t total_enqueued_cbs) {
> -	struct acc100_fcw_ld *fcw;
> -	union acc100_dma_desc *desc;
> -	int next_triplet = 1;
> -	struct rte_mbuf *hq_output_head, *hq_output;
> -	uint16_t harq_in_length = op->ldpc_dec.harq_combined_input.length;
> -	if (harq_in_length == 0) {
> -		rte_bbdev_log(ERR, "Loopback of invalid null size\n");
> -		return -EINVAL;
> -	}
>  
> -	int h_comp = check_bit(op->ldpc_dec.op_flags,
> -			RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION
> -			) ? 1 : 0;
> -	if (h_comp == 1)
> -		harq_in_length = harq_in_length * 8 / 6;
> -	harq_in_length = RTE_ALIGN(harq_in_length, 64);
> -	uint16_t harq_dma_length_in = (h_comp == 0) ?
> -			harq_in_length :
> -			harq_in_length * 6 / 8;
> -	uint16_t harq_dma_length_out = harq_dma_length_in;
> -	bool ddr_mem_in = check_bit(op->ldpc_dec.op_flags,
> -			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE);
> -	union acc100_harq_layout_data *harq_layout = q->d->harq_layout;
> -	uint16_t harq_index = (ddr_mem_in ?
> -			op->ldpc_dec.harq_combined_input.offset :
> -			op->ldpc_dec.harq_combined_output.offset)
> -			/ ACC100_HARQ_OFFSET;
> +/* Enqueue one encode operations for ACC100 device in TB mode. */
> +static inline int
> +enqueue_enc_one_op_tb(struct acc100_queue *q, struct rte_bbdev_enc_op *op,
> +		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb)
> +{
> +	union acc100_dma_desc *desc = NULL;
> +	int ret;
> +	uint8_t r, c;
> +	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
> +		seg_total_left;
> +	struct rte_mbuf *input, *output_head, *output;
> +	uint16_t current_enqueued_cbs = 0;
>  
>  	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
>  			& q->sw_ring_wrap_mask);
>  	desc = q->ring_addr + desc_idx;
> -	fcw = &desc->req.fcw_ld;
> -	/* Set the FCW from loopback into DDR */
> -	memset(fcw, 0, sizeof(struct acc100_fcw_ld));
> -	fcw->FCWversion = ACC100_FCW_VER;
> -	fcw->qm = 2;
> -	fcw->Zc = 384;
> -	if (harq_in_length < 16 * N_ZC_1)
> -		fcw->Zc = 16;
> -	fcw->ncb = fcw->Zc * N_ZC_1;
> -	fcw->rm_e = 2;
> -	fcw->hcin_en = 1;
> -	fcw->hcout_en = 1;
> +	uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
> +	acc100_fcw_te_fill(op, &desc->req.fcw_te);
>  
> -	rte_bbdev_log(DEBUG, "Loopback IN %d Index %d offset %d length %d %d\n",
> -			ddr_mem_in, harq_index,
> -			harq_layout[harq_index].offset, harq_in_length,
> -			harq_dma_length_in);
> +	input = op->turbo_enc.input.data;
> +	output_head = output = op->turbo_enc.output.data;
> +	in_offset = op->turbo_enc.input.offset;
> +	out_offset = op->turbo_enc.output.offset;
> +	out_length = 0;
> +	mbuf_total_left = op->turbo_enc.input.length;
>  
> -	if (ddr_mem_in && (harq_layout[harq_index].offset > 0)) {
> -		fcw->hcin_size0 = harq_layout[harq_index].size0;
> -		fcw->hcin_offset = harq_layout[harq_index].offset;
> -		fcw->hcin_size1 = harq_in_length - fcw->hcin_offset;
> -		harq_dma_length_in = (fcw->hcin_size0 + fcw->hcin_size1);
> -		if (h_comp == 1)
> -			harq_dma_length_in = harq_dma_length_in * 6 / 8;
> -	} else {
> -		fcw->hcin_size0 = harq_in_length;
> -	}
> -	harq_layout[harq_index].val = 0;
> -	rte_bbdev_log(DEBUG, "Loopback FCW Config %d %d %d\n",
> -			fcw->hcin_size0, fcw->hcin_offset, fcw->hcin_size1);
> -	fcw->hcout_size0 = harq_in_length;
> -	fcw->hcin_decomp_mode = h_comp;
> -	fcw->hcout_comp_mode = h_comp;
> -	fcw->gain_i = 1;
> -	fcw->gain_h = 1;
> +	c = op->turbo_enc.tb_params.c;
> +	r = op->turbo_enc.tb_params.r;
>  
> -	/* Set the prefix of descriptor. This could be done at polling */
> +	while (mbuf_total_left > 0 && r < c) {
> +		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
> +		/* Set up DMA descriptor */
> +		desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
> +				& q->sw_ring_wrap_mask);
> +		desc->req.data_ptrs[0].address = q->ring_addr_phys + fcw_offset;
> +		desc->req.data_ptrs[0].blen = ACC100_FCW_TE_BLEN;
> +
> +		ret = acc100_dma_desc_te_fill(op, &desc->req, &input, output,
> +				&in_offset, &out_offset, &out_length,
> +				&mbuf_total_left, &seg_total_left, r);
> +		if (unlikely(ret < 0))
> +			return ret;
> +		mbuf_append(output_head, output, out_length);
> +
> +		/* Set total number of CBs in TB */
> +		desc->req.cbs_in_tb = cbs_in_tb;
> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> +		rte_memdump(stderr, "FCW", &desc->req.fcw_te,
> +				sizeof(desc->req.fcw_te) - 8);
> +		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> +#endif
> +
> +		if (seg_total_left == 0) {
> +			/* Go to the next mbuf */
> +			input = input->next;
> +			in_offset = 0;
> +			output = output->next;
> +			out_offset = 0;
> +		}
> +
> +		total_enqueued_cbs++;
> +		current_enqueued_cbs++;
> +		r++;
> +	}
> +
> +	if (unlikely(desc == NULL))
> +		return current_enqueued_cbs;
> +
> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> +	/* Check if any CBs left for processing */
> +	if (mbuf_total_left != 0) {
> +		rte_bbdev_log(ERR,
> +				"Some date still left for processing: mbuf_total_left = %u",
> +				mbuf_total_left);
> +		return -EINVAL;
> +	}
> +#endif
> +
> +	/* Set SDone on last CB descriptor for TB mode. */
> +	desc->req.sdone_enable = 1;
> +	desc->req.irq_enable = q->irq_enable;
> +
> +	return current_enqueued_cbs;
> +}
> +
> +/** Enqueue one decode operations for ACC100 device in CB mode */
> +static inline int
> +enqueue_dec_one_op_cb(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
> +		uint16_t total_enqueued_cbs)
> +{
> +	union acc100_dma_desc *desc = NULL;
> +	int ret;
> +	uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
> +		h_out_length, mbuf_total_left, seg_total_left;
> +	struct rte_mbuf *input, *h_output_head, *h_output,
> +		*s_output_head, *s_output;
> +
> +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> +			& q->sw_ring_wrap_mask);
> +	desc = q->ring_addr + desc_idx;
> +	acc100_fcw_td_fill(op, &desc->req.fcw_td);
> +
> +	input = op->turbo_dec.input.data;
> +	h_output_head = h_output = op->turbo_dec.hard_output.data;
> +	s_output_head = s_output = op->turbo_dec.soft_output.data;
> +	in_offset = op->turbo_dec.input.offset;
> +	h_out_offset = op->turbo_dec.hard_output.offset;
> +	s_out_offset = op->turbo_dec.soft_output.offset;
> +	h_out_length = s_out_length = 0;
> +	mbuf_total_left = op->turbo_dec.input.length;
> +	seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
> +
> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> +	if (unlikely(input == NULL)) {
> +		rte_bbdev_log(ERR, "Invalid mbuf pointer");
> +		return -EFAULT;
> +	}
> +#endif
> +
> +	/* Set up DMA descriptor */
> +	desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
> +			& q->sw_ring_wrap_mask);
> +
> +	ret = acc100_dma_desc_td_fill(op, &desc->req, &input, h_output,
> +			s_output, &in_offset, &h_out_offset, &s_out_offset,
> +			&h_out_length, &s_out_length, &mbuf_total_left,
> +			&seg_total_left, 0);
> +
> +	if (unlikely(ret < 0))
> +		return ret;
> +
> +	/* Hard output */
> +	mbuf_append(h_output_head, h_output, h_out_length);
> +
> +	/* Soft output */
> +	if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT))
> +		mbuf_append(s_output_head, s_output, s_out_length);
> +
> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> +	rte_memdump(stderr, "FCW", &desc->req.fcw_td,
> +			sizeof(desc->req.fcw_td) - 8);
> +	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> +
> +	/* Check if any CBs left for processing */
> +	if (mbuf_total_left != 0) {
> +		rte_bbdev_log(ERR,
> +				"Some date still left after processing one CB: mbuf_total_left = %u",
> +				mbuf_total_left);
> +		return -EINVAL;
> +	}
> +#endif
This debug check duplicates logic used around mbuf_append elsewhere; it should be factored into a common helper function.
> +
> +	/* One CB (one op) was successfully prepared to enqueue */
> +	return 1;
> +}
> +
> +static inline int
> +harq_loopback(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
> +		uint16_t total_enqueued_cbs) {
> +	struct acc100_fcw_ld *fcw;
> +	union acc100_dma_desc *desc;
> +	int next_triplet = 1;
> +	struct rte_mbuf *hq_output_head, *hq_output;
> +	uint16_t harq_in_length = op->ldpc_dec.harq_combined_input.length;
> +	if (harq_in_length == 0) {
> +		rte_bbdev_log(ERR, "Loopback of invalid null size\n");
> +		return -EINVAL;
> +	}
> +
> +	int h_comp = check_bit(op->ldpc_dec.op_flags,
> +			RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION
> +			) ? 1 : 0;
> +	if (h_comp == 1)
> +		harq_in_length = harq_in_length * 8 / 6;
> +	harq_in_length = RTE_ALIGN(harq_in_length, 64);
> +	uint16_t harq_dma_length_in = (h_comp == 0) ?
Can these h_comp checks be combined into a single if/else?
> +			harq_in_length :
> +			harq_in_length * 6 / 8;
> +	uint16_t harq_dma_length_out = harq_dma_length_in;
> +	bool ddr_mem_in = check_bit(op->ldpc_dec.op_flags,
> +			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE);
> +	union acc100_harq_layout_data *harq_layout = q->d->harq_layout;
> +	uint16_t harq_index = (ddr_mem_in ?
> +			op->ldpc_dec.harq_combined_input.offset :
> +			op->ldpc_dec.harq_combined_output.offset)
> +			/ ACC100_HARQ_OFFSET;
> +
> +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> +			& q->sw_ring_wrap_mask);
> +	desc = q->ring_addr + desc_idx;
> +	fcw = &desc->req.fcw_ld;
> +	/* Set the FCW from loopback into DDR */
> +	memset(fcw, 0, sizeof(struct acc100_fcw_ld));
> +	fcw->FCWversion = ACC100_FCW_VER;
> +	fcw->qm = 2;
> +	fcw->Zc = 384;
these magic numbers should have #defines
> +	if (harq_in_length < 16 * N_ZC_1)
> +		fcw->Zc = 16;
> +	fcw->ncb = fcw->Zc * N_ZC_1;
> +	fcw->rm_e = 2;
> +	fcw->hcin_en = 1;
> +	fcw->hcout_en = 1;
> +
> +	rte_bbdev_log(DEBUG, "Loopback IN %d Index %d offset %d length %d %d\n",
> +			ddr_mem_in, harq_index,
> +			harq_layout[harq_index].offset, harq_in_length,
> +			harq_dma_length_in);
> +
> +	if (ddr_mem_in && (harq_layout[harq_index].offset > 0)) {
> +		fcw->hcin_size0 = harq_layout[harq_index].size0;
> +		fcw->hcin_offset = harq_layout[harq_index].offset;
> +		fcw->hcin_size1 = harq_in_length - fcw->hcin_offset;
> +		harq_dma_length_in = (fcw->hcin_size0 + fcw->hcin_size1);
> +		if (h_comp == 1)
> +			harq_dma_length_in = harq_dma_length_in * 6 / 8;
> +	} else {
> +		fcw->hcin_size0 = harq_in_length;
> +	}
> +	harq_layout[harq_index].val = 0;
> +	rte_bbdev_log(DEBUG, "Loopback FCW Config %d %d %d\n",
> +			fcw->hcin_size0, fcw->hcin_offset, fcw->hcin_size1);
> +	fcw->hcout_size0 = harq_in_length;
> +	fcw->hcin_decomp_mode = h_comp;
> +	fcw->hcout_comp_mode = h_comp;
> +	fcw->gain_i = 1;
> +	fcw->gain_h = 1;
> +
> +	/* Set the prefix of descriptor. This could be done at polling */
>  	desc->req.word0 = ACC100_DMA_DESC_TYPE;
>  	desc->req.word1 = 0; /**< Timestamp could be disabled */
>  	desc->req.word2 = 0;
> @@ -1816,6 +2320,107 @@
>  	return current_enqueued_cbs;
>  }
>  
> +/* Enqueue one decode operations for ACC100 device in TB mode */
> +static inline int
> +enqueue_dec_one_op_tb(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
> +		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb)
> +{
> +	union acc100_dma_desc *desc = NULL;
> +	int ret;
> +	uint8_t r, c;
> +	uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
> +		h_out_length, mbuf_total_left, seg_total_left;
> +	struct rte_mbuf *input, *h_output_head, *h_output,
> +		*s_output_head, *s_output;
> +	uint16_t current_enqueued_cbs = 0;
> +
> +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> +			& q->sw_ring_wrap_mask);
> +	desc = q->ring_addr + desc_idx;
> +	uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
> +	acc100_fcw_td_fill(op, &desc->req.fcw_td);
> +
> +	input = op->turbo_dec.input.data;
> +	h_output_head = h_output = op->turbo_dec.hard_output.data;
> +	s_output_head = s_output = op->turbo_dec.soft_output.data;
> +	in_offset = op->turbo_dec.input.offset;
> +	h_out_offset = op->turbo_dec.hard_output.offset;
> +	s_out_offset = op->turbo_dec.soft_output.offset;
> +	h_out_length = s_out_length = 0;
> +	mbuf_total_left = op->turbo_dec.input.length;
> +	c = op->turbo_dec.tb_params.c;
> +	r = op->turbo_dec.tb_params.r;
> +
> +	while (mbuf_total_left > 0 && r < c) {
> +
> +		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
> +
> +		/* Set up DMA descriptor */
> +		desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
> +				& q->sw_ring_wrap_mask);
> +		desc->req.data_ptrs[0].address = q->ring_addr_phys + fcw_offset;
> +		desc->req.data_ptrs[0].blen = ACC100_FCW_TD_BLEN;
> +		ret = acc100_dma_desc_td_fill(op, &desc->req, &input,
> +				h_output, s_output, &in_offset, &h_out_offset,
> +				&s_out_offset, &h_out_length, &s_out_length,
> +				&mbuf_total_left, &seg_total_left, r);
> +
> +		if (unlikely(ret < 0))
> +			return ret;
> +
> +		/* Hard output */
> +		mbuf_append(h_output_head, h_output, h_out_length);
> +
> +		/* Soft output */
> +		if (check_bit(op->turbo_dec.op_flags,
> +				RTE_BBDEV_TURBO_SOFT_OUTPUT))
> +			mbuf_append(s_output_head, s_output, s_out_length);
> +
> +		/* Set total number of CBs in TB */
> +		desc->req.cbs_in_tb = cbs_in_tb;
> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> +		rte_memdump(stderr, "FCW", &desc->req.fcw_td,
> +				sizeof(desc->req.fcw_td) - 8);
> +		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> +#endif
> +
> +		if (seg_total_left == 0) {
> +			/* Go to the next mbuf */
> +			input = input->next;
> +			in_offset = 0;
> +			h_output = h_output->next;
> +			h_out_offset = 0;
> +
> +			if (check_bit(op->turbo_dec.op_flags,
> +					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
> +				s_output = s_output->next;
> +				s_out_offset = 0;
> +			}
> +		}
> +
> +		total_enqueued_cbs++;
> +		current_enqueued_cbs++;
> +		r++;
> +	}
> +
> +	if (unlikely(desc == NULL))
> +		return current_enqueued_cbs;
> +
> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> +	/* Check if any CBs left for processing */
> +	if (mbuf_total_left != 0) {
> +		rte_bbdev_log(ERR,
> +				"Some date still left for processing: mbuf_total_left = %u",
> +				mbuf_total_left);
> +		return -EINVAL;
> +	}
> +#endif
> +	/* Set SDone on last CB descriptor for TB mode */
> +	desc->req.sdone_enable = 1;
> +	desc->req.irq_enable = q->irq_enable;
> +
> +	return current_enqueued_cbs;
> +}
>  
>  /* Calculates number of CBs in processed encoder TB based on 'r' and input
>   * length.
> @@ -1893,6 +2498,45 @@
>  	return cbs_in_tb;
>  }
>  
> +/* Enqueue encode operations for ACC100 device in CB mode. */
> +static uint16_t
> +acc100_enqueue_enc_cb(struct rte_bbdev_queue_data *q_data,
> +		struct rte_bbdev_enc_op **ops, uint16_t num)
> +{
> +	struct acc100_queue *q = q_data->queue_private;
> +	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
> +	uint16_t i;
> +	union acc100_dma_desc *desc;
> +	int ret;
> +
> +	for (i = 0; i < num; ++i) {
> +		/* Check if there are available space for further processing */
> +		if (unlikely(avail - 1 < 0))
> +			break;
> +		avail -= 1;
> +
> +		ret = enqueue_enc_one_op_cb(q, ops[i], i);
> +		if (ret < 0)
> +			break;
> +	}
> +
> +	if (unlikely(i == 0))
> +		return 0; /* Nothing to enqueue */
> +
> +	/* Set SDone in last CB in enqueued ops for CB mode*/
> +	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
> +			& q->sw_ring_wrap_mask);
> +	desc->req.sdone_enable = 1;
> +	desc->req.irq_enable = q->irq_enable;
> +
> +	acc100_dma_enqueue(q, i, &q_data->queue_stats);
> +
> +	/* Update stats */
> +	q_data->queue_stats.enqueued_count += i;
> +	q_data->queue_stats.enqueue_err_count += num - i;
> +	return i;
> +}
> +
>  /* Check we can mux encode operations with common FCW */
>  static inline bool
>  check_mux(struct rte_bbdev_enc_op **ops, uint16_t num) {
> @@ -1960,6 +2604,52 @@
>  	return i;
>  }
>  
> +/* Enqueue encode operations for ACC100 device in TB mode. */
> +static uint16_t
> +acc100_enqueue_enc_tb(struct rte_bbdev_queue_data *q_data,
> +		struct rte_bbdev_enc_op **ops, uint16_t num)
> +{
> +	struct acc100_queue *q = q_data->queue_private;
> +	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
> +	uint16_t i, enqueued_cbs = 0;
> +	uint8_t cbs_in_tb;
> +	int ret;
> +
> +	for (i = 0; i < num; ++i) {
> +		cbs_in_tb = get_num_cbs_in_tb_enc(&ops[i]->turbo_enc);
> +		/* Check if there are available space for further processing */
> +		if (unlikely(avail - cbs_in_tb < 0))
> +			break;
> +		avail -= cbs_in_tb;
> +
> +		ret = enqueue_enc_one_op_tb(q, ops[i], enqueued_cbs, cbs_in_tb);
> +		if (ret < 0)
> +			break;
> +		enqueued_cbs += ret;
> +	}
> +
other similar functions have a (i == 0) check here.
> +	acc100_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
> +
> +	/* Update stats */
> +	q_data->queue_stats.enqueued_count += i;
> +	q_data->queue_stats.enqueue_err_count += num - i;
> +
> +	return i;
> +}
> +
> +/* Enqueue encode operations for ACC100 device. */
> +static uint16_t
> +acc100_enqueue_enc(struct rte_bbdev_queue_data *q_data,
> +		struct rte_bbdev_enc_op **ops, uint16_t num)
> +{
> +	if (unlikely(num == 0))
> +		return 0;
The num == 0 check should move into the tb/cb functions.
> +	if (ops[0]->turbo_enc.code_block_mode == 0)
> +		return acc100_enqueue_enc_tb(q_data, ops, num);
> +	else
> +		return acc100_enqueue_enc_cb(q_data, ops, num);
> +}
> +
>  /* Enqueue encode operations for ACC100 device. */
>  static uint16_t
>  acc100_enqueue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
> @@ -1967,7 +2657,51 @@
>  {
>  	if (unlikely(num == 0))
>  		return 0;
> -	return acc100_enqueue_ldpc_enc_cb(q_data, ops, num);
> +	if (ops[0]->ldpc_enc.code_block_mode == 0)
> +		return acc100_enqueue_enc_tb(q_data, ops, num);
> +	else
> +		return acc100_enqueue_ldpc_enc_cb(q_data, ops, num);
> +}
> +
> +
> +/* Enqueue decode operations for ACC100 device in CB mode */
> +static uint16_t
> +acc100_enqueue_dec_cb(struct rte_bbdev_queue_data *q_data,
> +		struct rte_bbdev_dec_op **ops, uint16_t num)
> +{

This seems like the 10th variant of a very similar function — could these be combined into fewer functions?

Maybe by passing in a function pointer to the enqueue_dec_one_op_* helper that does the actual work?

> +	struct acc100_queue *q = q_data->queue_private;
> +	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
> +	uint16_t i;
> +	union acc100_dma_desc *desc;
> +	int ret;
> +
> +	for (i = 0; i < num; ++i) {
> +		/* Check if there are available space for further processing */
> +		if (unlikely(avail - 1 < 0))
> +			break;
> +		avail -= 1;
> +
> +		ret = enqueue_dec_one_op_cb(q, ops[i], i);
> +		if (ret < 0)
> +			break;
> +	}
> +
> +	if (unlikely(i == 0))
> +		return 0; /* Nothing to enqueue */
> +
> +	/* Set SDone in last CB in enqueued ops for CB mode*/
> +	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
> +			& q->sw_ring_wrap_mask);
> +	desc->req.sdone_enable = 1;
> +	desc->req.irq_enable = q->irq_enable;
> +
> +	acc100_dma_enqueue(q, i, &q_data->queue_stats);
> +
> +	/* Update stats */
> +	q_data->queue_stats.enqueued_count += i;
> +	q_data->queue_stats.enqueue_err_count += num - i;
> +
> +	return i;
>  }
>  
>  /* Check we can mux encode operations with common FCW */
> @@ -2065,6 +2799,53 @@
>  	return i;
>  }
>  
> +
> +/* Enqueue decode operations for ACC100 device in TB mode */
> +static uint16_t
> +acc100_enqueue_dec_tb(struct rte_bbdev_queue_data *q_data,
> +		struct rte_bbdev_dec_op **ops, uint16_t num)
> +{
11th ;)
> +	struct acc100_queue *q = q_data->queue_private;
> +	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
> +	uint16_t i, enqueued_cbs = 0;
> +	uint8_t cbs_in_tb;
> +	int ret;
> +
> +	for (i = 0; i < num; ++i) {
> +		cbs_in_tb = get_num_cbs_in_tb_dec(&ops[i]->turbo_dec);
> +		/* Check if there are available space for further processing */
> +		if (unlikely(avail - cbs_in_tb < 0))
> +			break;
> +		avail -= cbs_in_tb;
> +
> +		ret = enqueue_dec_one_op_tb(q, ops[i], enqueued_cbs, cbs_in_tb);
> +		if (ret < 0)
> +			break;
> +		enqueued_cbs += ret;
> +	}
> +
> +	acc100_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
> +
> +	/* Update stats */
> +	q_data->queue_stats.enqueued_count += i;
> +	q_data->queue_stats.enqueue_err_count += num - i;
> +
> +	return i;
> +}
> +
> +/* Enqueue decode operations for ACC100 device. */
> +static uint16_t
> +acc100_enqueue_dec(struct rte_bbdev_queue_data *q_data,
> +		struct rte_bbdev_dec_op **ops, uint16_t num)
> +{
> +	if (unlikely(num == 0))
> +		return 0;
Similarly, move the num == 0 check into the tb/cb functions.
> +	if (ops[0]->turbo_dec.code_block_mode == 0)
> +		return acc100_enqueue_dec_tb(q_data, ops, num);
> +	else
> +		return acc100_enqueue_dec_cb(q_data, ops, num);
> +}
> +
>  /* Enqueue decode operations for ACC100 device. */
>  static uint16_t
>  acc100_enqueue_ldpc_dec(struct rte_bbdev_queue_data *q_data,
> @@ -2388,6 +3169,51 @@
>  	return cb_idx;
>  }
>  
> +/* Dequeue encode operations from ACC100 device. */
> +static uint16_t
> +acc100_dequeue_enc(struct rte_bbdev_queue_data *q_data,
> +		struct rte_bbdev_enc_op **ops, uint16_t num)
> +{
> +	struct acc100_queue *q = q_data->queue_private;
> +	uint16_t dequeue_num;
> +	uint32_t avail = q->sw_ring_head - q->sw_ring_tail;
> +	uint32_t aq_dequeued = 0;
> +	uint16_t i;
> +	uint16_t dequeued_cbs = 0;
> +	struct rte_bbdev_enc_op *op;
> +	int ret;
> +
> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> +	if (unlikely(ops == 0 && q == NULL))

ops is a pointer, so it should be compared with NULL rather than 0.

The && likely needs to be || — as written, the function only bails out when both are invalid.

Maybe print out a message so the caller knows something wrong happened.

> +		return 0;
> +#endif
> +
> +	dequeue_num = (avail < num) ? avail : num;
> +
> +	for (i = 0; i < dequeue_num; ++i) {
> +		op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
> +			& q->sw_ring_wrap_mask))->req.op_addr;
> +		if (op->turbo_enc.code_block_mode == 0)
> +			ret = dequeue_enc_one_op_tb(q, &ops[i], dequeued_cbs,
> +					&aq_dequeued);
> +		else
> +			ret = dequeue_enc_one_op_cb(q, &ops[i], dequeued_cbs,
> +					&aq_dequeued);
> +
> +		if (ret < 0)
> +			break;
> +		dequeued_cbs += ret;
> +	}
> +
> +	q->aq_dequeued += aq_dequeued;
> +	q->sw_ring_tail += dequeued_cbs;
> +
> +	/* Update enqueue stats */
> +	q_data->queue_stats.dequeued_count += i;
> +
> +	return i;
> +}
> +
>  /* Dequeue LDPC encode operations from ACC100 device. */
>  static uint16_t
>  acc100_dequeue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
> @@ -2426,6 +3252,52 @@
>  	return dequeued_cbs;
>  }
>  
> +
> +/* Dequeue decode operations from ACC100 device. */
> +static uint16_t
> +acc100_dequeue_dec(struct rte_bbdev_queue_data *q_data,
> +		struct rte_bbdev_dec_op **ops, uint16_t num)
> +{

Very similar to the enc function above; consider how to combine them into a single function.

Tom

> +	struct acc100_queue *q = q_data->queue_private;
> +	uint16_t dequeue_num;
> +	uint32_t avail = q->sw_ring_head - q->sw_ring_tail;
> +	uint32_t aq_dequeued = 0;
> +	uint16_t i;
> +	uint16_t dequeued_cbs = 0;
> +	struct rte_bbdev_dec_op *op;
> +	int ret;
> +
> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> +	if (unlikely(ops == 0 && q == NULL))
> +		return 0;
> +#endif
> +
> +	dequeue_num = (avail < num) ? avail : num;
> +
> +	for (i = 0; i < dequeue_num; ++i) {
> +		op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
> +			& q->sw_ring_wrap_mask))->req.op_addr;
> +		if (op->turbo_dec.code_block_mode == 0)
> +			ret = dequeue_dec_one_op_tb(q, &ops[i], dequeued_cbs,
> +					&aq_dequeued);
> +		else
> +			ret = dequeue_dec_one_op_cb(q_data, q, &ops[i],
> +					dequeued_cbs, &aq_dequeued);
> +
> +		if (ret < 0)
> +			break;
> +		dequeued_cbs += ret;
> +	}
> +
> +	q->aq_dequeued += aq_dequeued;
> +	q->sw_ring_tail += dequeued_cbs;
> +
> +	/* Update enqueue stats */
> +	q_data->queue_stats.dequeued_count += i;
> +
> +	return i;
> +}
> +
>  /* Dequeue decode operations from ACC100 device. */
>  static uint16_t
>  acc100_dequeue_ldpc_dec(struct rte_bbdev_queue_data *q_data,
> @@ -2479,6 +3351,10 @@
>  	struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device);
>  
>  	dev->dev_ops = &acc100_bbdev_ops;
> +	dev->enqueue_enc_ops = acc100_enqueue_enc;
> +	dev->enqueue_dec_ops = acc100_enqueue_dec;
> +	dev->dequeue_enc_ops = acc100_dequeue_enc;
> +	dev->dequeue_dec_ops = acc100_dequeue_dec;
>  	dev->enqueue_ldpc_enc_ops = acc100_enqueue_ldpc_enc;
>  	dev->enqueue_ldpc_dec_ops = acc100_enqueue_ldpc_dec;
>  	dev->dequeue_ldpc_enc_ops = acc100_dequeue_ldpc_enc;
  
Chautru, Nicolas Sept. 30, 2020, 7:10 p.m. UTC | #2
Hi Tom, 

> From: Tom Rix <trix@redhat.com>
> On 9/28/20 5:29 PM, Nicolas Chautru wrote:
> > Adding capability for 4G encode and decoder processing
> >
> > Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
> > Acked-by: Liu Tianjiao <Tianjiao.liu@intel.com>
> > ---
> >  doc/guides/bbdevs/features/acc100.ini    |    4 +-
> >  drivers/baseband/acc100/rte_acc100_pmd.c | 1010
> > ++++++++++++++++++++++++++++--
> >  2 files changed, 945 insertions(+), 69 deletions(-)
> >
> > diff --git a/doc/guides/bbdevs/features/acc100.ini
> > b/doc/guides/bbdevs/features/acc100.ini
> > index 40c7adc..642cd48 100644
> > --- a/doc/guides/bbdevs/features/acc100.ini
> > +++ b/doc/guides/bbdevs/features/acc100.ini
> > @@ -4,8 +4,8 @@
> >  ; Refer to default.ini for the full list of available PMD features.
> >  ;
> >  [Features]
> > -Turbo Decoder (4G)     = N
> > -Turbo Encoder (4G)     = N
> > +Turbo Decoder (4G)     = Y
> > +Turbo Encoder (4G)     = Y
> >  LDPC Decoder (5G)      = Y
> >  LDPC Encoder (5G)      = Y
> >  LLR/HARQ Compression   = Y
> > diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c
> > b/drivers/baseband/acc100/rte_acc100_pmd.c
> > index e484c0a..7d4c3df 100644
> > --- a/drivers/baseband/acc100/rte_acc100_pmd.c
> > +++ b/drivers/baseband/acc100/rte_acc100_pmd.c
> > @@ -339,7 +339,6 @@
> >  	free_base_addresses(base_addrs, i);
> >  }
> >
> > -
> >  /* Allocate 64MB memory used for all software rings */  static int
> > acc100_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int
> > socket_id) @@ -637,6 +636,41 @@
> >
> >  	static const struct rte_bbdev_op_cap bbdev_capabilities[] = {
> >  		{
> > +			.type = RTE_BBDEV_OP_TURBO_DEC,
> > +			.cap.turbo_dec = {
> > +				.capability_flags =
> > +
> 	RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
> > +					RTE_BBDEV_TURBO_CRC_TYPE_24B
> |
> > +
> 	RTE_BBDEV_TURBO_HALF_ITERATION_EVEN |
> > +
> 	RTE_BBDEV_TURBO_EARLY_TERMINATION |
> > +
> 	RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
> > +					RTE_BBDEV_TURBO_MAP_DEC |
> > +
> 	RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
> > +
> 	RTE_BBDEV_TURBO_DEC_SCATTER_GATHER,
> > +				.max_llr_modulus = INT8_MAX,
> > +				.num_buffers_src =
> > +
> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> > +				.num_buffers_hard_out =
> > +
> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> > +				.num_buffers_soft_out =
> > +
> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> > +			}
> > +		},
> > +		{
> > +			.type = RTE_BBDEV_OP_TURBO_ENC,
> > +			.cap.turbo_enc = {
> > +				.capability_flags =
> > +
> 	RTE_BBDEV_TURBO_CRC_24B_ATTACH |
> > +
> 	RTE_BBDEV_TURBO_RV_INDEX_BYPASS |
> > +					RTE_BBDEV_TURBO_RATE_MATCH |
> > +
> 	RTE_BBDEV_TURBO_ENC_SCATTER_GATHER,
> > +				.num_buffers_src =
> > +
> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> > +				.num_buffers_dst =
> > +
> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> > +			}
> > +		},
> > +		{
> >  			.type   = RTE_BBDEV_OP_LDPC_ENC,
> >  			.cap.ldpc_enc = {
> >  				.capability_flags =
> > @@ -719,7 +753,6 @@
> >  #endif
> >  }
> >
> > -
> >  static const struct rte_bbdev_ops acc100_bbdev_ops = {
> >  	.setup_queues = acc100_setup_queues,
> >  	.close = acc100_dev_close,
> > @@ -763,6 +796,58 @@
> >  	return tail;
> >  }
> >
> > +/* Fill in a frame control word for turbo encoding. */ static inline
> > +void acc100_fcw_te_fill(const struct rte_bbdev_enc_op *op, struct
> > +acc100_fcw_te *fcw) {
> > +	fcw->code_block_mode = op->turbo_enc.code_block_mode;
> > +	if (fcw->code_block_mode == 0) { /* For TB mode */
> > +		fcw->k_neg = op->turbo_enc.tb_params.k_neg;
> > +		fcw->k_pos = op->turbo_enc.tb_params.k_pos;
> > +		fcw->c_neg = op->turbo_enc.tb_params.c_neg;
> > +		fcw->c = op->turbo_enc.tb_params.c;
> > +		fcw->ncb_neg = op->turbo_enc.tb_params.ncb_neg;
> > +		fcw->ncb_pos = op->turbo_enc.tb_params.ncb_pos;
> > +
> > +		if (check_bit(op->turbo_enc.op_flags,
> > +				RTE_BBDEV_TURBO_RATE_MATCH)) {
> > +			fcw->bypass_rm = 0;
> > +			fcw->cab = op->turbo_enc.tb_params.cab;
> > +			fcw->ea = op->turbo_enc.tb_params.ea;
> > +			fcw->eb = op->turbo_enc.tb_params.eb;
> > +		} else {
> > +			/* E is set to the encoding output size when RM is
> > +			 * bypassed.
> > +			 */
> > +			fcw->bypass_rm = 1;
> > +			fcw->cab = fcw->c_neg;
> > +			fcw->ea = 3 * fcw->k_neg + 12;
> > +			fcw->eb = 3 * fcw->k_pos + 12;
> > +		}
> > +	} else { /* For CB mode */
> > +		fcw->k_pos = op->turbo_enc.cb_params.k;
> > +		fcw->ncb_pos = op->turbo_enc.cb_params.ncb;
> > +
> > +		if (check_bit(op->turbo_enc.op_flags,
> > +				RTE_BBDEV_TURBO_RATE_MATCH)) {
> > +			fcw->bypass_rm = 0;
> > +			fcw->eb = op->turbo_enc.cb_params.e;
> > +		} else {
> > +			/* E is set to the encoding output size when RM is
> > +			 * bypassed.
> > +			 */
> > +			fcw->bypass_rm = 1;
> > +			fcw->eb = 3 * fcw->k_pos + 12;
> > +		}
> > +	}
> > +
> > +	fcw->bypass_rv_idx1 = check_bit(op->turbo_enc.op_flags,
> > +			RTE_BBDEV_TURBO_RV_INDEX_BYPASS);
> > +	fcw->code_block_crc = check_bit(op->turbo_enc.op_flags,
> > +			RTE_BBDEV_TURBO_CRC_24B_ATTACH);
> > +	fcw->rv_idx1 = op->turbo_enc.rv_index; }
> > +
> >  /* Compute value of k0.
> >   * Based on 3GPP 38.212 Table 5.4.2.1-2
> >   * Starting position of different redundancy versions, k0 @@ -813,6
> > +898,25 @@
> >  	fcw->mcb_count = num_cb;
> >  }
> >
> > +/* Fill in a frame control word for turbo decoding. */ static inline
> > +void acc100_fcw_td_fill(const struct rte_bbdev_dec_op *op, struct
> > +acc100_fcw_td *fcw) {
> > +	/* Note : Early termination is always enabled for 4GUL */
> > +	fcw->fcw_ver = 1;
> > +	if (op->turbo_dec.code_block_mode == 0)
> > +		fcw->k_pos = op->turbo_dec.tb_params.k_pos;
> > +	else
> > +		fcw->k_pos = op->turbo_dec.cb_params.k;
> > +	fcw->turbo_crc_type = check_bit(op->turbo_dec.op_flags,
> > +			RTE_BBDEV_TURBO_CRC_TYPE_24B);
> > +	fcw->bypass_sb_deint = 0;
> > +	fcw->raw_decoder_input_on = 0;
> > +	fcw->max_iter = op->turbo_dec.iter_max;
> > +	fcw->half_iter_on = !check_bit(op->turbo_dec.op_flags,
> > +			RTE_BBDEV_TURBO_HALF_ITERATION_EVEN);
> > +}
> > +
> >  /* Fill in a frame control word for LDPC decoding. */  static inline
> > void  acc100_fcw_ld_fill(const struct rte_bbdev_dec_op *op, struct
> > acc100_fcw_ld *fcw, @@ -1042,6 +1146,87 @@  }
> >
> >  static inline int
> > +acc100_dma_desc_te_fill(struct rte_bbdev_enc_op *op,
> > +		struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
> > +		struct rte_mbuf *output, uint32_t *in_offset,
> > +		uint32_t *out_offset, uint32_t *out_length,
> > +		uint32_t *mbuf_total_left, uint32_t *seg_total_left, uint8_t
> r) {
> > +	int next_triplet = 1; /* FCW already done */
> > +	uint32_t e, ea, eb, length;
> > +	uint16_t k, k_neg, k_pos;
> > +	uint8_t cab, c_neg;
> > +
> > +	desc->word0 = ACC100_DMA_DESC_TYPE;
> > +	desc->word1 = 0; /**< Timestamp could be disabled */
> > +	desc->word2 = 0;
> > +	desc->word3 = 0;
> > +	desc->numCBs = 1;
> > +
> > +	if (op->turbo_enc.code_block_mode == 0) {
> > +		ea = op->turbo_enc.tb_params.ea;
> > +		eb = op->turbo_enc.tb_params.eb;
> > +		cab = op->turbo_enc.tb_params.cab;
> > +		k_neg = op->turbo_enc.tb_params.k_neg;
> > +		k_pos = op->turbo_enc.tb_params.k_pos;
> > +		c_neg = op->turbo_enc.tb_params.c_neg;
> > +		e = (r < cab) ? ea : eb;
> > +		k = (r < c_neg) ? k_neg : k_pos;
> > +	} else {
> > +		e = op->turbo_enc.cb_params.e;
> > +		k = op->turbo_enc.cb_params.k;
> > +	}
> > +
> > +	if (check_bit(op->turbo_enc.op_flags,
> RTE_BBDEV_TURBO_CRC_24B_ATTACH))
> > +		length = (k - 24) >> 3;
> > +	else
> > +		length = k >> 3;
> > +
> > +	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left <
> > +length))) {
> 
> similar to other patches, this check can be combined to <=
> 
> change generally

Same comment as on the other patch.

> 
> > +		rte_bbdev_log(ERR,
> > +				"Mismatch between mbuf length and
> included CB sizes: mbuf len %u, cb len %u",
> > +				*mbuf_total_left, length);
> > +		return -1;
> > +	}
> > +
> > +	next_triplet = acc100_dma_fill_blk_type_in(desc, input, in_offset,
> > +			length, seg_total_left, next_triplet);
> > +	if (unlikely(next_triplet < 0)) {
> > +		rte_bbdev_log(ERR,
> > +				"Mismatch between data to process and
> mbuf data length in bbdev_op: %p",
> > +				op);
> > +		return -1;
> > +	}
> > +	desc->data_ptrs[next_triplet - 1].last = 1;
> > +	desc->m2dlen = next_triplet;
> > +	*mbuf_total_left -= length;
> > +
> > +	/* Set output length */
> > +	if (check_bit(op->turbo_enc.op_flags,
> RTE_BBDEV_TURBO_RATE_MATCH))
> > +		/* Integer round up division by 8 */
> > +		*out_length = (e + 7) >> 3;
> > +	else
> > +		*out_length = (k >> 3) * 3 + 2;
> > +
> > +	next_triplet = acc100_dma_fill_blk_type_out(desc, output,
> *out_offset,
> > +			*out_length, next_triplet,
> ACC100_DMA_BLKID_OUT_ENC);
> > +	if (unlikely(next_triplet < 0)) {
> > +		rte_bbdev_log(ERR,
> > +				"Mismatch between data to process and
> mbuf data length in bbdev_op: %p",
> > +				op);
> > +		return -1;
> > +	}
> > +	op->turbo_enc.output.length += *out_length;
> > +	*out_offset += *out_length;
> > +	desc->data_ptrs[next_triplet - 1].last = 1;
> > +	desc->d2mlen = next_triplet - desc->m2dlen;
> > +
> > +	desc->op_addr = op;
> > +
> > +	return 0;
> > +}
> > +
> > +static inline int
> >  acc100_dma_desc_le_fill(struct rte_bbdev_enc_op *op,
> >  		struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
> >  		struct rte_mbuf *output, uint32_t *in_offset, @@ -1110,6
> +1295,117
> > @@  }
> >
> >  static inline int
> > +acc100_dma_desc_td_fill(struct rte_bbdev_dec_op *op,
> > +		struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
> > +		struct rte_mbuf *h_output, struct rte_mbuf *s_output,
> > +		uint32_t *in_offset, uint32_t *h_out_offset,
> > +		uint32_t *s_out_offset, uint32_t *h_out_length,
> > +		uint32_t *s_out_length, uint32_t *mbuf_total_left,
> > +		uint32_t *seg_total_left, uint8_t r) {
> > +	int next_triplet = 1; /* FCW already done */
> > +	uint16_t k;
> > +	uint16_t crc24_overlap = 0;
> > +	uint32_t e, kw;
> > +
> > +	desc->word0 = ACC100_DMA_DESC_TYPE;
> > +	desc->word1 = 0; /**< Timestamp could be disabled */
> > +	desc->word2 = 0;
> > +	desc->word3 = 0;
> > +	desc->numCBs = 1;
> > +
> > +	if (op->turbo_dec.code_block_mode == 0) {
> > +		k = (r < op->turbo_dec.tb_params.c_neg)
> > +			? op->turbo_dec.tb_params.k_neg
> > +			: op->turbo_dec.tb_params.k_pos;
> > +		e = (r < op->turbo_dec.tb_params.cab)
> > +			? op->turbo_dec.tb_params.ea
> > +			: op->turbo_dec.tb_params.eb;
> > +	} else {
> > +		k = op->turbo_dec.cb_params.k;
> > +		e = op->turbo_dec.cb_params.e;
> > +	}
> > +
> > +	if ((op->turbo_dec.code_block_mode == 0)
> > +		&& !check_bit(op->turbo_dec.op_flags,
> > +		RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
> > +		crc24_overlap = 24;
> > +
> > +	/* Calculates circular buffer size.
> > +	 * According to 3gpp 36.212 section 5.1.4.2
> > +	 *   Kw = 3 * Kpi,
> > +	 * where:
> > +	 *   Kpi = nCol * nRow
> > +	 * where nCol is 32 and nRow can be calculated from:
> > +	 *   D =< nCol * nRow
> > +	 * where D is the size of each output from turbo encoder block (k +
> 4).
> > +	 */
> > +	kw = RTE_ALIGN_CEIL(k + 4, 32) * 3;
> > +
> > +	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < kw))) {
> > +		rte_bbdev_log(ERR,
> > +				"Mismatch between mbuf length and
> included CB sizes: mbuf len %u, cb len %u",
> > +				*mbuf_total_left, kw);
> > +		return -1;
> > +	}
> > +
> > +	next_triplet = acc100_dma_fill_blk_type_in(desc, input, in_offset,
> kw,
> > +			seg_total_left, next_triplet);
> > +	if (unlikely(next_triplet < 0)) {
> > +		rte_bbdev_log(ERR,
> > +				"Mismatch between data to process and
> mbuf data length in bbdev_op: %p",
> > +				op);
> > +		return -1;
> > +	}
> > +	desc->data_ptrs[next_triplet - 1].last = 1;
> > +	desc->m2dlen = next_triplet;
> > +	*mbuf_total_left -= kw;
> > +
> > +	next_triplet = acc100_dma_fill_blk_type_out(
> > +			desc, h_output, *h_out_offset,
> > +			k >> 3, next_triplet,
> ACC100_DMA_BLKID_OUT_HARD);
> > +	if (unlikely(next_triplet < 0)) {
> > +		rte_bbdev_log(ERR,
> > +				"Mismatch between data to process and
> mbuf data length in bbdev_op: %p",
> > +				op);
> > +		return -1;
> > +	}
> > +
> > +	*h_out_length = ((k - crc24_overlap) >> 3);
> > +	op->turbo_dec.hard_output.length += *h_out_length;
> > +	*h_out_offset += *h_out_length;
> > +
> > +	/* Soft output */
> > +	if (check_bit(op->turbo_dec.op_flags,
> RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
> > +		if (check_bit(op->turbo_dec.op_flags,
> > +				RTE_BBDEV_TURBO_EQUALIZER))
> > +			*s_out_length = e;
> > +		else
> > +			*s_out_length = (k * 3) + 12;
> > +
> > +		next_triplet = acc100_dma_fill_blk_type_out(desc, s_output,
> > +				*s_out_offset, *s_out_length, next_triplet,
> > +				ACC100_DMA_BLKID_OUT_SOFT);
> > +		if (unlikely(next_triplet < 0)) {
> > +			rte_bbdev_log(ERR,
> > +					"Mismatch between data to process
> and mbuf data length in bbdev_op: %p",
> > +					op);
> > +			return -1;
> > +		}
> > +
> > +		op->turbo_dec.soft_output.length += *s_out_length;
> > +		*s_out_offset += *s_out_length;
> > +	}
> > +
> > +	desc->data_ptrs[next_triplet - 1].last = 1;
> > +	desc->d2mlen = next_triplet - desc->m2dlen;
> > +
> > +	desc->op_addr = op;
> > +
> > +	return 0;
> > +}
> > +
> > +static inline int
> >  acc100_dma_desc_ld_fill(struct rte_bbdev_dec_op *op,
> >  		struct acc100_dma_req_desc *desc,
> >  		struct rte_mbuf **input, struct rte_mbuf *h_output, @@ -
> 1374,6
> > +1670,57 @@
> >
> >  /* Enqueue one encode operations for ACC100 device in CB mode */
> > static inline int
> > +enqueue_enc_one_op_cb(struct acc100_queue *q, struct
> rte_bbdev_enc_op *op,
> > +		uint16_t total_enqueued_cbs)
> > +{
> > +	union acc100_dma_desc *desc = NULL;
> > +	int ret;
> > +	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
> > +		seg_total_left;
> > +	struct rte_mbuf *input, *output_head, *output;
> > +
> > +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> > +			& q->sw_ring_wrap_mask);
> > +	desc = q->ring_addr + desc_idx;
> > +	acc100_fcw_te_fill(op, &desc->req.fcw_te);
> > +
> > +	input = op->turbo_enc.input.data;
> > +	output_head = output = op->turbo_enc.output.data;
> > +	in_offset = op->turbo_enc.input.offset;
> > +	out_offset = op->turbo_enc.output.offset;
> > +	out_length = 0;
> > +	mbuf_total_left = op->turbo_enc.input.length;
> > +	seg_total_left = rte_pktmbuf_data_len(op->turbo_enc.input.data)
> > +			- in_offset;
> > +
> > +	ret = acc100_dma_desc_te_fill(op, &desc->req, &input, output,
> > +			&in_offset, &out_offset, &out_length,
> &mbuf_total_left,
> > +			&seg_total_left, 0);
> > +
> > +	if (unlikely(ret < 0))
> > +		return ret;
> > +
> > +	mbuf_append(output_head, output, out_length);
> > +
> > +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> > +	rte_memdump(stderr, "FCW", &desc->req.fcw_te,
> > +			sizeof(desc->req.fcw_te) - 8);
> > +	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> > +
> > +	/* Check if any data left after processing one CB */
> > +	if (mbuf_total_left != 0) {
> > +		rte_bbdev_log(ERR,
> > +				"Some date still left after processing one CB:
> mbuf_total_left = %u",
> > +				mbuf_total_left);
> > +		return -EINVAL;
> > +	}
> > +#endif
> > +	/* One CB (one op) was successfully prepared to enqueue */
> > +	return 1;
> > +}
> > +
> > +/* Enqueue one encode operations for ACC100 device in CB mode */
> > +static inline int
> >  enqueue_ldpc_enc_n_op_cb(struct acc100_queue *q, struct
> rte_bbdev_enc_op **ops,
> >  		uint16_t total_enqueued_cbs, int16_t num)  { @@ -1481,78
> +1828,235
> > @@
> >  	return 1;
> >  }
> >
> > -static inline int
> > -harq_loopback(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
> > -		uint16_t total_enqueued_cbs) {
> > -	struct acc100_fcw_ld *fcw;
> > -	union acc100_dma_desc *desc;
> > -	int next_triplet = 1;
> > -	struct rte_mbuf *hq_output_head, *hq_output;
> > -	uint16_t harq_in_length = op-
> >ldpc_dec.harq_combined_input.length;
> > -	if (harq_in_length == 0) {
> > -		rte_bbdev_log(ERR, "Loopback of invalid null size\n");
> > -		return -EINVAL;
> > -	}
> >
> > -	int h_comp = check_bit(op->ldpc_dec.op_flags,
> > -			RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION
> > -			) ? 1 : 0;
> > -	if (h_comp == 1)
> > -		harq_in_length = harq_in_length * 8 / 6;
> > -	harq_in_length = RTE_ALIGN(harq_in_length, 64);
> > -	uint16_t harq_dma_length_in = (h_comp == 0) ?
> > -			harq_in_length :
> > -			harq_in_length * 6 / 8;
> > -	uint16_t harq_dma_length_out = harq_dma_length_in;
> > -	bool ddr_mem_in = check_bit(op->ldpc_dec.op_flags,
> > -
> 	RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE);
> > -	union acc100_harq_layout_data *harq_layout = q->d->harq_layout;
> > -	uint16_t harq_index = (ddr_mem_in ?
> > -			op->ldpc_dec.harq_combined_input.offset :
> > -			op->ldpc_dec.harq_combined_output.offset)
> > -			/ ACC100_HARQ_OFFSET;
> > +/* Enqueue one encode operations for ACC100 device in TB mode. */
> > +static inline int enqueue_enc_one_op_tb(struct acc100_queue *q,
> > +struct rte_bbdev_enc_op *op,
> > +		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb) {
> > +	union acc100_dma_desc *desc = NULL;
> > +	int ret;
> > +	uint8_t r, c;
> > +	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
> > +		seg_total_left;
> > +	struct rte_mbuf *input, *output_head, *output;
> > +	uint16_t current_enqueued_cbs = 0;
> >
> >  	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> >  			& q->sw_ring_wrap_mask);
> >  	desc = q->ring_addr + desc_idx;
> > -	fcw = &desc->req.fcw_ld;
> > -	/* Set the FCW from loopback into DDR */
> > -	memset(fcw, 0, sizeof(struct acc100_fcw_ld));
> > -	fcw->FCWversion = ACC100_FCW_VER;
> > -	fcw->qm = 2;
> > -	fcw->Zc = 384;
> > -	if (harq_in_length < 16 * N_ZC_1)
> > -		fcw->Zc = 16;
> > -	fcw->ncb = fcw->Zc * N_ZC_1;
> > -	fcw->rm_e = 2;
> > -	fcw->hcin_en = 1;
> > -	fcw->hcout_en = 1;
> > +	uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
> > +	acc100_fcw_te_fill(op, &desc->req.fcw_te);
> >
> > -	rte_bbdev_log(DEBUG, "Loopback IN %d Index %d offset %d length
> %d %d\n",
> > -			ddr_mem_in, harq_index,
> > -			harq_layout[harq_index].offset, harq_in_length,
> > -			harq_dma_length_in);
> > +	input = op->turbo_enc.input.data;
> > +	output_head = output = op->turbo_enc.output.data;
> > +	in_offset = op->turbo_enc.input.offset;
> > +	out_offset = op->turbo_enc.output.offset;
> > +	out_length = 0;
> > +	mbuf_total_left = op->turbo_enc.input.length;
> >
> > -	if (ddr_mem_in && (harq_layout[harq_index].offset > 0)) {
> > -		fcw->hcin_size0 = harq_layout[harq_index].size0;
> > -		fcw->hcin_offset = harq_layout[harq_index].offset;
> > -		fcw->hcin_size1 = harq_in_length - fcw->hcin_offset;
> > -		harq_dma_length_in = (fcw->hcin_size0 + fcw->hcin_size1);
> > -		if (h_comp == 1)
> > -			harq_dma_length_in = harq_dma_length_in * 6 / 8;
> > -	} else {
> > -		fcw->hcin_size0 = harq_in_length;
> > -	}
> > -	harq_layout[harq_index].val = 0;
> > -	rte_bbdev_log(DEBUG, "Loopback FCW Config %d %d %d\n",
> > -			fcw->hcin_size0, fcw->hcin_offset, fcw->hcin_size1);
> > -	fcw->hcout_size0 = harq_in_length;
> > -	fcw->hcin_decomp_mode = h_comp;
> > -	fcw->hcout_comp_mode = h_comp;
> > -	fcw->gain_i = 1;
> > -	fcw->gain_h = 1;
> > +	c = op->turbo_enc.tb_params.c;
> > +	r = op->turbo_enc.tb_params.r;
> >
> > -	/* Set the prefix of descriptor. This could be done at polling */
> > +	while (mbuf_total_left > 0 && r < c) {
> > +		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
> > +		/* Set up DMA descriptor */
> > +		desc = q->ring_addr + ((q->sw_ring_head +
> total_enqueued_cbs)
> > +				& q->sw_ring_wrap_mask);
> > +		desc->req.data_ptrs[0].address = q->ring_addr_phys +
> fcw_offset;
> > +		desc->req.data_ptrs[0].blen = ACC100_FCW_TE_BLEN;
> > +
> > +		ret = acc100_dma_desc_te_fill(op, &desc->req, &input,
> output,
> > +				&in_offset, &out_offset, &out_length,
> > +				&mbuf_total_left, &seg_total_left, r);
> > +		if (unlikely(ret < 0))
> > +			return ret;
> > +		mbuf_append(output_head, output, out_length);
> > +
> > +		/* Set total number of CBs in TB */
> > +		desc->req.cbs_in_tb = cbs_in_tb;
> > +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> > +		rte_memdump(stderr, "FCW", &desc->req.fcw_te,
> > +				sizeof(desc->req.fcw_te) - 8);
> > +		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> #endif
> > +
> > +		if (seg_total_left == 0) {
> > +			/* Go to the next mbuf */
> > +			input = input->next;
> > +			in_offset = 0;
> > +			output = output->next;
> > +			out_offset = 0;
> > +		}
> > +
> > +		total_enqueued_cbs++;
> > +		current_enqueued_cbs++;
> > +		r++;
> > +	}
> > +
> > +	if (unlikely(desc == NULL))
> > +		return current_enqueued_cbs;
> > +
> > +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> > +	/* Check if any CBs left for processing */
> > +	if (mbuf_total_left != 0) {
> > +		rte_bbdev_log(ERR,
> > +				"Some date still left for processing:
> mbuf_total_left = %u",
> > +				mbuf_total_left);
> > +		return -EINVAL;
> > +	}
> > +#endif
> > +
> > +	/* Set SDone on last CB descriptor for TB mode. */
> > +	desc->req.sdone_enable = 1;
> > +	desc->req.irq_enable = q->irq_enable;
> > +
> > +	return current_enqueued_cbs;
> > +}
> > +
> > +/** Enqueue one decode operations for ACC100 device in CB mode */
> > +static inline int enqueue_dec_one_op_cb(struct acc100_queue *q,
> > +struct rte_bbdev_dec_op *op,
> > +		uint16_t total_enqueued_cbs)
> > +{
> > +	union acc100_dma_desc *desc = NULL;
> > +	int ret;
> > +	uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
> > +		h_out_length, mbuf_total_left, seg_total_left;
> > +	struct rte_mbuf *input, *h_output_head, *h_output,
> > +		*s_output_head, *s_output;
> > +
> > +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> > +			& q->sw_ring_wrap_mask);
> > +	desc = q->ring_addr + desc_idx;
> > +	acc100_fcw_td_fill(op, &desc->req.fcw_td);
> > +
> > +	input = op->turbo_dec.input.data;
> > +	h_output_head = h_output = op->turbo_dec.hard_output.data;
> > +	s_output_head = s_output = op->turbo_dec.soft_output.data;
> > +	in_offset = op->turbo_dec.input.offset;
> > +	h_out_offset = op->turbo_dec.hard_output.offset;
> > +	s_out_offset = op->turbo_dec.soft_output.offset;
> > +	h_out_length = s_out_length = 0;
> > +	mbuf_total_left = op->turbo_dec.input.length;
> > +	seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
> > +
> > +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> > +	if (unlikely(input == NULL)) {
> > +		rte_bbdev_log(ERR, "Invalid mbuf pointer");
> > +		return -EFAULT;
> > +	}
> > +#endif
> > +
> > +	/* Set up DMA descriptor */
> > +	desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
> > +			& q->sw_ring_wrap_mask);
> > +
> > +	ret = acc100_dma_desc_td_fill(op, &desc->req, &input, h_output,
> > +			s_output, &in_offset, &h_out_offset, &s_out_offset,
> > +			&h_out_length, &s_out_length, &mbuf_total_left,
> > +			&seg_total_left, 0);
> > +
> > +	if (unlikely(ret < 0))
> > +		return ret;
> > +
> > +	/* Hard output */
> > +	mbuf_append(h_output_head, h_output, h_out_length);
> > +
> > +	/* Soft output */
> > +	if (check_bit(op->turbo_dec.op_flags,
> RTE_BBDEV_TURBO_SOFT_OUTPUT))
> > +		mbuf_append(s_output_head, s_output, s_out_length);
> > +
> > +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> > +	rte_memdump(stderr, "FCW", &desc->req.fcw_td,
> > +			sizeof(desc->req.fcw_td) - 8);
> > +	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> > +
> > +	/* Check if any CBs left for processing */
> > +	if (mbuf_total_left != 0) {
> > +		rte_bbdev_log(ERR,
> > +				"Some date still left after processing one CB:
> mbuf_total_left = %u",
> > +				mbuf_total_left);
> > +		return -EINVAL;
> > +	}
> > +#endif
> logic similar to debug in mbuf_append, should be a common function.

Not exactly, unless I am missing your point. 

> > +
> > +	/* One CB (one op) was successfully prepared to enqueue */
> > +	return 1;
> > +}
> > +
> > +static inline int
> > +harq_loopback(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
> > +		uint16_t total_enqueued_cbs) {
> > +	struct acc100_fcw_ld *fcw;
> > +	union acc100_dma_desc *desc;
> > +	int next_triplet = 1;
> > +	struct rte_mbuf *hq_output_head, *hq_output;
> > +	uint16_t harq_in_length = op-
> >ldpc_dec.harq_combined_input.length;
> > +	if (harq_in_length == 0) {
> > +		rte_bbdev_log(ERR, "Loopback of invalid null size\n");
> > +		return -EINVAL;
> > +	}
> > +
> > +	int h_comp = check_bit(op->ldpc_dec.op_flags,
> > +			RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION
> > +			) ? 1 : 0;
> > +	if (h_comp == 1)
> > +		harq_in_length = harq_in_length * 8 / 6;
> > +	harq_in_length = RTE_ALIGN(harq_in_length, 64);
> > +	uint16_t harq_dma_length_in = (h_comp == 0) ?
> Can these h_comp checks be combined to a single if/else ?

it may be clearer, ok.


> > +			harq_in_length :
> > +			harq_in_length * 6 / 8;
> > +	uint16_t harq_dma_length_out = harq_dma_length_in;
> > +	bool ddr_mem_in = check_bit(op->ldpc_dec.op_flags,
> > +
> 	RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE);
> > +	union acc100_harq_layout_data *harq_layout = q->d->harq_layout;
> > +	uint16_t harq_index = (ddr_mem_in ?
> > +			op->ldpc_dec.harq_combined_input.offset :
> > +			op->ldpc_dec.harq_combined_output.offset)
> > +			/ ACC100_HARQ_OFFSET;
> > +
> > +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> > +			& q->sw_ring_wrap_mask);
> > +	desc = q->ring_addr + desc_idx;
> > +	fcw = &desc->req.fcw_ld;
> > +	/* Set the FCW from loopback into DDR */
> > +	memset(fcw, 0, sizeof(struct acc100_fcw_ld));
> > +	fcw->FCWversion = ACC100_FCW_VER;
> > +	fcw->qm = 2;
> > +	fcw->Zc = 384;
> these magic numbers should have #defines

These are not magic numbers; they are actual 3GPP-defined values.

> > +	if (harq_in_length < 16 * N_ZC_1)
> > +		fcw->Zc = 16;
> > +	fcw->ncb = fcw->Zc * N_ZC_1;
> > +	fcw->rm_e = 2;
> > +	fcw->hcin_en = 1;
> > +	fcw->hcout_en = 1;
> > +
> > +	rte_bbdev_log(DEBUG, "Loopback IN %d Index %d offset %d length
> %d %d\n",
> > +			ddr_mem_in, harq_index,
> > +			harq_layout[harq_index].offset, harq_in_length,
> > +			harq_dma_length_in);
> > +
> > +	if (ddr_mem_in && (harq_layout[harq_index].offset > 0)) {
> > +		fcw->hcin_size0 = harq_layout[harq_index].size0;
> > +		fcw->hcin_offset = harq_layout[harq_index].offset;
> > +		fcw->hcin_size1 = harq_in_length - fcw->hcin_offset;
> > +		harq_dma_length_in = (fcw->hcin_size0 + fcw->hcin_size1);
> > +		if (h_comp == 1)
> > +			harq_dma_length_in = harq_dma_length_in * 6 / 8;
> > +	} else {
> > +		fcw->hcin_size0 = harq_in_length;
> > +	}
> > +	harq_layout[harq_index].val = 0;
> > +	rte_bbdev_log(DEBUG, "Loopback FCW Config %d %d %d\n",
> > +			fcw->hcin_size0, fcw->hcin_offset, fcw->hcin_size1);
> > +	fcw->hcout_size0 = harq_in_length;
> > +	fcw->hcin_decomp_mode = h_comp;
> > +	fcw->hcout_comp_mode = h_comp;
> > +	fcw->gain_i = 1;
> > +	fcw->gain_h = 1;
> > +
> > +	/* Set the prefix of descriptor. This could be done at polling */
> >  	desc->req.word0 = ACC100_DMA_DESC_TYPE;
> >  	desc->req.word1 = 0; /**< Timestamp could be disabled */
> >  	desc->req.word2 = 0;
> > @@ -1816,6 +2320,107 @@
> >  	return current_enqueued_cbs;
> >  }
> >
> > +/* Enqueue one decode operations for ACC100 device in TB mode */
> > +static inline int enqueue_dec_one_op_tb(struct acc100_queue *q,
> > +struct rte_bbdev_dec_op *op,
> > +		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb) {
> > +	union acc100_dma_desc *desc = NULL;
> > +	int ret;
> > +	uint8_t r, c;
> > +	uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
> > +		h_out_length, mbuf_total_left, seg_total_left;
> > +	struct rte_mbuf *input, *h_output_head, *h_output,
> > +		*s_output_head, *s_output;
> > +	uint16_t current_enqueued_cbs = 0;
> > +
> > +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> > +			& q->sw_ring_wrap_mask);
> > +	desc = q->ring_addr + desc_idx;
> > +	uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
> > +	acc100_fcw_td_fill(op, &desc->req.fcw_td);
> > +
> > +	input = op->turbo_dec.input.data;
> > +	h_output_head = h_output = op->turbo_dec.hard_output.data;
> > +	s_output_head = s_output = op->turbo_dec.soft_output.data;
> > +	in_offset = op->turbo_dec.input.offset;
> > +	h_out_offset = op->turbo_dec.hard_output.offset;
> > +	s_out_offset = op->turbo_dec.soft_output.offset;
> > +	h_out_length = s_out_length = 0;
> > +	mbuf_total_left = op->turbo_dec.input.length;
> > +	c = op->turbo_dec.tb_params.c;
> > +	r = op->turbo_dec.tb_params.r;
> > +
> > +	while (mbuf_total_left > 0 && r < c) {
> > +
> > +		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
> > +
> > +		/* Set up DMA descriptor */
> > +		desc = q->ring_addr + ((q->sw_ring_head +
> total_enqueued_cbs)
> > +				& q->sw_ring_wrap_mask);
> > +		desc->req.data_ptrs[0].address = q->ring_addr_phys +
> fcw_offset;
> > +		desc->req.data_ptrs[0].blen = ACC100_FCW_TD_BLEN;
> > +		ret = acc100_dma_desc_td_fill(op, &desc->req, &input,
> > +				h_output, s_output, &in_offset,
> &h_out_offset,
> > +				&s_out_offset, &h_out_length,
> &s_out_length,
> > +				&mbuf_total_left, &seg_total_left, r);
> > +
> > +		if (unlikely(ret < 0))
> > +			return ret;
> > +
> > +		/* Hard output */
> > +		mbuf_append(h_output_head, h_output, h_out_length);
> > +
> > +		/* Soft output */
> > +		if (check_bit(op->turbo_dec.op_flags,
> > +				RTE_BBDEV_TURBO_SOFT_OUTPUT))
> > +			mbuf_append(s_output_head, s_output,
> s_out_length);
> > +
> > +		/* Set total number of CBs in TB */
> > +		desc->req.cbs_in_tb = cbs_in_tb;
> > +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> > +		rte_memdump(stderr, "FCW", &desc->req.fcw_td,
> > +				sizeof(desc->req.fcw_td) - 8);
> > +		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> #endif
> > +
> > +		if (seg_total_left == 0) {
> > +			/* Go to the next mbuf */
> > +			input = input->next;
> > +			in_offset = 0;
> > +			h_output = h_output->next;
> > +			h_out_offset = 0;
> > +
> > +			if (check_bit(op->turbo_dec.op_flags,
> > +					RTE_BBDEV_TURBO_SOFT_OUTPUT))
> {
> > +				s_output = s_output->next;
> > +				s_out_offset = 0;
> > +			}
> > +		}
> > +
> > +		total_enqueued_cbs++;
> > +		current_enqueued_cbs++;
> > +		r++;
> > +	}
> > +
> > +	if (unlikely(desc == NULL))
> > +		return current_enqueued_cbs;
> > +
> > +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> > +	/* Check if any CBs left for processing */
> > +	if (mbuf_total_left != 0) {
> > +		rte_bbdev_log(ERR,
> > +				"Some date still left for processing:
> mbuf_total_left = %u",
> > +				mbuf_total_left);
> > +		return -EINVAL;
> > +	}
> > +#endif
> > +	/* Set SDone on last CB descriptor for TB mode */
> > +	desc->req.sdone_enable = 1;
> > +	desc->req.irq_enable = q->irq_enable;
> > +
> > +	return current_enqueued_cbs;
> > +}
> >
> >  /* Calculates number of CBs in processed encoder TB based on 'r' and
> input
> >   * length.
> > @@ -1893,6 +2498,45 @@
> >  	return cbs_in_tb;
> >  }
> >
> > +/* Enqueue encode operations for ACC100 device in CB mode. */ static
> > +uint16_t acc100_enqueue_enc_cb(struct rte_bbdev_queue_data
> *q_data,
> > +		struct rte_bbdev_enc_op **ops, uint16_t num) {
> > +	struct acc100_queue *q = q_data->queue_private;
> > +	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q-
> >sw_ring_head;
> > +	uint16_t i;
> > +	union acc100_dma_desc *desc;
> > +	int ret;
> > +
> > +	for (i = 0; i < num; ++i) {
> > +		/* Check if there are available space for further processing */
> > +		if (unlikely(avail - 1 < 0))
> > +			break;
> > +		avail -= 1;
> > +
> > +		ret = enqueue_enc_one_op_cb(q, ops[i], i);
> > +		if (ret < 0)
> > +			break;
> > +	}
> > +
> > +	if (unlikely(i == 0))
> > +		return 0; /* Nothing to enqueue */
> > +
> > +	/* Set SDone in last CB in enqueued ops for CB mode*/
> > +	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
> > +			& q->sw_ring_wrap_mask);
> > +	desc->req.sdone_enable = 1;
> > +	desc->req.irq_enable = q->irq_enable;
> > +
> > +	acc100_dma_enqueue(q, i, &q_data->queue_stats);
> > +
> > +	/* Update stats */
> > +	q_data->queue_stats.enqueued_count += i;
> > +	q_data->queue_stats.enqueue_err_count += num - i;
> > +	return i;
> > +}
> > +
> >  /* Check we can mux encode operations with common FCW */  static
> > inline bool  check_mux(struct rte_bbdev_enc_op **ops, uint16_t num) {
> > @@ -1960,6 +2604,52 @@
> >  	return i;
> >  }
> >
> > +/* Enqueue encode operations for ACC100 device in TB mode. */ static
> > +uint16_t acc100_enqueue_enc_tb(struct rte_bbdev_queue_data
> *q_data,
> > +		struct rte_bbdev_enc_op **ops, uint16_t num) {
> > +	struct acc100_queue *q = q_data->queue_private;
> > +	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q-
> >sw_ring_head;
> > +	uint16_t i, enqueued_cbs = 0;
> > +	uint8_t cbs_in_tb;
> > +	int ret;
> > +
> > +	for (i = 0; i < num; ++i) {
> > +		cbs_in_tb = get_num_cbs_in_tb_enc(&ops[i]->turbo_enc);
> > +		/* Check if there are available space for further processing */
> > +		if (unlikely(avail - cbs_in_tb < 0))
> > +			break;
> > +		avail -= cbs_in_tb;
> > +
> > +		ret = enqueue_enc_one_op_tb(q, ops[i], enqueued_cbs,
> cbs_in_tb);
> > +		if (ret < 0)
> > +			break;
> > +		enqueued_cbs += ret;
> > +	}
> > +
> Other similar functions have an (i == 0) check here.

ok

> > +	acc100_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
> > +
> > +	/* Update stats */
> > +	q_data->queue_stats.enqueued_count += i;
> > +	q_data->queue_stats.enqueue_err_count += num - i;
> > +
> > +	return i;
> > +}
> > +
> > +/* Enqueue encode operations for ACC100 device. */ static uint16_t
> > +acc100_enqueue_enc(struct rte_bbdev_queue_data *q_data,
> > +		struct rte_bbdev_enc_op **ops, uint16_t num) {
> > +	if (unlikely(num == 0))
> > +		return 0;
> num == 0 check should move into the tb/cb functions

Same comment as on the other patch: why not catch it early?

> > +	if (ops[0]->turbo_enc.code_block_mode == 0)
> > +		return acc100_enqueue_enc_tb(q_data, ops, num);
> > +	else
> > +		return acc100_enqueue_enc_cb(q_data, ops, num); }
> > +
> >  /* Enqueue encode operations for ACC100 device. */  static uint16_t
> > acc100_enqueue_ldpc_enc(struct rte_bbdev_queue_data *q_data, @@
> > -1967,7 +2657,51 @@  {
> >  	if (unlikely(num == 0))
> >  		return 0;
> > -	return acc100_enqueue_ldpc_enc_cb(q_data, ops, num);
> > +	if (ops[0]->ldpc_enc.code_block_mode == 0)
> > +		return acc100_enqueue_enc_tb(q_data, ops, num);
> > +	else
> > +		return acc100_enqueue_ldpc_enc_cb(q_data, ops, num); }
> > +
> > +
> > +/* Enqueue decode operations for ACC100 device in CB mode */ static
> > +uint16_t acc100_enqueue_dec_cb(struct rte_bbdev_queue_data
> *q_data,
> > +		struct rte_bbdev_dec_op **ops, uint16_t num) {
> 
> This seems like the 10th variant of a very similar function — could these be combined
> into fewer functions?
> 
> Maybe by passing in a function pointer to the enqueue_one_dec_one* that
> does the work ?

They have some variations related to the actual operation and its constraints,
so it is not obvious that a refactor would add much value. 


> 
> > +	struct acc100_queue *q = q_data->queue_private;
> > +	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q-
> >sw_ring_head;
> > +	uint16_t i;
> > +	union acc100_dma_desc *desc;
> > +	int ret;
> > +
> > +	for (i = 0; i < num; ++i) {
> > +		/* Check if there are available space for further processing */
> > +		if (unlikely(avail - 1 < 0))
> > +			break;
> > +		avail -= 1;
> > +
> > +		ret = enqueue_dec_one_op_cb(q, ops[i], i);
> > +		if (ret < 0)
> > +			break;
> > +	}
> > +
> > +	if (unlikely(i == 0))
> > +		return 0; /* Nothing to enqueue */
> > +
> > +	/* Set SDone in last CB in enqueued ops for CB mode*/
> > +	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
> > +			& q->sw_ring_wrap_mask);
> > +	desc->req.sdone_enable = 1;
> > +	desc->req.irq_enable = q->irq_enable;
> > +
> > +	acc100_dma_enqueue(q, i, &q_data->queue_stats);
> > +
> > +	/* Update stats */
> > +	q_data->queue_stats.enqueued_count += i;
> > +	q_data->queue_stats.enqueue_err_count += num - i;
> > +
> > +	return i;
> >  }
> >
> >  /* Check we can mux encode operations with common FCW */ @@ -
> 2065,6
> > +2799,53 @@
> >  	return i;
> >  }
> >
> > +
> > +/* Enqueue decode operations for ACC100 device in TB mode */ static
> > +uint16_t acc100_enqueue_dec_tb(struct rte_bbdev_queue_data
> *q_data,
> > +		struct rte_bbdev_dec_op **ops, uint16_t num) {
> 11th ;)
> > +	struct acc100_queue *q = q_data->queue_private;
> > +	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q-
> >sw_ring_head;
> > +	uint16_t i, enqueued_cbs = 0;
> > +	uint8_t cbs_in_tb;
> > +	int ret;
> > +
> > +	for (i = 0; i < num; ++i) {
> > +		cbs_in_tb = get_num_cbs_in_tb_dec(&ops[i]->turbo_dec);
> > +		/* Check if there are available space for further processing */
> > +		if (unlikely(avail - cbs_in_tb < 0))
> > +			break;
> > +		avail -= cbs_in_tb;
> > +
> > +		ret = enqueue_dec_one_op_tb(q, ops[i], enqueued_cbs,
> cbs_in_tb);
> > +		if (ret < 0)
> > +			break;
> > +		enqueued_cbs += ret;
> > +	}
> > +
> > +	acc100_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
> > +
> > +	/* Update stats */
> > +	q_data->queue_stats.enqueued_count += i;
> > +	q_data->queue_stats.enqueue_err_count += num - i;
> > +
> > +	return i;
> > +}
> > +
> > +/* Enqueue decode operations for ACC100 device. */ static uint16_t
> > +acc100_enqueue_dec(struct rte_bbdev_queue_data *q_data,
> > +		struct rte_bbdev_dec_op **ops, uint16_t num) {
> > +	if (unlikely(num == 0))
> > +		return 0;
> Similarly, move the num == 0 check into the tb/cb functions.

same comment

> > +	if (ops[0]->turbo_dec.code_block_mode == 0)
> > +		return acc100_enqueue_dec_tb(q_data, ops, num);
> > +	else
> > +		return acc100_enqueue_dec_cb(q_data, ops, num); }
> > +
> >  /* Enqueue decode operations for ACC100 device. */  static uint16_t
> > acc100_enqueue_ldpc_dec(struct rte_bbdev_queue_data *q_data, @@
> > -2388,6 +3169,51 @@
> >  	return cb_idx;
> >  }
> >
> > +/* Dequeue encode operations from ACC100 device. */ static uint16_t
> > +acc100_dequeue_enc(struct rte_bbdev_queue_data *q_data,
> > +		struct rte_bbdev_enc_op **ops, uint16_t num) {
> > +	struct acc100_queue *q = q_data->queue_private;
> > +	uint16_t dequeue_num;
> > +	uint32_t avail = q->sw_ring_head - q->sw_ring_tail;
> > +	uint32_t aq_dequeued = 0;
> > +	uint16_t i;
> > +	uint16_t dequeued_cbs = 0;
> > +	struct rte_bbdev_enc_op *op;
> > +	int ret;
> > +
> > +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> > +	if (unlikely(ops == 0 && q == NULL))
> 
> ops is a pointer so should compare with NULL
> 
> The && likely needs to be ||
> 
> Maybe print out a message so the caller knows something went wrong.

ok

> 
> > +		return 0;
> > +#endif
> > +
> > +	dequeue_num = (avail < num) ? avail : num;
> > +
> > +	for (i = 0; i < dequeue_num; ++i) {
> > +		op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
> > +			& q->sw_ring_wrap_mask))->req.op_addr;
> > +		if (op->turbo_enc.code_block_mode == 0)
> > +			ret = dequeue_enc_one_op_tb(q, &ops[i],
> dequeued_cbs,
> > +					&aq_dequeued);
> > +		else
> > +			ret = dequeue_enc_one_op_cb(q, &ops[i],
> dequeued_cbs,
> > +					&aq_dequeued);
> > +
> > +		if (ret < 0)
> > +			break;
> > +		dequeued_cbs += ret;
> > +	}
> > +
> > +	q->aq_dequeued += aq_dequeued;
> > +	q->sw_ring_tail += dequeued_cbs;
> > +
> > +	/* Update enqueue stats */
> > +	q_data->queue_stats.dequeued_count += i;
> > +
> > +	return i;
> > +}
> > +
> >  /* Dequeue LDPC encode operations from ACC100 device. */  static
> > uint16_t  acc100_dequeue_ldpc_enc(struct rte_bbdev_queue_data
> *q_data,
> > @@ -2426,6 +3252,52 @@
> >  	return dequeued_cbs;
> >  }
> >
> > +
> > +/* Dequeue decode operations from ACC100 device. */ static uint16_t
> > +acc100_dequeue_dec(struct rte_bbdev_queue_data *q_data,
> > +		struct rte_bbdev_dec_op **ops, uint16_t num) {
> 
> This is very similar to the enc function above; consider how to combine them
> into a single function.
> 
> Tom
> 
> > +	struct acc100_queue *q = q_data->queue_private;
> > +	uint16_t dequeue_num;
> > +	uint32_t avail = q->sw_ring_head - q->sw_ring_tail;
> > +	uint32_t aq_dequeued = 0;
> > +	uint16_t i;
> > +	uint16_t dequeued_cbs = 0;
> > +	struct rte_bbdev_dec_op *op;
> > +	int ret;
> > +
> > +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> > +	if (unlikely(ops == 0 && q == NULL))
> > +		return 0;
> > +#endif
> > +
> > +	dequeue_num = (avail < num) ? avail : num;
> > +
> > +	for (i = 0; i < dequeue_num; ++i) {
> > +		op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
> > +			& q->sw_ring_wrap_mask))->req.op_addr;
> > +		if (op->turbo_dec.code_block_mode == 0)
> > +			ret = dequeue_dec_one_op_tb(q, &ops[i],
> dequeued_cbs,
> > +					&aq_dequeued);
> > +		else
> > +			ret = dequeue_dec_one_op_cb(q_data, q, &ops[i],
> > +					dequeued_cbs, &aq_dequeued);
> > +
> > +		if (ret < 0)
> > +			break;
> > +		dequeued_cbs += ret;
> > +	}
> > +
> > +	q->aq_dequeued += aq_dequeued;
> > +	q->sw_ring_tail += dequeued_cbs;
> > +
> > +	/* Update enqueue stats */
> > +	q_data->queue_stats.dequeued_count += i;
> > +
> > +	return i;
> > +}
> > +
> >  /* Dequeue decode operations from ACC100 device. */  static uint16_t
> > acc100_dequeue_ldpc_dec(struct rte_bbdev_queue_data *q_data, @@
> > -2479,6 +3351,10 @@
> >  	struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device);
> >
> >  	dev->dev_ops = &acc100_bbdev_ops;
> > +	dev->enqueue_enc_ops = acc100_enqueue_enc;
> > +	dev->enqueue_dec_ops = acc100_enqueue_dec;
> > +	dev->dequeue_enc_ops = acc100_dequeue_enc;
> > +	dev->dequeue_dec_ops = acc100_dequeue_dec;
> >  	dev->enqueue_ldpc_enc_ops = acc100_enqueue_ldpc_enc;
> >  	dev->enqueue_ldpc_dec_ops = acc100_enqueue_ldpc_dec;
> >  	dev->dequeue_ldpc_enc_ops = acc100_dequeue_ldpc_enc;
  
Tom Rix Oct. 1, 2020, 3:42 p.m. UTC | #3
On 9/30/20 12:10 PM, Chautru, Nicolas wrote:
> Hi Tom, 
>
>> From: Tom Rix <trix@redhat.com>
>> On 9/28/20 5:29 PM, Nicolas Chautru wrote:
>>> Adding capability for 4G encode and decoder processing
>>>
>>> Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
>>> Acked-by: Liu Tianjiao <Tianjiao.liu@intel.com>
>>> ---
>>>  doc/guides/bbdevs/features/acc100.ini    |    4 +-
>>>  drivers/baseband/acc100/rte_acc100_pmd.c | 1010
>>> ++++++++++++++++++++++++++++--
>>>  2 files changed, 945 insertions(+), 69 deletions(-)
>>>
>>> diff --git a/doc/guides/bbdevs/features/acc100.ini
>>> b/doc/guides/bbdevs/features/acc100.ini
>>> index 40c7adc..642cd48 100644
>>> --- a/doc/guides/bbdevs/features/acc100.ini
>>> +++ b/doc/guides/bbdevs/features/acc100.ini
>>> @@ -4,8 +4,8 @@
>>>  ; Refer to default.ini for the full list of available PMD features.
>>>  ;
>>>  [Features]
>>> -Turbo Decoder (4G)     = N
>>> -Turbo Encoder (4G)     = N
>>> +Turbo Decoder (4G)     = Y
>>> +Turbo Encoder (4G)     = Y
>>>  LDPC Decoder (5G)      = Y
>>>  LDPC Encoder (5G)      = Y
>>>  LLR/HARQ Compression   = Y
>>> diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c
>>> b/drivers/baseband/acc100/rte_acc100_pmd.c
>>> index e484c0a..7d4c3df 100644
>>> --- a/drivers/baseband/acc100/rte_acc100_pmd.c
>>> +++ b/drivers/baseband/acc100/rte_acc100_pmd.c
>>> @@ -339,7 +339,6 @@
>>>  	free_base_addresses(base_addrs, i);
>>>  }
>>>
>>> -
>>>  /* Allocate 64MB memory used for all software rings */  static int
>>> acc100_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int
>>> socket_id) @@ -637,6 +636,41 @@
>>>
>>>  	static const struct rte_bbdev_op_cap bbdev_capabilities[] = {
>>>  		{
>>> +			.type = RTE_BBDEV_OP_TURBO_DEC,
>>> +			.cap.turbo_dec = {
>>> +				.capability_flags =
>>> +
>> 	RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
>>> +					RTE_BBDEV_TURBO_CRC_TYPE_24B
>> |
>>> +
>> 	RTE_BBDEV_TURBO_HALF_ITERATION_EVEN |
>>> +
>> 	RTE_BBDEV_TURBO_EARLY_TERMINATION |
>>> +
>> 	RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
>>> +					RTE_BBDEV_TURBO_MAP_DEC |
>>> +
>> 	RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
>>> +
>> 	RTE_BBDEV_TURBO_DEC_SCATTER_GATHER,
>>> +				.max_llr_modulus = INT8_MAX,
>>> +				.num_buffers_src =
>>> +
>> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
>>> +				.num_buffers_hard_out =
>>> +
>> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
>>> +				.num_buffers_soft_out =
>>> +
>> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
>>> +			}
>>> +		},
>>> +		{
>>> +			.type = RTE_BBDEV_OP_TURBO_ENC,
>>> +			.cap.turbo_enc = {
>>> +				.capability_flags =
>>> +
>> 	RTE_BBDEV_TURBO_CRC_24B_ATTACH |
>>> +
>> 	RTE_BBDEV_TURBO_RV_INDEX_BYPASS |
>>> +					RTE_BBDEV_TURBO_RATE_MATCH |
>>> +
>> 	RTE_BBDEV_TURBO_ENC_SCATTER_GATHER,
>>> +				.num_buffers_src =
>>> +
>> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
>>> +				.num_buffers_dst =
>>> +
>> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
>>> +			}
>>> +		},
>>> +		{
>>>  			.type   = RTE_BBDEV_OP_LDPC_ENC,
>>>  			.cap.ldpc_enc = {
>>>  				.capability_flags =
>>> @@ -719,7 +753,6 @@
>>>  #endif
>>>  }
>>>
>>> -
>>>  static const struct rte_bbdev_ops acc100_bbdev_ops = {
>>>  	.setup_queues = acc100_setup_queues,
>>>  	.close = acc100_dev_close,
>>> @@ -763,6 +796,58 @@
>>>  	return tail;
>>>  }
>>>
>>> +/* Fill in a frame control word for turbo encoding. */ static inline
>>> +void acc100_fcw_te_fill(const struct rte_bbdev_enc_op *op, struct
>>> +acc100_fcw_te *fcw) {
>>> +	fcw->code_block_mode = op->turbo_enc.code_block_mode;
>>> +	if (fcw->code_block_mode == 0) { /* For TB mode */
>>> +		fcw->k_neg = op->turbo_enc.tb_params.k_neg;
>>> +		fcw->k_pos = op->turbo_enc.tb_params.k_pos;
>>> +		fcw->c_neg = op->turbo_enc.tb_params.c_neg;
>>> +		fcw->c = op->turbo_enc.tb_params.c;
>>> +		fcw->ncb_neg = op->turbo_enc.tb_params.ncb_neg;
>>> +		fcw->ncb_pos = op->turbo_enc.tb_params.ncb_pos;
>>> +
>>> +		if (check_bit(op->turbo_enc.op_flags,
>>> +				RTE_BBDEV_TURBO_RATE_MATCH)) {
>>> +			fcw->bypass_rm = 0;
>>> +			fcw->cab = op->turbo_enc.tb_params.cab;
>>> +			fcw->ea = op->turbo_enc.tb_params.ea;
>>> +			fcw->eb = op->turbo_enc.tb_params.eb;
>>> +		} else {
>>> +			/* E is set to the encoding output size when RM is
>>> +			 * bypassed.
>>> +			 */
>>> +			fcw->bypass_rm = 1;
>>> +			fcw->cab = fcw->c_neg;
>>> +			fcw->ea = 3 * fcw->k_neg + 12;
>>> +			fcw->eb = 3 * fcw->k_pos + 12;
>>> +		}
>>> +	} else { /* For CB mode */
>>> +		fcw->k_pos = op->turbo_enc.cb_params.k;
>>> +		fcw->ncb_pos = op->turbo_enc.cb_params.ncb;
>>> +
>>> +		if (check_bit(op->turbo_enc.op_flags,
>>> +				RTE_BBDEV_TURBO_RATE_MATCH)) {
>>> +			fcw->bypass_rm = 0;
>>> +			fcw->eb = op->turbo_enc.cb_params.e;
>>> +		} else {
>>> +			/* E is set to the encoding output size when RM is
>>> +			 * bypassed.
>>> +			 */
>>> +			fcw->bypass_rm = 1;
>>> +			fcw->eb = 3 * fcw->k_pos + 12;
>>> +		}
>>> +	}
>>> +
>>> +	fcw->bypass_rv_idx1 = check_bit(op->turbo_enc.op_flags,
>>> +			RTE_BBDEV_TURBO_RV_INDEX_BYPASS);
>>> +	fcw->code_block_crc = check_bit(op->turbo_enc.op_flags,
>>> +			RTE_BBDEV_TURBO_CRC_24B_ATTACH);
>>> +	fcw->rv_idx1 = op->turbo_enc.rv_index; }
>>> +
>>>  /* Compute value of k0.
>>>   * Based on 3GPP 38.212 Table 5.4.2.1-2
>>>   * Starting position of different redundancy versions, k0 @@ -813,6
>>> +898,25 @@
>>>  	fcw->mcb_count = num_cb;
>>>  }
>>>
>>> +/* Fill in a frame control word for turbo decoding. */ static inline
>>> +void acc100_fcw_td_fill(const struct rte_bbdev_dec_op *op, struct
>>> +acc100_fcw_td *fcw) {
>>> +	/* Note : Early termination is always enabled for 4GUL */
>>> +	fcw->fcw_ver = 1;
>>> +	if (op->turbo_dec.code_block_mode == 0)
>>> +		fcw->k_pos = op->turbo_dec.tb_params.k_pos;
>>> +	else
>>> +		fcw->k_pos = op->turbo_dec.cb_params.k;
>>> +	fcw->turbo_crc_type = check_bit(op->turbo_dec.op_flags,
>>> +			RTE_BBDEV_TURBO_CRC_TYPE_24B);
>>> +	fcw->bypass_sb_deint = 0;
>>> +	fcw->raw_decoder_input_on = 0;
>>> +	fcw->max_iter = op->turbo_dec.iter_max;
>>> +	fcw->half_iter_on = !check_bit(op->turbo_dec.op_flags,
>>> +			RTE_BBDEV_TURBO_HALF_ITERATION_EVEN);
>>> +}
>>> +
>>>  /* Fill in a frame control word for LDPC decoding. */  static inline
>>> void  acc100_fcw_ld_fill(const struct rte_bbdev_dec_op *op, struct
>>> acc100_fcw_ld *fcw, @@ -1042,6 +1146,87 @@  }
>>>
>>>  static inline int
>>> +acc100_dma_desc_te_fill(struct rte_bbdev_enc_op *op,
>>> +		struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
>>> +		struct rte_mbuf *output, uint32_t *in_offset,
>>> +		uint32_t *out_offset, uint32_t *out_length,
>>> +		uint32_t *mbuf_total_left, uint32_t *seg_total_left, uint8_t
>> r) {
>>> +	int next_triplet = 1; /* FCW already done */
>>> +	uint32_t e, ea, eb, length;
>>> +	uint16_t k, k_neg, k_pos;
>>> +	uint8_t cab, c_neg;
>>> +
>>> +	desc->word0 = ACC100_DMA_DESC_TYPE;
>>> +	desc->word1 = 0; /**< Timestamp could be disabled */
>>> +	desc->word2 = 0;
>>> +	desc->word3 = 0;
>>> +	desc->numCBs = 1;
>>> +
>>> +	if (op->turbo_enc.code_block_mode == 0) {
>>> +		ea = op->turbo_enc.tb_params.ea;
>>> +		eb = op->turbo_enc.tb_params.eb;
>>> +		cab = op->turbo_enc.tb_params.cab;
>>> +		k_neg = op->turbo_enc.tb_params.k_neg;
>>> +		k_pos = op->turbo_enc.tb_params.k_pos;
>>> +		c_neg = op->turbo_enc.tb_params.c_neg;
>>> +		e = (r < cab) ? ea : eb;
>>> +		k = (r < c_neg) ? k_neg : k_pos;
>>> +	} else {
>>> +		e = op->turbo_enc.cb_params.e;
>>> +		k = op->turbo_enc.cb_params.k;
>>> +	}
>>> +
>>> +	if (check_bit(op->turbo_enc.op_flags,
>> RTE_BBDEV_TURBO_CRC_24B_ATTACH))
>>> +		length = (k - 24) >> 3;
>>> +	else
>>> +		length = k >> 3;
>>> +
>>> +	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left <
>>> +length))) {
>> Similar to the other patches: this check can be combined into a single <=
>>
>> comparison. Apply this change generally.
> same comment on other patch
>
>>> +		rte_bbdev_log(ERR,
>>> +				"Mismatch between mbuf length and
>> included CB sizes: mbuf len %u, cb len %u",
>>> +				*mbuf_total_left, length);
>>> +		return -1;
>>> +	}
>>> +
>>> +	next_triplet = acc100_dma_fill_blk_type_in(desc, input, in_offset,
>>> +			length, seg_total_left, next_triplet);
>>> +	if (unlikely(next_triplet < 0)) {
>>> +		rte_bbdev_log(ERR,
>>> +				"Mismatch between data to process and
>> mbuf data length in bbdev_op: %p",
>>> +				op);
>>> +		return -1;
>>> +	}
>>> +	desc->data_ptrs[next_triplet - 1].last = 1;
>>> +	desc->m2dlen = next_triplet;
>>> +	*mbuf_total_left -= length;
>>> +
>>> +	/* Set output length */
>>> +	if (check_bit(op->turbo_enc.op_flags,
>> RTE_BBDEV_TURBO_RATE_MATCH))
>>> +		/* Integer round up division by 8 */
>>> +		*out_length = (e + 7) >> 3;
>>> +	else
>>> +		*out_length = (k >> 3) * 3 + 2;
>>> +
>>> +	next_triplet = acc100_dma_fill_blk_type_out(desc, output,
>> *out_offset,
>>> +			*out_length, next_triplet,
>> ACC100_DMA_BLKID_OUT_ENC);
>>> +	if (unlikely(next_triplet < 0)) {
>>> +		rte_bbdev_log(ERR,
>>> +				"Mismatch between data to process and
>> mbuf data length in bbdev_op: %p",
>>> +				op);
>>> +		return -1;
>>> +	}
>>> +	op->turbo_enc.output.length += *out_length;
>>> +	*out_offset += *out_length;
>>> +	desc->data_ptrs[next_triplet - 1].last = 1;
>>> +	desc->d2mlen = next_triplet - desc->m2dlen;
>>> +
>>> +	desc->op_addr = op;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static inline int
>>>  acc100_dma_desc_le_fill(struct rte_bbdev_enc_op *op,
>>>  		struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
>>>  		struct rte_mbuf *output, uint32_t *in_offset, @@ -1110,6
>> +1295,117
>>> @@  }
>>>
>>>  static inline int
>>> +acc100_dma_desc_td_fill(struct rte_bbdev_dec_op *op,
>>> +		struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
>>> +		struct rte_mbuf *h_output, struct rte_mbuf *s_output,
>>> +		uint32_t *in_offset, uint32_t *h_out_offset,
>>> +		uint32_t *s_out_offset, uint32_t *h_out_length,
>>> +		uint32_t *s_out_length, uint32_t *mbuf_total_left,
>>> +		uint32_t *seg_total_left, uint8_t r) {
>>> +	int next_triplet = 1; /* FCW already done */
>>> +	uint16_t k;
>>> +	uint16_t crc24_overlap = 0;
>>> +	uint32_t e, kw;
>>> +
>>> +	desc->word0 = ACC100_DMA_DESC_TYPE;
>>> +	desc->word1 = 0; /**< Timestamp could be disabled */
>>> +	desc->word2 = 0;
>>> +	desc->word3 = 0;
>>> +	desc->numCBs = 1;
>>> +
>>> +	if (op->turbo_dec.code_block_mode == 0) {
>>> +		k = (r < op->turbo_dec.tb_params.c_neg)
>>> +			? op->turbo_dec.tb_params.k_neg
>>> +			: op->turbo_dec.tb_params.k_pos;
>>> +		e = (r < op->turbo_dec.tb_params.cab)
>>> +			? op->turbo_dec.tb_params.ea
>>> +			: op->turbo_dec.tb_params.eb;
>>> +	} else {
>>> +		k = op->turbo_dec.cb_params.k;
>>> +		e = op->turbo_dec.cb_params.e;
>>> +	}
>>> +
>>> +	if ((op->turbo_dec.code_block_mode == 0)
>>> +		&& !check_bit(op->turbo_dec.op_flags,
>>> +		RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
>>> +		crc24_overlap = 24;
>>> +
>>> +	/* Calculates circular buffer size.
>>> +	 * According to 3gpp 36.212 section 5.1.4.2
>>> +	 *   Kw = 3 * Kpi,
>>> +	 * where:
>>> +	 *   Kpi = nCol * nRow
>>> +	 * where nCol is 32 and nRow can be calculated from:
>>> +	 *   D =< nCol * nRow
>>> +	 * where D is the size of each output from turbo encoder block (k +
>> 4).
>>> +	 */
>>> +	kw = RTE_ALIGN_CEIL(k + 4, 32) * 3;
>>> +
>>> +	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < kw))) {
>>> +		rte_bbdev_log(ERR,
>>> +				"Mismatch between mbuf length and
>> included CB sizes: mbuf len %u, cb len %u",
>>> +				*mbuf_total_left, kw);
>>> +		return -1;
>>> +	}
>>> +
>>> +	next_triplet = acc100_dma_fill_blk_type_in(desc, input, in_offset,
>> kw,
>>> +			seg_total_left, next_triplet);
>>> +	if (unlikely(next_triplet < 0)) {
>>> +		rte_bbdev_log(ERR,
>>> +				"Mismatch between data to process and
>> mbuf data length in bbdev_op: %p",
>>> +				op);
>>> +		return -1;
>>> +	}
>>> +	desc->data_ptrs[next_triplet - 1].last = 1;
>>> +	desc->m2dlen = next_triplet;
>>> +	*mbuf_total_left -= kw;
>>> +
>>> +	next_triplet = acc100_dma_fill_blk_type_out(
>>> +			desc, h_output, *h_out_offset,
>>> +			k >> 3, next_triplet,
>> ACC100_DMA_BLKID_OUT_HARD);
>>> +	if (unlikely(next_triplet < 0)) {
>>> +		rte_bbdev_log(ERR,
>>> +				"Mismatch between data to process and
>> mbuf data length in bbdev_op: %p",
>>> +				op);
>>> +		return -1;
>>> +	}
>>> +
>>> +	*h_out_length = ((k - crc24_overlap) >> 3);
>>> +	op->turbo_dec.hard_output.length += *h_out_length;
>>> +	*h_out_offset += *h_out_length;
>>> +
>>> +	/* Soft output */
>>> +	if (check_bit(op->turbo_dec.op_flags,
>> RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
>>> +		if (check_bit(op->turbo_dec.op_flags,
>>> +				RTE_BBDEV_TURBO_EQUALIZER))
>>> +			*s_out_length = e;
>>> +		else
>>> +			*s_out_length = (k * 3) + 12;
>>> +
>>> +		next_triplet = acc100_dma_fill_blk_type_out(desc, s_output,
>>> +				*s_out_offset, *s_out_length, next_triplet,
>>> +				ACC100_DMA_BLKID_OUT_SOFT);
>>> +		if (unlikely(next_triplet < 0)) {
>>> +			rte_bbdev_log(ERR,
>>> +					"Mismatch between data to process
>> and mbuf data length in bbdev_op: %p",
>>> +					op);
>>> +			return -1;
>>> +		}
>>> +
>>> +		op->turbo_dec.soft_output.length += *s_out_length;
>>> +		*s_out_offset += *s_out_length;
>>> +	}
>>> +
>>> +	desc->data_ptrs[next_triplet - 1].last = 1;
>>> +	desc->d2mlen = next_triplet - desc->m2dlen;
>>> +
>>> +	desc->op_addr = op;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static inline int
>>>  acc100_dma_desc_ld_fill(struct rte_bbdev_dec_op *op,
>>>  		struct acc100_dma_req_desc *desc,
>>>  		struct rte_mbuf **input, struct rte_mbuf *h_output, @@ -
>> 1374,6
>>> +1670,57 @@
>>>
>>>  /* Enqueue one encode operations for ACC100 device in CB mode */
>>> static inline int
>>> +enqueue_enc_one_op_cb(struct acc100_queue *q, struct
>> rte_bbdev_enc_op *op,
>>> +		uint16_t total_enqueued_cbs)
>>> +{
>>> +	union acc100_dma_desc *desc = NULL;
>>> +	int ret;
>>> +	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
>>> +		seg_total_left;
>>> +	struct rte_mbuf *input, *output_head, *output;
>>> +
>>> +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
>>> +			& q->sw_ring_wrap_mask);
>>> +	desc = q->ring_addr + desc_idx;
>>> +	acc100_fcw_te_fill(op, &desc->req.fcw_te);
>>> +
>>> +	input = op->turbo_enc.input.data;
>>> +	output_head = output = op->turbo_enc.output.data;
>>> +	in_offset = op->turbo_enc.input.offset;
>>> +	out_offset = op->turbo_enc.output.offset;
>>> +	out_length = 0;
>>> +	mbuf_total_left = op->turbo_enc.input.length;
>>> +	seg_total_left = rte_pktmbuf_data_len(op->turbo_enc.input.data)
>>> +			- in_offset;
>>> +
>>> +	ret = acc100_dma_desc_te_fill(op, &desc->req, &input, output,
>>> +			&in_offset, &out_offset, &out_length,
>> &mbuf_total_left,
>>> +			&seg_total_left, 0);
>>> +
>>> +	if (unlikely(ret < 0))
>>> +		return ret;
>>> +
>>> +	mbuf_append(output_head, output, out_length);
>>> +
>>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
>>> +	rte_memdump(stderr, "FCW", &desc->req.fcw_te,
>>> +			sizeof(desc->req.fcw_te) - 8);
>>> +	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
>>> +
>>> +	/* Check if any data left after processing one CB */
>>> +	if (mbuf_total_left != 0) {
>>> +		rte_bbdev_log(ERR,
>>> +				"Some date still left after processing one CB:
>> mbuf_total_left = %u",
>>> +				mbuf_total_left);
>>> +		return -EINVAL;
>>> +	}
>>> +#endif
>>> +	/* One CB (one op) was successfully prepared to enqueue */
>>> +	return 1;
>>> +}
>>> +
>>> +/* Enqueue one encode operations for ACC100 device in CB mode */
>>> +static inline int
>>>  enqueue_ldpc_enc_n_op_cb(struct acc100_queue *q, struct
>> rte_bbdev_enc_op **ops,
>>>  		uint16_t total_enqueued_cbs, int16_t num)  { @@ -1481,78
>> +1828,235
>>> @@
>>>  	return 1;
>>>  }
>>>
>>> -static inline int
>>> -harq_loopback(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
>>> -		uint16_t total_enqueued_cbs) {
>>> -	struct acc100_fcw_ld *fcw;
>>> -	union acc100_dma_desc *desc;
>>> -	int next_triplet = 1;
>>> -	struct rte_mbuf *hq_output_head, *hq_output;
>>> -	uint16_t harq_in_length = op-
>>> ldpc_dec.harq_combined_input.length;
>>> -	if (harq_in_length == 0) {
>>> -		rte_bbdev_log(ERR, "Loopback of invalid null size\n");
>>> -		return -EINVAL;
>>> -	}
>>>
>>> -	int h_comp = check_bit(op->ldpc_dec.op_flags,
>>> -			RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION
>>> -			) ? 1 : 0;
>>> -	if (h_comp == 1)
>>> -		harq_in_length = harq_in_length * 8 / 6;
>>> -	harq_in_length = RTE_ALIGN(harq_in_length, 64);
>>> -	uint16_t harq_dma_length_in = (h_comp == 0) ?
>>> -			harq_in_length :
>>> -			harq_in_length * 6 / 8;
>>> -	uint16_t harq_dma_length_out = harq_dma_length_in;
>>> -	bool ddr_mem_in = check_bit(op->ldpc_dec.op_flags,
>>> -
>> 	RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE);
>>> -	union acc100_harq_layout_data *harq_layout = q->d->harq_layout;
>>> -	uint16_t harq_index = (ddr_mem_in ?
>>> -			op->ldpc_dec.harq_combined_input.offset :
>>> -			op->ldpc_dec.harq_combined_output.offset)
>>> -			/ ACC100_HARQ_OFFSET;
>>> +/* Enqueue one encode operations for ACC100 device in TB mode. */
>>> +static inline int enqueue_enc_one_op_tb(struct acc100_queue *q,
>>> +struct rte_bbdev_enc_op *op,
>>> +		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb) {
>>> +	union acc100_dma_desc *desc = NULL;
>>> +	int ret;
>>> +	uint8_t r, c;
>>> +	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
>>> +		seg_total_left;
>>> +	struct rte_mbuf *input, *output_head, *output;
>>> +	uint16_t current_enqueued_cbs = 0;
>>>
>>>  	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
>>>  			& q->sw_ring_wrap_mask);
>>>  	desc = q->ring_addr + desc_idx;
>>> -	fcw = &desc->req.fcw_ld;
>>> -	/* Set the FCW from loopback into DDR */
>>> -	memset(fcw, 0, sizeof(struct acc100_fcw_ld));
>>> -	fcw->FCWversion = ACC100_FCW_VER;
>>> -	fcw->qm = 2;
>>> -	fcw->Zc = 384;
>>> -	if (harq_in_length < 16 * N_ZC_1)
>>> -		fcw->Zc = 16;
>>> -	fcw->ncb = fcw->Zc * N_ZC_1;
>>> -	fcw->rm_e = 2;
>>> -	fcw->hcin_en = 1;
>>> -	fcw->hcout_en = 1;
>>> +	uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
>>> +	acc100_fcw_te_fill(op, &desc->req.fcw_te);
>>>
>>> -	rte_bbdev_log(DEBUG, "Loopback IN %d Index %d offset %d length
>> %d %d\n",
>>> -			ddr_mem_in, harq_index,
>>> -			harq_layout[harq_index].offset, harq_in_length,
>>> -			harq_dma_length_in);
>>> +	input = op->turbo_enc.input.data;
>>> +	output_head = output = op->turbo_enc.output.data;
>>> +	in_offset = op->turbo_enc.input.offset;
>>> +	out_offset = op->turbo_enc.output.offset;
>>> +	out_length = 0;
>>> +	mbuf_total_left = op->turbo_enc.input.length;
>>>
>>> -	if (ddr_mem_in && (harq_layout[harq_index].offset > 0)) {
>>> -		fcw->hcin_size0 = harq_layout[harq_index].size0;
>>> -		fcw->hcin_offset = harq_layout[harq_index].offset;
>>> -		fcw->hcin_size1 = harq_in_length - fcw->hcin_offset;
>>> -		harq_dma_length_in = (fcw->hcin_size0 + fcw->hcin_size1);
>>> -		if (h_comp == 1)
>>> -			harq_dma_length_in = harq_dma_length_in * 6 / 8;
>>> -	} else {
>>> -		fcw->hcin_size0 = harq_in_length;
>>> -	}
>>> -	harq_layout[harq_index].val = 0;
>>> -	rte_bbdev_log(DEBUG, "Loopback FCW Config %d %d %d\n",
>>> -			fcw->hcin_size0, fcw->hcin_offset, fcw->hcin_size1);
>>> -	fcw->hcout_size0 = harq_in_length;
>>> -	fcw->hcin_decomp_mode = h_comp;
>>> -	fcw->hcout_comp_mode = h_comp;
>>> -	fcw->gain_i = 1;
>>> -	fcw->gain_h = 1;
>>> +	c = op->turbo_enc.tb_params.c;
>>> +	r = op->turbo_enc.tb_params.r;
>>>
>>> -	/* Set the prefix of descriptor. This could be done at polling */
>>> +	while (mbuf_total_left > 0 && r < c) {
>>> +		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
>>> +		/* Set up DMA descriptor */
>>> +		desc = q->ring_addr + ((q->sw_ring_head +
>> total_enqueued_cbs)
>>> +				& q->sw_ring_wrap_mask);
>>> +		desc->req.data_ptrs[0].address = q->ring_addr_phys +
>> fcw_offset;
>>> +		desc->req.data_ptrs[0].blen = ACC100_FCW_TE_BLEN;
>>> +
>>> +		ret = acc100_dma_desc_te_fill(op, &desc->req, &input,
>> output,
>>> +				&in_offset, &out_offset, &out_length,
>>> +				&mbuf_total_left, &seg_total_left, r);
>>> +		if (unlikely(ret < 0))
>>> +			return ret;
>>> +		mbuf_append(output_head, output, out_length);
>>> +
>>> +		/* Set total number of CBs in TB */
>>> +		desc->req.cbs_in_tb = cbs_in_tb;
>>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
>>> +		rte_memdump(stderr, "FCW", &desc->req.fcw_te,
>>> +				sizeof(desc->req.fcw_te) - 8);
>>> +		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
>> #endif
>>> +
>>> +		if (seg_total_left == 0) {
>>> +			/* Go to the next mbuf */
>>> +			input = input->next;
>>> +			in_offset = 0;
>>> +			output = output->next;
>>> +			out_offset = 0;
>>> +		}
>>> +
>>> +		total_enqueued_cbs++;
>>> +		current_enqueued_cbs++;
>>> +		r++;
>>> +	}
>>> +
>>> +	if (unlikely(desc == NULL))
>>> +		return current_enqueued_cbs;
>>> +
>>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
>>> +	/* Check if any CBs left for processing */
>>> +	if (mbuf_total_left != 0) {
>>> +		rte_bbdev_log(ERR,
>>> +				"Some date still left for processing:
>> mbuf_total_left = %u",
>>> +				mbuf_total_left);
>>> +		return -EINVAL;
>>> +	}
>>> +#endif
>>> +
>>> +	/* Set SDone on last CB descriptor for TB mode. */
>>> +	desc->req.sdone_enable = 1;
>>> +	desc->req.irq_enable = q->irq_enable;
>>> +
>>> +	return current_enqueued_cbs;
>>> +}
>>> +
>>> +/** Enqueue one decode operations for ACC100 device in CB mode */
>>> +static inline int enqueue_dec_one_op_cb(struct acc100_queue *q,
>>> +struct rte_bbdev_dec_op *op,
>>> +		uint16_t total_enqueued_cbs)
>>> +{
>>> +	union acc100_dma_desc *desc = NULL;
>>> +	int ret;
>>> +	uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
>>> +		h_out_length, mbuf_total_left, seg_total_left;
>>> +	struct rte_mbuf *input, *h_output_head, *h_output,
>>> +		*s_output_head, *s_output;
>>> +
>>> +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
>>> +			& q->sw_ring_wrap_mask);
>>> +	desc = q->ring_addr + desc_idx;
>>> +	acc100_fcw_td_fill(op, &desc->req.fcw_td);
>>> +
>>> +	input = op->turbo_dec.input.data;
>>> +	h_output_head = h_output = op->turbo_dec.hard_output.data;
>>> +	s_output_head = s_output = op->turbo_dec.soft_output.data;
>>> +	in_offset = op->turbo_dec.input.offset;
>>> +	h_out_offset = op->turbo_dec.hard_output.offset;
>>> +	s_out_offset = op->turbo_dec.soft_output.offset;
>>> +	h_out_length = s_out_length = 0;
>>> +	mbuf_total_left = op->turbo_dec.input.length;
>>> +	seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
>>> +
>>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
>>> +	if (unlikely(input == NULL)) {
>>> +		rte_bbdev_log(ERR, "Invalid mbuf pointer");
>>> +		return -EFAULT;
>>> +	}
>>> +#endif
>>> +
>>> +	/* Set up DMA descriptor */
>>> +	desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
>>> +			& q->sw_ring_wrap_mask);
>>> +
>>> +	ret = acc100_dma_desc_td_fill(op, &desc->req, &input, h_output,
>>> +			s_output, &in_offset, &h_out_offset, &s_out_offset,
>>> +			&h_out_length, &s_out_length, &mbuf_total_left,
>>> +			&seg_total_left, 0);
>>> +
>>> +	if (unlikely(ret < 0))
>>> +		return ret;
>>> +
>>> +	/* Hard output */
>>> +	mbuf_append(h_output_head, h_output, h_out_length);
>>> +
>>> +	/* Soft output */
>>> +	if (check_bit(op->turbo_dec.op_flags,
>> RTE_BBDEV_TURBO_SOFT_OUTPUT))
>>> +		mbuf_append(s_output_head, s_output, s_out_length);
>>> +
>>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
>>> +	rte_memdump(stderr, "FCW", &desc->req.fcw_td,
>>> +			sizeof(desc->req.fcw_td) - 8);
>>> +	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
>>> +
>>> +	/* Check if any CBs left for processing */
>>> +	if (mbuf_total_left != 0) {
>>> +		rte_bbdev_log(ERR,
>>> +				"Some date still left after processing one CB:
>> mbuf_total_left = %u",
>>> +				mbuf_total_left);
>>> +		return -EINVAL;
>>> +	}
>>> +#endif
>> The logic is similar to the debug check in mbuf_append; it should be a common function.
> Not exactly, unless I am missing your point.

I look for code blocks that look similar and want you to consider if they can be combined

into a general function or macro.  A general function is easier to maintain. In this case,

it seems like logging of something is wrong with mbuf is similar to an earlier block of code.

Nothing is functionally wrong. 

>
>>> +
>>> +	/* One CB (one op) was successfully prepared to enqueue */
>>> +	return 1;
>>> +}
>>> +
>>> +static inline int
>>> +harq_loopback(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
>>> +		uint16_t total_enqueued_cbs) {
>>> +	struct acc100_fcw_ld *fcw;
>>> +	union acc100_dma_desc *desc;
>>> +	int next_triplet = 1;
>>> +	struct rte_mbuf *hq_output_head, *hq_output;
>>> +	uint16_t harq_in_length = op-
>>> ldpc_dec.harq_combined_input.length;
>>> +	if (harq_in_length == 0) {
>>> +		rte_bbdev_log(ERR, "Loopback of invalid null size\n");
>>> +		return -EINVAL;
>>> +	}
>>> +
>>> +	int h_comp = check_bit(op->ldpc_dec.op_flags,
>>> +			RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION
>>> +			) ? 1 : 0;
>>> +	if (h_comp == 1)
>>> +		harq_in_length = harq_in_length * 8 / 6;
>>> +	harq_in_length = RTE_ALIGN(harq_in_length, 64);
>>> +	uint16_t harq_dma_length_in = (h_comp == 0) ?
>> Can these h_comp checks be combined to a single if/else ?
> it may be clearer, ok.
>
>
>>> +			harq_in_length :
>>> +			harq_in_length * 6 / 8;
>>> +	uint16_t harq_dma_length_out = harq_dma_length_in;
>>> +	bool ddr_mem_in = check_bit(op->ldpc_dec.op_flags,
>>> +
>> 	RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE);
>>> +	union acc100_harq_layout_data *harq_layout = q->d->harq_layout;
>>> +	uint16_t harq_index = (ddr_mem_in ?
>>> +			op->ldpc_dec.harq_combined_input.offset :
>>> +			op->ldpc_dec.harq_combined_output.offset)
>>> +			/ ACC100_HARQ_OFFSET;
>>> +
>>> +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
>>> +			& q->sw_ring_wrap_mask);
>>> +	desc = q->ring_addr + desc_idx;
>>> +	fcw = &desc->req.fcw_ld;
>>> +	/* Set the FCW from loopback into DDR */
>>> +	memset(fcw, 0, sizeof(struct acc100_fcw_ld));
>>> +	fcw->FCWversion = ACC100_FCW_VER;
>>> +	fcw->qm = 2;
>>> +	fcw->Zc = 384;
>> these magic numbers should have #defines
> These are not magic numbers, but actually 3GPP values
ok
>
>>> +	if (harq_in_length < 16 * N_ZC_1)
>>> +		fcw->Zc = 16;
>>> +	fcw->ncb = fcw->Zc * N_ZC_1;
>>> +	fcw->rm_e = 2;
>>> +	fcw->hcin_en = 1;
>>> +	fcw->hcout_en = 1;
>>> +
>>> +	rte_bbdev_log(DEBUG, "Loopback IN %d Index %d offset %d length
>> %d %d\n",
>>> +			ddr_mem_in, harq_index,
>>> +			harq_layout[harq_index].offset, harq_in_length,
>>> +			harq_dma_length_in);
>>> +
>>> +	if (ddr_mem_in && (harq_layout[harq_index].offset > 0)) {
>>> +		fcw->hcin_size0 = harq_layout[harq_index].size0;
>>> +		fcw->hcin_offset = harq_layout[harq_index].offset;
>>> +		fcw->hcin_size1 = harq_in_length - fcw->hcin_offset;
>>> +		harq_dma_length_in = (fcw->hcin_size0 + fcw->hcin_size1);
>>> +		if (h_comp == 1)
>>> +			harq_dma_length_in = harq_dma_length_in * 6 / 8;
>>> +	} else {
>>> +		fcw->hcin_size0 = harq_in_length;
>>> +	}
>>> +	harq_layout[harq_index].val = 0;
>>> +	rte_bbdev_log(DEBUG, "Loopback FCW Config %d %d %d\n",
>>> +			fcw->hcin_size0, fcw->hcin_offset, fcw->hcin_size1);
>>> +	fcw->hcout_size0 = harq_in_length;
>>> +	fcw->hcin_decomp_mode = h_comp;
>>> +	fcw->hcout_comp_mode = h_comp;
>>> +	fcw->gain_i = 1;
>>> +	fcw->gain_h = 1;
>>> +
>>> +	/* Set the prefix of descriptor. This could be done at polling */
>>>  	desc->req.word0 = ACC100_DMA_DESC_TYPE;
>>>  	desc->req.word1 = 0; /**< Timestamp could be disabled */
>>>  	desc->req.word2 = 0;
>>> @@ -1816,6 +2320,107 @@
>>>  	return current_enqueued_cbs;
>>>  }
>>>
>>> +/* Enqueue one decode operations for ACC100 device in TB mode */
>>> +static inline int enqueue_dec_one_op_tb(struct acc100_queue *q,
>>> +struct rte_bbdev_dec_op *op,
>>> +		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb) {
>>> +	union acc100_dma_desc *desc = NULL;
>>> +	int ret;
>>> +	uint8_t r, c;
>>> +	uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
>>> +		h_out_length, mbuf_total_left, seg_total_left;
>>> +	struct rte_mbuf *input, *h_output_head, *h_output,
>>> +		*s_output_head, *s_output;
>>> +	uint16_t current_enqueued_cbs = 0;
>>> +
>>> +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
>>> +			& q->sw_ring_wrap_mask);
>>> +	desc = q->ring_addr + desc_idx;
>>> +	uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
>>> +	acc100_fcw_td_fill(op, &desc->req.fcw_td);
>>> +
>>> +	input = op->turbo_dec.input.data;
>>> +	h_output_head = h_output = op->turbo_dec.hard_output.data;
>>> +	s_output_head = s_output = op->turbo_dec.soft_output.data;
>>> +	in_offset = op->turbo_dec.input.offset;
>>> +	h_out_offset = op->turbo_dec.hard_output.offset;
>>> +	s_out_offset = op->turbo_dec.soft_output.offset;
>>> +	h_out_length = s_out_length = 0;
>>> +	mbuf_total_left = op->turbo_dec.input.length;
>>> +	c = op->turbo_dec.tb_params.c;
>>> +	r = op->turbo_dec.tb_params.r;
>>> +
>>> +	while (mbuf_total_left > 0 && r < c) {
>>> +
>>> +		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
>>> +
>>> +		/* Set up DMA descriptor */
>>> +		desc = q->ring_addr + ((q->sw_ring_head +
>> total_enqueued_cbs)
>>> +				& q->sw_ring_wrap_mask);
>>> +		desc->req.data_ptrs[0].address = q->ring_addr_phys +
>> fcw_offset;
>>> +		desc->req.data_ptrs[0].blen = ACC100_FCW_TD_BLEN;
>>> +		ret = acc100_dma_desc_td_fill(op, &desc->req, &input,
>>> +				h_output, s_output, &in_offset,
>> &h_out_offset,
>>> +				&s_out_offset, &h_out_length,
>> &s_out_length,
>>> +				&mbuf_total_left, &seg_total_left, r);
>>> +
>>> +		if (unlikely(ret < 0))
>>> +			return ret;
>>> +
>>> +		/* Hard output */
>>> +		mbuf_append(h_output_head, h_output, h_out_length);
>>> +
>>> +		/* Soft output */
>>> +		if (check_bit(op->turbo_dec.op_flags,
>>> +				RTE_BBDEV_TURBO_SOFT_OUTPUT))
>>> +			mbuf_append(s_output_head, s_output,
>> s_out_length);
>>> +
>>> +		/* Set total number of CBs in TB */
>>> +		desc->req.cbs_in_tb = cbs_in_tb;
>>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
>>> +		rte_memdump(stderr, "FCW", &desc->req.fcw_td,
>>> +				sizeof(desc->req.fcw_td) - 8);
>>> +		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
>> #endif
>>> +
>>> +		if (seg_total_left == 0) {
>>> +			/* Go to the next mbuf */
>>> +			input = input->next;
>>> +			in_offset = 0;
>>> +			h_output = h_output->next;
>>> +			h_out_offset = 0;
>>> +
>>> +			if (check_bit(op->turbo_dec.op_flags,
>>> +					RTE_BBDEV_TURBO_SOFT_OUTPUT))
>> {
>>> +				s_output = s_output->next;
>>> +				s_out_offset = 0;
>>> +			}
>>> +		}
>>> +
>>> +		total_enqueued_cbs++;
>>> +		current_enqueued_cbs++;
>>> +		r++;
>>> +	}
>>> +
>>> +	if (unlikely(desc == NULL))
>>> +		return current_enqueued_cbs;
>>> +
>>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
>>> +	/* Check if any CBs left for processing */
>>> +	if (mbuf_total_left != 0) {
>>> +		rte_bbdev_log(ERR,
>>> +				"Some date still left for processing:
>> mbuf_total_left = %u",
>>> +				mbuf_total_left);
>>> +		return -EINVAL;
>>> +	}
>>> +#endif
>>> +	/* Set SDone on last CB descriptor for TB mode */
>>> +	desc->req.sdone_enable = 1;
>>> +	desc->req.irq_enable = q->irq_enable;
>>> +
>>> +	return current_enqueued_cbs;
>>> +}
>>>
>>>  /* Calculates number of CBs in processed encoder TB based on 'r' and
>> input
>>>   * length.
>>> @@ -1893,6 +2498,45 @@
>>>  	return cbs_in_tb;
>>>  }
>>>
>>> +/* Enqueue encode operations for ACC100 device in CB mode. */ static
>>> +uint16_t acc100_enqueue_enc_cb(struct rte_bbdev_queue_data
>> *q_data,
>>> +		struct rte_bbdev_enc_op **ops, uint16_t num) {
>>> +	struct acc100_queue *q = q_data->queue_private;
>>> +	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q-
>>> sw_ring_head;
>>> +	uint16_t i;
>>> +	union acc100_dma_desc *desc;
>>> +	int ret;
>>> +
>>> +	for (i = 0; i < num; ++i) {
>>> +		/* Check if there are available space for further processing */
>>> +		if (unlikely(avail - 1 < 0))
>>> +			break;
>>> +		avail -= 1;
>>> +
>>> +		ret = enqueue_enc_one_op_cb(q, ops[i], i);
>>> +		if (ret < 0)
>>> +			break;
>>> +	}
>>> +
>>> +	if (unlikely(i == 0))
>>> +		return 0; /* Nothing to enqueue */
>>> +
>>> +	/* Set SDone in last CB in enqueued ops for CB mode*/
>>> +	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
>>> +			& q->sw_ring_wrap_mask);
>>> +	desc->req.sdone_enable = 1;
>>> +	desc->req.irq_enable = q->irq_enable;
>>> +
>>> +	acc100_dma_enqueue(q, i, &q_data->queue_stats);
>>> +
>>> +	/* Update stats */
>>> +	q_data->queue_stats.enqueued_count += i;
>>> +	q_data->queue_stats.enqueue_err_count += num - i;
>>> +	return i;
>>> +}
>>> +
>>>  /* Check we can mux encode operations with common FCW */  static
>>> inline bool  check_mux(struct rte_bbdev_enc_op **ops, uint16_t num) {
>>> @@ -1960,6 +2604,52 @@
>>>  	return i;
>>>  }
>>>
>>> +/* Enqueue encode operations for ACC100 device in TB mode. */ static
>>> +uint16_t acc100_enqueue_enc_tb(struct rte_bbdev_queue_data
>> *q_data,
>>> +		struct rte_bbdev_enc_op **ops, uint16_t num) {
>>> +	struct acc100_queue *q = q_data->queue_private;
>>> +	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q-
>>> sw_ring_head;
>>> +	uint16_t i, enqueued_cbs = 0;
>>> +	uint8_t cbs_in_tb;
>>> +	int ret;
>>> +
>>> +	for (i = 0; i < num; ++i) {
>>> +		cbs_in_tb = get_num_cbs_in_tb_enc(&ops[i]->turbo_enc);
>>> +		/* Check if there are available space for further processing */
>>> +		if (unlikely(avail - cbs_in_tb < 0))
>>> +			break;
>>> +		avail -= cbs_in_tb;
>>> +
>>> +		ret = enqueue_enc_one_op_tb(q, ops[i], enqueued_cbs,
>> cbs_in_tb);
>>> +		if (ret < 0)
>>> +			break;
>>> +		enqueued_cbs += ret;
>>> +	}
>>> +
>> Other similar functions have an (i == 0) check here.
> ok
>
>>> +	acc100_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
>>> +
>>> +	/* Update stats */
>>> +	q_data->queue_stats.enqueued_count += i;
>>> +	q_data->queue_stats.enqueue_err_count += num - i;
>>> +
>>> +	return i;
>>> +}
>>> +
>>> +/* Enqueue encode operations for ACC100 device. */ static uint16_t
>>> +acc100_enqueue_enc(struct rte_bbdev_queue_data *q_data,
>>> +		struct rte_bbdev_enc_op **ops, uint16_t num) {
>>> +	if (unlikely(num == 0))
>>> +		return 0;
>> num == 0 check should move into the tb/cb functions
> same comment on other patch, why not catch it early?
>
>>> +	if (ops[0]->turbo_enc.code_block_mode == 0)
>>> +		return acc100_enqueue_enc_tb(q_data, ops, num);
>>> +	else
>>> +		return acc100_enqueue_enc_cb(q_data, ops, num); }
>>> +
>>>  /* Enqueue encode operations for ACC100 device. */  static uint16_t
>>> acc100_enqueue_ldpc_enc(struct rte_bbdev_queue_data *q_data, @@
>>> -1967,7 +2657,51 @@  {
>>>  	if (unlikely(num == 0))
>>>  		return 0;
>>> -	return acc100_enqueue_ldpc_enc_cb(q_data, ops, num);
>>> +	if (ops[0]->ldpc_enc.code_block_mode == 0)
>>> +		return acc100_enqueue_enc_tb(q_data, ops, num);
>>> +	else
>>> +		return acc100_enqueue_ldpc_enc_cb(q_data, ops, num); }
>>> +
>>> +
>>> +/* Enqueue decode operations for ACC100 device in CB mode */ static
>>> +uint16_t acc100_enqueue_dec_cb(struct rte_bbdev_queue_data
>> *q_data,
>>> +		struct rte_bbdev_dec_op **ops, uint16_t num) {
>> Seems like the 10th variant of a similar function. Could these be combined into
>> fewer functions?
>>
>> Maybe by passing in a function pointer to the enqueue_one_dec_one* that
>> does the work ?
> They have some variants related to the actual operation and constraints.
> Not obvious to have a valuable refactor. 
>
As above, nothing is functionally wrong; just something to consider.

ok.

Tom

>>> +	struct acc100_queue *q = q_data->queue_private;
>>> +	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q-
>>> sw_ring_head;
>>> +	uint16_t i;
>>> +	union acc100_dma_desc *desc;
>>> +	int ret;
>>> +
>>> +	for (i = 0; i < num; ++i) {
>>> +		/* Check if there are available space for further processing */
>>> +		if (unlikely(avail - 1 < 0))
>>> +			break;
>>> +		avail -= 1;
>>> +
>>> +		ret = enqueue_dec_one_op_cb(q, ops[i], i);
>>> +		if (ret < 0)
>>> +			break;
>>> +	}
>>> +
>>> +	if (unlikely(i == 0))
>>> +		return 0; /* Nothing to enqueue */
>>> +
>>> +	/* Set SDone in last CB in enqueued ops for CB mode*/
>>> +	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
>>> +			& q->sw_ring_wrap_mask);
>>> +	desc->req.sdone_enable = 1;
>>> +	desc->req.irq_enable = q->irq_enable;
>>> +
>>> +	acc100_dma_enqueue(q, i, &q_data->queue_stats);
>>> +
>>> +	/* Update stats */
>>> +	q_data->queue_stats.enqueued_count += i;
>>> +	q_data->queue_stats.enqueue_err_count += num - i;
>>> +
>>> +	return i;
>>>  }
>>>
>>>  /* Check we can mux encode operations with common FCW */ @@ -
>> 2065,6
>>> +2799,53 @@
>>>  	return i;
>>>  }
>>>
>>> +
>>> +/* Enqueue decode operations for ACC100 device in TB mode */ static
>>> +uint16_t acc100_enqueue_dec_tb(struct rte_bbdev_queue_data
>> *q_data,
>>> +		struct rte_bbdev_dec_op **ops, uint16_t num) {
>> 11th ;)
>>> +	struct acc100_queue *q = q_data->queue_private;
>>> +	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q-
>>> sw_ring_head;
>>> +	uint16_t i, enqueued_cbs = 0;
>>> +	uint8_t cbs_in_tb;
>>> +	int ret;
>>> +
>>> +	for (i = 0; i < num; ++i) {
>>> +		cbs_in_tb = get_num_cbs_in_tb_dec(&ops[i]->turbo_dec);
>>> +		/* Check if there are available space for further processing */
>>> +		if (unlikely(avail - cbs_in_tb < 0))
>>> +			break;
>>> +		avail -= cbs_in_tb;
>>> +
>>> +		ret = enqueue_dec_one_op_tb(q, ops[i], enqueued_cbs,
>> cbs_in_tb);
>>> +		if (ret < 0)
>>> +			break;
>>> +		enqueued_cbs += ret;
>>> +	}
>>> +
>>> +	acc100_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
>>> +
>>> +	/* Update stats */
>>> +	q_data->queue_stats.enqueued_count += i;
>>> +	q_data->queue_stats.enqueue_err_count += num - i;
>>> +
>>> +	return i;
>>> +}
>>> +
>>> +/* Enqueue decode operations for ACC100 device. */ static uint16_t
>>> +acc100_enqueue_dec(struct rte_bbdev_queue_data *q_data,
>>> +		struct rte_bbdev_dec_op **ops, uint16_t num) {
>>> +	if (unlikely(num == 0))
>>> +		return 0;
>> similar move the num == 0 check to the tb/cb functions.
> same comment
>
>>> +	if (ops[0]->turbo_dec.code_block_mode == 0)
>>> +		return acc100_enqueue_dec_tb(q_data, ops, num);
>>> +	else
>>> +		return acc100_enqueue_dec_cb(q_data, ops, num); }
>>> +
>>>  /* Enqueue decode operations for ACC100 device. */  static uint16_t
>>> acc100_enqueue_ldpc_dec(struct rte_bbdev_queue_data *q_data, @@
>>> -2388,6 +3169,51 @@
>>>  	return cb_idx;
>>>  }
>>>
>>> +/* Dequeue encode operations from ACC100 device. */ static uint16_t
>>> +acc100_dequeue_enc(struct rte_bbdev_queue_data *q_data,
>>> +		struct rte_bbdev_enc_op **ops, uint16_t num) {
>>> +	struct acc100_queue *q = q_data->queue_private;
>>> +	uint16_t dequeue_num;
>>> +	uint32_t avail = q->sw_ring_head - q->sw_ring_tail;
>>> +	uint32_t aq_dequeued = 0;
>>> +	uint16_t i;
>>> +	uint16_t dequeued_cbs = 0;
>>> +	struct rte_bbdev_enc_op *op;
>>> +	int ret;
>>> +
>>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
>>> +	if (unlikely(ops == 0 && q == NULL))
>> ops is a pointer so should compare with NULL
>>
>> The && likely needs to be ||
>>
>> Maybe print out a message so caller knows something wrong happened.
> ok
>
>>> +		return 0;
>>> +#endif
>>> +
>>> +	dequeue_num = (avail < num) ? avail : num;
>>> +
>>> +	for (i = 0; i < dequeue_num; ++i) {
>>> +		op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
>>> +			& q->sw_ring_wrap_mask))->req.op_addr;
>>> +		if (op->turbo_enc.code_block_mode == 0)
>>> +			ret = dequeue_enc_one_op_tb(q, &ops[i],
>> dequeued_cbs,
>>> +					&aq_dequeued);
>>> +		else
>>> +			ret = dequeue_enc_one_op_cb(q, &ops[i],
>> dequeued_cbs,
>>> +					&aq_dequeued);
>>> +
>>> +		if (ret < 0)
>>> +			break;
>>> +		dequeued_cbs += ret;
>>> +	}
>>> +
>>> +	q->aq_dequeued += aq_dequeued;
>>> +	q->sw_ring_tail += dequeued_cbs;
>>> +
>>> +	/* Update enqueue stats */
>>> +	q_data->queue_stats.dequeued_count += i;
>>> +
>>> +	return i;
>>> +}
>>> +
>>>  /* Dequeue LDPC encode operations from ACC100 device. */  static
>>> uint16_t  acc100_dequeue_ldpc_enc(struct rte_bbdev_queue_data
>> *q_data,
>>> @@ -2426,6 +3252,52 @@
>>>  	return dequeued_cbs;
>>>  }
>>>
>>> +
>>> +/* Dequeue decode operations from ACC100 device. */ static uint16_t
>>> +acc100_dequeue_dec(struct rte_bbdev_queue_data *q_data,
>>> +		struct rte_bbdev_dec_op **ops, uint16_t num) {
>> Very similar to the enc function above; consider how to combine them into a
>> single function.
>>
>> Tom
>>
>>> +	struct acc100_queue *q = q_data->queue_private;
>>> +	uint16_t dequeue_num;
>>> +	uint32_t avail = q->sw_ring_head - q->sw_ring_tail;
>>> +	uint32_t aq_dequeued = 0;
>>> +	uint16_t i;
>>> +	uint16_t dequeued_cbs = 0;
>>> +	struct rte_bbdev_dec_op *op;
>>> +	int ret;
>>> +
>>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
>>> +	if (unlikely(ops == 0 && q == NULL))
>>> +		return 0;
>>> +#endif
>>> +
>>> +	dequeue_num = (avail < num) ? avail : num;
>>> +
>>> +	for (i = 0; i < dequeue_num; ++i) {
>>> +		op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
>>> +			& q->sw_ring_wrap_mask))->req.op_addr;
>>> +		if (op->turbo_dec.code_block_mode == 0)
>>> +			ret = dequeue_dec_one_op_tb(q, &ops[i],
>> dequeued_cbs,
>>> +					&aq_dequeued);
>>> +		else
>>> +			ret = dequeue_dec_one_op_cb(q_data, q, &ops[i],
>>> +					dequeued_cbs, &aq_dequeued);
>>> +
>>> +		if (ret < 0)
>>> +			break;
>>> +		dequeued_cbs += ret;
>>> +	}
>>> +
>>> +	q->aq_dequeued += aq_dequeued;
>>> +	q->sw_ring_tail += dequeued_cbs;
>>> +
>>> +	/* Update enqueue stats */
>>> +	q_data->queue_stats.dequeued_count += i;
>>> +
>>> +	return i;
>>> +}
>>> +
>>>  /* Dequeue decode operations from ACC100 device. */  static uint16_t
>>> acc100_dequeue_ldpc_dec(struct rte_bbdev_queue_data *q_data, @@
>>> -2479,6 +3351,10 @@
>>>  	struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device);
>>>
>>>  	dev->dev_ops = &acc100_bbdev_ops;
>>> +	dev->enqueue_enc_ops = acc100_enqueue_enc;
>>> +	dev->enqueue_dec_ops = acc100_enqueue_dec;
>>> +	dev->dequeue_enc_ops = acc100_dequeue_enc;
>>> +	dev->dequeue_dec_ops = acc100_dequeue_dec;
>>>  	dev->enqueue_ldpc_enc_ops = acc100_enqueue_ldpc_enc;
>>>  	dev->enqueue_ldpc_dec_ops = acc100_enqueue_ldpc_dec;
>>>  	dev->dequeue_ldpc_enc_ops = acc100_dequeue_ldpc_enc;
  
Chautru, Nicolas Oct. 1, 2020, 9:46 p.m. UTC | #4
Hi Tom,

> From: Tom Rix <trix@redhat.com>
> On 9/30/20 12:10 PM, Chautru, Nicolas wrote:
> > Hi Tom,
> >
> >> From: Tom Rix <trix@redhat.com>
> >> On 9/28/20 5:29 PM, Nicolas Chautru wrote:
> >>> Adding capability for 4G encode and decoder processing
> >>>
> >>> Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
> >>> Acked-by: Liu Tianjiao <Tianjiao.liu@intel.com>
> >>> ---
> >>>  doc/guides/bbdevs/features/acc100.ini    |    4 +-
> >>>  drivers/baseband/acc100/rte_acc100_pmd.c | 1010
> >>> ++++++++++++++++++++++++++++--
> >>>  2 files changed, 945 insertions(+), 69 deletions(-)
> >>>
> >>> diff --git a/doc/guides/bbdevs/features/acc100.ini
> >>> b/doc/guides/bbdevs/features/acc100.ini
> >>> index 40c7adc..642cd48 100644
> >>> --- a/doc/guides/bbdevs/features/acc100.ini
> >>> +++ b/doc/guides/bbdevs/features/acc100.ini
> >>> @@ -4,8 +4,8 @@
> >>>  ; Refer to default.ini for the full list of available PMD features.
> >>>  ;
> >>>  [Features]
> >>> -Turbo Decoder (4G)     = N
> >>> -Turbo Encoder (4G)     = N
> >>> +Turbo Decoder (4G)     = Y
> >>> +Turbo Encoder (4G)     = Y
> >>>  LDPC Decoder (5G)      = Y
> >>>  LDPC Encoder (5G)      = Y
> >>>  LLR/HARQ Compression   = Y
> >>> diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c
> >>> b/drivers/baseband/acc100/rte_acc100_pmd.c
> >>> index e484c0a..7d4c3df 100644
> >>> --- a/drivers/baseband/acc100/rte_acc100_pmd.c
> >>> +++ b/drivers/baseband/acc100/rte_acc100_pmd.c
> >>> @@ -339,7 +339,6 @@
> >>>  	free_base_addresses(base_addrs, i);  }
> >>>
> >>> -
> >>>  /* Allocate 64MB memory used for all software rings */  static int
> >>> acc100_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int
> >>> socket_id) @@ -637,6 +636,41 @@
> >>>
> >>>  	static const struct rte_bbdev_op_cap bbdev_capabilities[] = {
> >>>  		{
> >>> +			.type = RTE_BBDEV_OP_TURBO_DEC,
> >>> +			.cap.turbo_dec = {
> >>> +				.capability_flags =
> >>> +
> >> 	RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
> >>> +					RTE_BBDEV_TURBO_CRC_TYPE_24B
> >> |
> >>> +
> >> 	RTE_BBDEV_TURBO_HALF_ITERATION_EVEN |
> >>> +
> >> 	RTE_BBDEV_TURBO_EARLY_TERMINATION |
> >>> +
> >> 	RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
> >>> +					RTE_BBDEV_TURBO_MAP_DEC |
> >>> +
> >> 	RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
> >>> +
> >> 	RTE_BBDEV_TURBO_DEC_SCATTER_GATHER,
> >>> +				.max_llr_modulus = INT8_MAX,
> >>> +				.num_buffers_src =
> >>> +
> >> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> >>> +				.num_buffers_hard_out =
> >>> +
> >> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> >>> +				.num_buffers_soft_out =
> >>> +
> >> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> >>> +			}
> >>> +		},
> >>> +		{
> >>> +			.type = RTE_BBDEV_OP_TURBO_ENC,
> >>> +			.cap.turbo_enc = {
> >>> +				.capability_flags =
> >>> +
> >> 	RTE_BBDEV_TURBO_CRC_24B_ATTACH |
> >>> +
> >> 	RTE_BBDEV_TURBO_RV_INDEX_BYPASS |
> >>> +					RTE_BBDEV_TURBO_RATE_MATCH |
> >>> +
> >> 	RTE_BBDEV_TURBO_ENC_SCATTER_GATHER,
> >>> +				.num_buffers_src =
> >>> +
> >> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> >>> +				.num_buffers_dst =
> >>> +
> >> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> >>> +			}
> >>> +		},
> >>> +		{
> >>>  			.type   = RTE_BBDEV_OP_LDPC_ENC,
> >>>  			.cap.ldpc_enc = {
> >>>  				.capability_flags =
> >>> @@ -719,7 +753,6 @@
> >>>  #endif
> >>>  }
> >>>
> >>> -
> >>>  static const struct rte_bbdev_ops acc100_bbdev_ops = {
> >>>  	.setup_queues = acc100_setup_queues,
> >>>  	.close = acc100_dev_close,
> >>> @@ -763,6 +796,58 @@
> >>>  	return tail;
> >>>  }
> >>>
> >>> +/* Fill in a frame control word for turbo encoding. */ static
> >>> +inline void acc100_fcw_te_fill(const struct rte_bbdev_enc_op *op,
> >>> +struct acc100_fcw_te *fcw) {
> >>> +	fcw->code_block_mode = op->turbo_enc.code_block_mode;
> >>> +	if (fcw->code_block_mode == 0) { /* For TB mode */
> >>> +		fcw->k_neg = op->turbo_enc.tb_params.k_neg;
> >>> +		fcw->k_pos = op->turbo_enc.tb_params.k_pos;
> >>> +		fcw->c_neg = op->turbo_enc.tb_params.c_neg;
> >>> +		fcw->c = op->turbo_enc.tb_params.c;
> >>> +		fcw->ncb_neg = op->turbo_enc.tb_params.ncb_neg;
> >>> +		fcw->ncb_pos = op->turbo_enc.tb_params.ncb_pos;
> >>> +
> >>> +		if (check_bit(op->turbo_enc.op_flags,
> >>> +				RTE_BBDEV_TURBO_RATE_MATCH)) {
> >>> +			fcw->bypass_rm = 0;
> >>> +			fcw->cab = op->turbo_enc.tb_params.cab;
> >>> +			fcw->ea = op->turbo_enc.tb_params.ea;
> >>> +			fcw->eb = op->turbo_enc.tb_params.eb;
> >>> +		} else {
> >>> +			/* E is set to the encoding output size when RM is
> >>> +			 * bypassed.
> >>> +			 */
> >>> +			fcw->bypass_rm = 1;
> >>> +			fcw->cab = fcw->c_neg;
> >>> +			fcw->ea = 3 * fcw->k_neg + 12;
> >>> +			fcw->eb = 3 * fcw->k_pos + 12;
> >>> +		}
> >>> +	} else { /* For CB mode */
> >>> +		fcw->k_pos = op->turbo_enc.cb_params.k;
> >>> +		fcw->ncb_pos = op->turbo_enc.cb_params.ncb;
> >>> +
> >>> +		if (check_bit(op->turbo_enc.op_flags,
> >>> +				RTE_BBDEV_TURBO_RATE_MATCH)) {
> >>> +			fcw->bypass_rm = 0;
> >>> +			fcw->eb = op->turbo_enc.cb_params.e;
> >>> +		} else {
> >>> +			/* E is set to the encoding output size when RM is
> >>> +			 * bypassed.
> >>> +			 */
> >>> +			fcw->bypass_rm = 1;
> >>> +			fcw->eb = 3 * fcw->k_pos + 12;
> >>> +		}
> >>> +	}
> >>> +
> >>> +	fcw->bypass_rv_idx1 = check_bit(op->turbo_enc.op_flags,
> >>> +			RTE_BBDEV_TURBO_RV_INDEX_BYPASS);
> >>> +	fcw->code_block_crc = check_bit(op->turbo_enc.op_flags,
> >>> +			RTE_BBDEV_TURBO_CRC_24B_ATTACH);
> >>> +	fcw->rv_idx1 = op->turbo_enc.rv_index; }
> >>> +
> >>>  /* Compute value of k0.
> >>>   * Based on 3GPP 38.212 Table 5.4.2.1-2
> >>>   * Starting position of different redundancy versions, k0 @@ -813,6
> >>> +898,25 @@
> >>>  	fcw->mcb_count = num_cb;
> >>>  }
> >>>
> >>> +/* Fill in a frame control word for turbo decoding. */ static
> >>> +inline void acc100_fcw_td_fill(const struct rte_bbdev_dec_op *op,
> >>> +struct acc100_fcw_td *fcw) {
> >>> +	/* Note : Early termination is always enabled for 4GUL */
> >>> +	fcw->fcw_ver = 1;
> >>> +	if (op->turbo_dec.code_block_mode == 0)
> >>> +		fcw->k_pos = op->turbo_dec.tb_params.k_pos;
> >>> +	else
> >>> +		fcw->k_pos = op->turbo_dec.cb_params.k;
> >>> +	fcw->turbo_crc_type = check_bit(op->turbo_dec.op_flags,
> >>> +			RTE_BBDEV_TURBO_CRC_TYPE_24B);
> >>> +	fcw->bypass_sb_deint = 0;
> >>> +	fcw->raw_decoder_input_on = 0;
> >>> +	fcw->max_iter = op->turbo_dec.iter_max;
> >>> +	fcw->half_iter_on = !check_bit(op->turbo_dec.op_flags,
> >>> +			RTE_BBDEV_TURBO_HALF_ITERATION_EVEN);
> >>> +}
> >>> +
> >>>  /* Fill in a frame control word for LDPC decoding. */  static
> >>> inline void  acc100_fcw_ld_fill(const struct rte_bbdev_dec_op *op,
> >>> struct acc100_fcw_ld *fcw, @@ -1042,6 +1146,87 @@  }
> >>>
> >>>  static inline int
> >>> +acc100_dma_desc_te_fill(struct rte_bbdev_enc_op *op,
> >>> +		struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
> >>> +		struct rte_mbuf *output, uint32_t *in_offset,
> >>> +		uint32_t *out_offset, uint32_t *out_length,
> >>> +		uint32_t *mbuf_total_left, uint32_t *seg_total_left, uint8_t
> >> r) {
> >>> +	int next_triplet = 1; /* FCW already done */
> >>> +	uint32_t e, ea, eb, length;
> >>> +	uint16_t k, k_neg, k_pos;
> >>> +	uint8_t cab, c_neg;
> >>> +
> >>> +	desc->word0 = ACC100_DMA_DESC_TYPE;
> >>> +	desc->word1 = 0; /**< Timestamp could be disabled */
> >>> +	desc->word2 = 0;
> >>> +	desc->word3 = 0;
> >>> +	desc->numCBs = 1;
> >>> +
> >>> +	if (op->turbo_enc.code_block_mode == 0) {
> >>> +		ea = op->turbo_enc.tb_params.ea;
> >>> +		eb = op->turbo_enc.tb_params.eb;
> >>> +		cab = op->turbo_enc.tb_params.cab;
> >>> +		k_neg = op->turbo_enc.tb_params.k_neg;
> >>> +		k_pos = op->turbo_enc.tb_params.k_pos;
> >>> +		c_neg = op->turbo_enc.tb_params.c_neg;
> >>> +		e = (r < cab) ? ea : eb;
> >>> +		k = (r < c_neg) ? k_neg : k_pos;
> >>> +	} else {
> >>> +		e = op->turbo_enc.cb_params.e;
> >>> +		k = op->turbo_enc.cb_params.k;
> >>> +	}
> >>> +
> >>> +	if (check_bit(op->turbo_enc.op_flags,
> >> RTE_BBDEV_TURBO_CRC_24B_ATTACH))
> >>> +		length = (k - 24) >> 3;
> >>> +	else
> >>> +		length = k >> 3;
> >>> +
> >>> +	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left <
> >>> +length))) {
> >> similar to other patches, this check can be combined to <=
> >>
> >> change generally
> > same comment on other patch
> >
> >>> +		rte_bbdev_log(ERR,
> >>> +				"Mismatch between mbuf length and
> >> included CB sizes: mbuf len %u, cb len %u",
> >>> +				*mbuf_total_left, length);
> >>> +		return -1;
> >>> +	}
> >>> +
> >>> +	next_triplet = acc100_dma_fill_blk_type_in(desc, input, in_offset,
> >>> +			length, seg_total_left, next_triplet);
> >>> +	if (unlikely(next_triplet < 0)) {
> >>> +		rte_bbdev_log(ERR,
> >>> +				"Mismatch between data to process and
> >> mbuf data length in bbdev_op: %p",
> >>> +				op);
> >>> +		return -1;
> >>> +	}
> >>> +	desc->data_ptrs[next_triplet - 1].last = 1;
> >>> +	desc->m2dlen = next_triplet;
> >>> +	*mbuf_total_left -= length;
> >>> +
> >>> +	/* Set output length */
> >>> +	if (check_bit(op->turbo_enc.op_flags,
> >> RTE_BBDEV_TURBO_RATE_MATCH))
> >>> +		/* Integer round up division by 8 */
> >>> +		*out_length = (e + 7) >> 3;
> >>> +	else
> >>> +		*out_length = (k >> 3) * 3 + 2;
> >>> +
> >>> +	next_triplet = acc100_dma_fill_blk_type_out(desc, output,
> >> *out_offset,
> >>> +			*out_length, next_triplet,
> >> ACC100_DMA_BLKID_OUT_ENC);
> >>> +	if (unlikely(next_triplet < 0)) {
> >>> +		rte_bbdev_log(ERR,
> >>> +				"Mismatch between data to process and
> >> mbuf data length in bbdev_op: %p",
> >>> +				op);
> >>> +		return -1;
> >>> +	}
> >>> +	op->turbo_enc.output.length += *out_length;
> >>> +	*out_offset += *out_length;
> >>> +	desc->data_ptrs[next_triplet - 1].last = 1;
> >>> +	desc->d2mlen = next_triplet - desc->m2dlen;
> >>> +
> >>> +	desc->op_addr = op;
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static inline int
> >>>  acc100_dma_desc_le_fill(struct rte_bbdev_enc_op *op,
> >>>  		struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
> >>>  		struct rte_mbuf *output, uint32_t *in_offset, @@ -1110,6
> >> +1295,117
> >>> @@  }
> >>>
> >>>  static inline int
> >>> +acc100_dma_desc_td_fill(struct rte_bbdev_dec_op *op,
> >>> +		struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
> >>> +		struct rte_mbuf *h_output, struct rte_mbuf *s_output,
> >>> +		uint32_t *in_offset, uint32_t *h_out_offset,
> >>> +		uint32_t *s_out_offset, uint32_t *h_out_length,
> >>> +		uint32_t *s_out_length, uint32_t *mbuf_total_left,
> >>> +		uint32_t *seg_total_left, uint8_t r) {
> >>> +	int next_triplet = 1; /* FCW already done */
> >>> +	uint16_t k;
> >>> +	uint16_t crc24_overlap = 0;
> >>> +	uint32_t e, kw;
> >>> +
> >>> +	desc->word0 = ACC100_DMA_DESC_TYPE;
> >>> +	desc->word1 = 0; /**< Timestamp could be disabled */
> >>> +	desc->word2 = 0;
> >>> +	desc->word3 = 0;
> >>> +	desc->numCBs = 1;
> >>> +
> >>> +	if (op->turbo_dec.code_block_mode == 0) {
> >>> +		k = (r < op->turbo_dec.tb_params.c_neg)
> >>> +			? op->turbo_dec.tb_params.k_neg
> >>> +			: op->turbo_dec.tb_params.k_pos;
> >>> +		e = (r < op->turbo_dec.tb_params.cab)
> >>> +			? op->turbo_dec.tb_params.ea
> >>> +			: op->turbo_dec.tb_params.eb;
> >>> +	} else {
> >>> +		k = op->turbo_dec.cb_params.k;
> >>> +		e = op->turbo_dec.cb_params.e;
> >>> +	}
> >>> +
> >>> +	if ((op->turbo_dec.code_block_mode == 0)
> >>> +		&& !check_bit(op->turbo_dec.op_flags,
> >>> +		RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
> >>> +		crc24_overlap = 24;
> >>> +
> >>> +	/* Calculates circular buffer size.
> >>> +	 * According to 3gpp 36.212 section 5.1.4.2
> >>> +	 *   Kw = 3 * Kpi,
> >>> +	 * where:
> >>> +	 *   Kpi = nCol * nRow
> >>> +	 * where nCol is 32 and nRow can be calculated from:
> >>> +	 *   D =< nCol * nRow
> >>> +	 * where D is the size of each output from turbo encoder block (k
> >>> ++
> >> 4).
> >>> +	 */
> >>> +	kw = RTE_ALIGN_CEIL(k + 4, 32) * 3;
> >>> +
> >>> +	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < kw))) {
> >>> +		rte_bbdev_log(ERR,
> >>> +				"Mismatch between mbuf length and
> >> included CB sizes: mbuf len %u, cb len %u",
> >>> +				*mbuf_total_left, kw);
> >>> +		return -1;
> >>> +	}
> >>> +
> >>> +	next_triplet = acc100_dma_fill_blk_type_in(desc, input, in_offset,
> >> kw,
> >>> +			seg_total_left, next_triplet);
> >>> +	if (unlikely(next_triplet < 0)) {
> >>> +		rte_bbdev_log(ERR,
> >>> +				"Mismatch between data to process and
> >> mbuf data length in bbdev_op: %p",
> >>> +				op);
> >>> +		return -1;
> >>> +	}
> >>> +	desc->data_ptrs[next_triplet - 1].last = 1;
> >>> +	desc->m2dlen = next_triplet;
> >>> +	*mbuf_total_left -= kw;
> >>> +
> >>> +	next_triplet = acc100_dma_fill_blk_type_out(
> >>> +			desc, h_output, *h_out_offset,
> >>> +			k >> 3, next_triplet,
> >> ACC100_DMA_BLKID_OUT_HARD);
> >>> +	if (unlikely(next_triplet < 0)) {
> >>> +		rte_bbdev_log(ERR,
> >>> +				"Mismatch between data to process and
> >> mbuf data length in bbdev_op: %p",
> >>> +				op);
> >>> +		return -1;
> >>> +	}
> >>> +
> >>> +	*h_out_length = ((k - crc24_overlap) >> 3);
> >>> +	op->turbo_dec.hard_output.length += *h_out_length;
> >>> +	*h_out_offset += *h_out_length;
> >>> +
> >>> +	/* Soft output */
> >>> +	if (check_bit(op->turbo_dec.op_flags,
> >> RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
> >>> +		if (check_bit(op->turbo_dec.op_flags,
> >>> +				RTE_BBDEV_TURBO_EQUALIZER))
> >>> +			*s_out_length = e;
> >>> +		else
> >>> +			*s_out_length = (k * 3) + 12;
> >>> +
> >>> +		next_triplet = acc100_dma_fill_blk_type_out(desc, s_output,
> >>> +				*s_out_offset, *s_out_length, next_triplet,
> >>> +				ACC100_DMA_BLKID_OUT_SOFT);
> >>> +		if (unlikely(next_triplet < 0)) {
> >>> +			rte_bbdev_log(ERR,
> >>> +					"Mismatch between data to process
> >> and mbuf data length in bbdev_op: %p",
> >>> +					op);
> >>> +			return -1;
> >>> +		}
> >>> +
> >>> +		op->turbo_dec.soft_output.length += *s_out_length;
> >>> +		*s_out_offset += *s_out_length;
> >>> +	}
> >>> +
> >>> +	desc->data_ptrs[next_triplet - 1].last = 1;
> >>> +	desc->d2mlen = next_triplet - desc->m2dlen;
> >>> +
> >>> +	desc->op_addr = op;
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static inline int
> >>>  acc100_dma_desc_ld_fill(struct rte_bbdev_dec_op *op,
> >>>  		struct acc100_dma_req_desc *desc,
> >>>  		struct rte_mbuf **input, struct rte_mbuf *h_output, @@ -
> >> 1374,6
> >>> +1670,57 @@
> >>>
> >>>  /* Enqueue one encode operations for ACC100 device in CB mode */
> >>> static inline int
> >>> +enqueue_enc_one_op_cb(struct acc100_queue *q, struct
> >> rte_bbdev_enc_op *op,
> >>> +		uint16_t total_enqueued_cbs)
> >>> +{
> >>> +	union acc100_dma_desc *desc = NULL;
> >>> +	int ret;
> >>> +	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
> >>> +		seg_total_left;
> >>> +	struct rte_mbuf *input, *output_head, *output;
> >>> +
> >>> +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> >>> +			& q->sw_ring_wrap_mask);
> >>> +	desc = q->ring_addr + desc_idx;
> >>> +	acc100_fcw_te_fill(op, &desc->req.fcw_te);
> >>> +
> >>> +	input = op->turbo_enc.input.data;
> >>> +	output_head = output = op->turbo_enc.output.data;
> >>> +	in_offset = op->turbo_enc.input.offset;
> >>> +	out_offset = op->turbo_enc.output.offset;
> >>> +	out_length = 0;
> >>> +	mbuf_total_left = op->turbo_enc.input.length;
> >>> +	seg_total_left = rte_pktmbuf_data_len(op->turbo_enc.input.data)
> >>> +			- in_offset;
> >>> +
> >>> +	ret = acc100_dma_desc_te_fill(op, &desc->req, &input, output,
> >>> +			&in_offset, &out_offset, &out_length,
> >> &mbuf_total_left,
> >>> +			&seg_total_left, 0);
> >>> +
> >>> +	if (unlikely(ret < 0))
> >>> +		return ret;
> >>> +
> >>> +	mbuf_append(output_head, output, out_length);
> >>> +
> >>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> >>> +	rte_memdump(stderr, "FCW", &desc->req.fcw_te,
> >>> +			sizeof(desc->req.fcw_te) - 8);
> >>> +	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> >>> +
> >>> +	/* Check if any data left after processing one CB */
> >>> +	if (mbuf_total_left != 0) {
> >>> +		rte_bbdev_log(ERR,
> >>> +				"Some date still left after processing one CB:
> >> mbuf_total_left = %u",
> >>> +				mbuf_total_left);
> >>> +		return -EINVAL;
> >>> +	}
> >>> +#endif
> >>> +	/* One CB (one op) was successfully prepared to enqueue */
> >>> +	return 1;
> >>> +}
> >>> +
> >>> +/* Enqueue one encode operations for ACC100 device in CB mode */
> >>> +static inline int
> >>>  enqueue_ldpc_enc_n_op_cb(struct acc100_queue *q, struct
> >> rte_bbdev_enc_op **ops,
> >>>  		uint16_t total_enqueued_cbs, int16_t num)  { @@ -1481,78
> >> +1828,235
> >>> @@
> >>>  	return 1;
> >>>  }
> >>>
> >>> -static inline int
> >>> -harq_loopback(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
> >>> -		uint16_t total_enqueued_cbs) {
> >>> -	struct acc100_fcw_ld *fcw;
> >>> -	union acc100_dma_desc *desc;
> >>> -	int next_triplet = 1;
> >>> -	struct rte_mbuf *hq_output_head, *hq_output;
> >>> -	uint16_t harq_in_length = op-
> >>> ldpc_dec.harq_combined_input.length;
> >>> -	if (harq_in_length == 0) {
> >>> -		rte_bbdev_log(ERR, "Loopback of invalid null size\n");
> >>> -		return -EINVAL;
> >>> -	}
> >>>
> >>> -	int h_comp = check_bit(op->ldpc_dec.op_flags,
> >>> -			RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION
> >>> -			) ? 1 : 0;
> >>> -	if (h_comp == 1)
> >>> -		harq_in_length = harq_in_length * 8 / 6;
> >>> -	harq_in_length = RTE_ALIGN(harq_in_length, 64);
> >>> -	uint16_t harq_dma_length_in = (h_comp == 0) ?
> >>> -			harq_in_length :
> >>> -			harq_in_length * 6 / 8;
> >>> -	uint16_t harq_dma_length_out = harq_dma_length_in;
> >>> -	bool ddr_mem_in = check_bit(op->ldpc_dec.op_flags,
> >>> -
> >> 	RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE);
> >>> -	union acc100_harq_layout_data *harq_layout = q->d->harq_layout;
> >>> -	uint16_t harq_index = (ddr_mem_in ?
> >>> -			op->ldpc_dec.harq_combined_input.offset :
> >>> -			op->ldpc_dec.harq_combined_output.offset)
> >>> -			/ ACC100_HARQ_OFFSET;
> >>> +/* Enqueue one encode operations for ACC100 device in TB mode. */
> >>> +static inline int enqueue_enc_one_op_tb(struct acc100_queue *q,
> >>> +struct rte_bbdev_enc_op *op,
> >>> +		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb) {
> >>> +	union acc100_dma_desc *desc = NULL;
> >>> +	int ret;
> >>> +	uint8_t r, c;
> >>> +	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
> >>> +		seg_total_left;
> >>> +	struct rte_mbuf *input, *output_head, *output;
> >>> +	uint16_t current_enqueued_cbs = 0;
> >>>
> >>>  	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> >>>  			& q->sw_ring_wrap_mask);
> >>>  	desc = q->ring_addr + desc_idx;
> >>> -	fcw = &desc->req.fcw_ld;
> >>> -	/* Set the FCW from loopback into DDR */
> >>> -	memset(fcw, 0, sizeof(struct acc100_fcw_ld));
> >>> -	fcw->FCWversion = ACC100_FCW_VER;
> >>> -	fcw->qm = 2;
> >>> -	fcw->Zc = 384;
> >>> -	if (harq_in_length < 16 * N_ZC_1)
> >>> -		fcw->Zc = 16;
> >>> -	fcw->ncb = fcw->Zc * N_ZC_1;
> >>> -	fcw->rm_e = 2;
> >>> -	fcw->hcin_en = 1;
> >>> -	fcw->hcout_en = 1;
> >>> +	uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
> >>> +	acc100_fcw_te_fill(op, &desc->req.fcw_te);
> >>>
> >>> -	rte_bbdev_log(DEBUG, "Loopback IN %d Index %d offset %d length
> >> %d %d\n",
> >>> -			ddr_mem_in, harq_index,
> >>> -			harq_layout[harq_index].offset, harq_in_length,
> >>> -			harq_dma_length_in);
> >>> +	input = op->turbo_enc.input.data;
> >>> +	output_head = output = op->turbo_enc.output.data;
> >>> +	in_offset = op->turbo_enc.input.offset;
> >>> +	out_offset = op->turbo_enc.output.offset;
> >>> +	out_length = 0;
> >>> +	mbuf_total_left = op->turbo_enc.input.length;
> >>>
> >>> -	if (ddr_mem_in && (harq_layout[harq_index].offset > 0)) {
> >>> -		fcw->hcin_size0 = harq_layout[harq_index].size0;
> >>> -		fcw->hcin_offset = harq_layout[harq_index].offset;
> >>> -		fcw->hcin_size1 = harq_in_length - fcw->hcin_offset;
> >>> -		harq_dma_length_in = (fcw->hcin_size0 + fcw->hcin_size1);
> >>> -		if (h_comp == 1)
> >>> -			harq_dma_length_in = harq_dma_length_in * 6 / 8;
> >>> -	} else {
> >>> -		fcw->hcin_size0 = harq_in_length;
> >>> -	}
> >>> -	harq_layout[harq_index].val = 0;
> >>> -	rte_bbdev_log(DEBUG, "Loopback FCW Config %d %d %d\n",
> >>> -			fcw->hcin_size0, fcw->hcin_offset, fcw->hcin_size1);
> >>> -	fcw->hcout_size0 = harq_in_length;
> >>> -	fcw->hcin_decomp_mode = h_comp;
> >>> -	fcw->hcout_comp_mode = h_comp;
> >>> -	fcw->gain_i = 1;
> >>> -	fcw->gain_h = 1;
> >>> +	c = op->turbo_enc.tb_params.c;
> >>> +	r = op->turbo_enc.tb_params.r;
> >>>
> >>> -	/* Set the prefix of descriptor. This could be done at polling */
> >>> +	while (mbuf_total_left > 0 && r < c) {
> >>> +		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
> >>> +		/* Set up DMA descriptor */
> >>> +		desc = q->ring_addr + ((q->sw_ring_head +
> >> total_enqueued_cbs)
> >>> +				& q->sw_ring_wrap_mask);
> >>> +		desc->req.data_ptrs[0].address = q->ring_addr_phys +
> >> fcw_offset;
> >>> +		desc->req.data_ptrs[0].blen = ACC100_FCW_TE_BLEN;
> >>> +
> >>> +		ret = acc100_dma_desc_te_fill(op, &desc->req, &input,
> >> output,
> >>> +				&in_offset, &out_offset, &out_length,
> >>> +				&mbuf_total_left, &seg_total_left, r);
> >>> +		if (unlikely(ret < 0))
> >>> +			return ret;
> >>> +		mbuf_append(output_head, output, out_length);
> >>> +
> >>> +		/* Set total number of CBs in TB */
> >>> +		desc->req.cbs_in_tb = cbs_in_tb;
> >>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> >>> +		rte_memdump(stderr, "FCW", &desc->req.fcw_te,
> >>> +				sizeof(desc->req.fcw_te) - 8);
> >>> +		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> >> #endif
> >>> +
> >>> +		if (seg_total_left == 0) {
> >>> +			/* Go to the next mbuf */
> >>> +			input = input->next;
> >>> +			in_offset = 0;
> >>> +			output = output->next;
> >>> +			out_offset = 0;
> >>> +		}
> >>> +
> >>> +		total_enqueued_cbs++;
> >>> +		current_enqueued_cbs++;
> >>> +		r++;
> >>> +	}
> >>> +
> >>> +	if (unlikely(desc == NULL))
> >>> +		return current_enqueued_cbs;
> >>> +
> >>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> >>> +	/* Check if any CBs left for processing */
> >>> +	if (mbuf_total_left != 0) {
> >>> +		rte_bbdev_log(ERR,
> >>> +				"Some date still left for processing:
> >> mbuf_total_left = %u",
> >>> +				mbuf_total_left);
> >>> +		return -EINVAL;
> >>> +	}
> >>> +#endif
> >>> +
> >>> +	/* Set SDone on last CB descriptor for TB mode. */
> >>> +	desc->req.sdone_enable = 1;
> >>> +	desc->req.irq_enable = q->irq_enable;
> >>> +
> >>> +	return current_enqueued_cbs;
> >>> +}
> >>> +
> >>> +/** Enqueue one decode operations for ACC100 device in CB mode */
> >>> +static inline int enqueue_dec_one_op_cb(struct acc100_queue *q,
> >>> +struct rte_bbdev_dec_op *op,
> >>> +		uint16_t total_enqueued_cbs)
> >>> +{
> >>> +	union acc100_dma_desc *desc = NULL;
> >>> +	int ret;
> >>> +	uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
> >>> +		h_out_length, mbuf_total_left, seg_total_left;
> >>> +	struct rte_mbuf *input, *h_output_head, *h_output,
> >>> +		*s_output_head, *s_output;
> >>> +
> >>> +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> >>> +			& q->sw_ring_wrap_mask);
> >>> +	desc = q->ring_addr + desc_idx;
> >>> +	acc100_fcw_td_fill(op, &desc->req.fcw_td);
> >>> +
> >>> +	input = op->turbo_dec.input.data;
> >>> +	h_output_head = h_output = op->turbo_dec.hard_output.data;
> >>> +	s_output_head = s_output = op->turbo_dec.soft_output.data;
> >>> +	in_offset = op->turbo_dec.input.offset;
> >>> +	h_out_offset = op->turbo_dec.hard_output.offset;
> >>> +	s_out_offset = op->turbo_dec.soft_output.offset;
> >>> +	h_out_length = s_out_length = 0;
> >>> +	mbuf_total_left = op->turbo_dec.input.length;
> >>> +	seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
> >>> +
> >>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> >>> +	if (unlikely(input == NULL)) {
> >>> +		rte_bbdev_log(ERR, "Invalid mbuf pointer");
> >>> +		return -EFAULT;
> >>> +	}
> >>> +#endif
> >>> +
> >>> +	/* Set up DMA descriptor */
> >>> +	desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
> >>> +			& q->sw_ring_wrap_mask);
> >>> +
> >>> +	ret = acc100_dma_desc_td_fill(op, &desc->req, &input, h_output,
> >>> +			s_output, &in_offset, &h_out_offset, &s_out_offset,
> >>> +			&h_out_length, &s_out_length, &mbuf_total_left,
> >>> +			&seg_total_left, 0);
> >>> +
> >>> +	if (unlikely(ret < 0))
> >>> +		return ret;
> >>> +
> >>> +	/* Hard output */
> >>> +	mbuf_append(h_output_head, h_output, h_out_length);
> >>> +
> >>> +	/* Soft output */
> >>> +	if (check_bit(op->turbo_dec.op_flags,
> >> RTE_BBDEV_TURBO_SOFT_OUTPUT))
> >>> +		mbuf_append(s_output_head, s_output, s_out_length);
> >>> +
> >>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> >>> +	rte_memdump(stderr, "FCW", &desc->req.fcw_td,
> >>> +			sizeof(desc->req.fcw_td) - 8);
> >>> +	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> >>> +
> >>> +	/* Check if any CBs left for processing */
> >>> +	if (mbuf_total_left != 0) {
> >>> +		rte_bbdev_log(ERR,
> >>> +				"Some date still left after processing one CB:
> >> mbuf_total_left = %u",
> >>> +				mbuf_total_left);
> >>> +		return -EINVAL;
> >>> +	}
> >>> +#endif
> >> logic similar to debug in mbuf_append, should be a common function.
> > Not exactly except if I miss your point.
> 
> I look for code blocks that look similar and want you to consider if they can
> be combined
> 
> into a general function or macro.  A general function is easier to maintain. In
> this case,
> 
> it seems like logging of something is wrong with mbuf is similar to an earlier
> block of code.
> 
> Nothing is functionally wrong.

For that very example I can add a common error trap.
I am not totally convinced of the value, and it prevents using %s with __func__, but ok.

> 
> >
> >>> +
> >>> +	/* One CB (one op) was successfully prepared to enqueue */
> >>> +	return 1;
> >>> +}
> >>> +
> >>> +static inline int
> >>> +harq_loopback(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
> >>> +		uint16_t total_enqueued_cbs) {
> >>> +	struct acc100_fcw_ld *fcw;
> >>> +	union acc100_dma_desc *desc;
> >>> +	int next_triplet = 1;
> >>> +	struct rte_mbuf *hq_output_head, *hq_output;
> >>> +	uint16_t harq_in_length = op-
> >>> ldpc_dec.harq_combined_input.length;
> >>> +	if (harq_in_length == 0) {
> >>> +		rte_bbdev_log(ERR, "Loopback of invalid null size\n");
> >>> +		return -EINVAL;
> >>> +	}
> >>> +
> >>> +	int h_comp = check_bit(op->ldpc_dec.op_flags,
> >>> +			RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION
> >>> +			) ? 1 : 0;
> >>> +	if (h_comp == 1)
> >>> +		harq_in_length = harq_in_length * 8 / 6;
> >>> +	harq_in_length = RTE_ALIGN(harq_in_length, 64);
> >>> +	uint16_t harq_dma_length_in = (h_comp == 0) ?
> >> Can these h_comp checks be combined to a single if/else ?
> > it may be clearer, ok.
> >
> >
> >>> +			harq_in_length :
> >>> +			harq_in_length * 6 / 8;
> >>> +	uint16_t harq_dma_length_out = harq_dma_length_in;
> >>> +	bool ddr_mem_in = check_bit(op->ldpc_dec.op_flags,
> >>> +
> >> 	RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE);
> >>> +	union acc100_harq_layout_data *harq_layout = q->d->harq_layout;
> >>> +	uint16_t harq_index = (ddr_mem_in ?
> >>> +			op->ldpc_dec.harq_combined_input.offset :
> >>> +			op->ldpc_dec.harq_combined_output.offset)
> >>> +			/ ACC100_HARQ_OFFSET;
> >>> +
> >>> +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> >>> +			& q->sw_ring_wrap_mask);
> >>> +	desc = q->ring_addr + desc_idx;
> >>> +	fcw = &desc->req.fcw_ld;
> >>> +	/* Set the FCW from loopback into DDR */
> >>> +	memset(fcw, 0, sizeof(struct acc100_fcw_ld));
> >>> +	fcw->FCWversion = ACC100_FCW_VER;
> >>> +	fcw->qm = 2;
> >>> +	fcw->Zc = 384;
> >> these magic numbers should have #defines
> > These are not magic numbers, but actually 3GPP values
> ok
> >
> >>> +	if (harq_in_length < 16 * N_ZC_1)
> >>> +		fcw->Zc = 16;
> >>> +	fcw->ncb = fcw->Zc * N_ZC_1;
> >>> +	fcw->rm_e = 2;
> >>> +	fcw->hcin_en = 1;
> >>> +	fcw->hcout_en = 1;
> >>> +
> >>> +	rte_bbdev_log(DEBUG, "Loopback IN %d Index %d offset %d length
> >> %d %d\n",
> >>> +			ddr_mem_in, harq_index,
> >>> +			harq_layout[harq_index].offset, harq_in_length,
> >>> +			harq_dma_length_in);
> >>> +
> >>> +	if (ddr_mem_in && (harq_layout[harq_index].offset > 0)) {
> >>> +		fcw->hcin_size0 = harq_layout[harq_index].size0;
> >>> +		fcw->hcin_offset = harq_layout[harq_index].offset;
> >>> +		fcw->hcin_size1 = harq_in_length - fcw->hcin_offset;
> >>> +		harq_dma_length_in = (fcw->hcin_size0 + fcw->hcin_size1);
> >>> +		if (h_comp == 1)
> >>> +			harq_dma_length_in = harq_dma_length_in * 6 / 8;
> >>> +	} else {
> >>> +		fcw->hcin_size0 = harq_in_length;
> >>> +	}
> >>> +	harq_layout[harq_index].val = 0;
> >>> +	rte_bbdev_log(DEBUG, "Loopback FCW Config %d %d %d\n",
> >>> +			fcw->hcin_size0, fcw->hcin_offset, fcw->hcin_size1);
> >>> +	fcw->hcout_size0 = harq_in_length;
> >>> +	fcw->hcin_decomp_mode = h_comp;
> >>> +	fcw->hcout_comp_mode = h_comp;
> >>> +	fcw->gain_i = 1;
> >>> +	fcw->gain_h = 1;
> >>> +
> >>> +	/* Set the prefix of descriptor. This could be done at polling */
> >>>  	desc->req.word0 = ACC100_DMA_DESC_TYPE;
> >>>  	desc->req.word1 = 0; /**< Timestamp could be disabled */
> >>>  	desc->req.word2 = 0;
> >>> @@ -1816,6 +2320,107 @@
> >>>  	return current_enqueued_cbs;
> >>>  }
> >>>
> >>> +/* Enqueue one decode operations for ACC100 device in TB mode */
> >>> +static inline int enqueue_dec_one_op_tb(struct acc100_queue *q,
> >>> +struct rte_bbdev_dec_op *op,
> >>> +		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb) {
> >>> +	union acc100_dma_desc *desc = NULL;
> >>> +	int ret;
> >>> +	uint8_t r, c;
> >>> +	uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
> >>> +		h_out_length, mbuf_total_left, seg_total_left;
> >>> +	struct rte_mbuf *input, *h_output_head, *h_output,
> >>> +		*s_output_head, *s_output;
> >>> +	uint16_t current_enqueued_cbs = 0;
> >>> +
> >>> +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> >>> +			& q->sw_ring_wrap_mask);
> >>> +	desc = q->ring_addr + desc_idx;
> >>> +	uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
> >>> +	acc100_fcw_td_fill(op, &desc->req.fcw_td);
> >>> +
> >>> +	input = op->turbo_dec.input.data;
> >>> +	h_output_head = h_output = op->turbo_dec.hard_output.data;
> >>> +	s_output_head = s_output = op->turbo_dec.soft_output.data;
> >>> +	in_offset = op->turbo_dec.input.offset;
> >>> +	h_out_offset = op->turbo_dec.hard_output.offset;
> >>> +	s_out_offset = op->turbo_dec.soft_output.offset;
> >>> +	h_out_length = s_out_length = 0;
> >>> +	mbuf_total_left = op->turbo_dec.input.length;
> >>> +	c = op->turbo_dec.tb_params.c;
> >>> +	r = op->turbo_dec.tb_params.r;
> >>> +
> >>> +	while (mbuf_total_left > 0 && r < c) {
> >>> +
> >>> +		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
> >>> +
> >>> +		/* Set up DMA descriptor */
> >>> +		desc = q->ring_addr + ((q->sw_ring_head +
> >> total_enqueued_cbs)
> >>> +				& q->sw_ring_wrap_mask);
> >>> +		desc->req.data_ptrs[0].address = q->ring_addr_phys +
> >> fcw_offset;
> >>> +		desc->req.data_ptrs[0].blen = ACC100_FCW_TD_BLEN;
> >>> +		ret = acc100_dma_desc_td_fill(op, &desc->req, &input,
> >>> +				h_output, s_output, &in_offset,
> >> &h_out_offset,
> >>> +				&s_out_offset, &h_out_length,
> >> &s_out_length,
> >>> +				&mbuf_total_left, &seg_total_left, r);
> >>> +
> >>> +		if (unlikely(ret < 0))
> >>> +			return ret;
> >>> +
> >>> +		/* Hard output */
> >>> +		mbuf_append(h_output_head, h_output, h_out_length);
> >>> +
> >>> +		/* Soft output */
> >>> +		if (check_bit(op->turbo_dec.op_flags,
> >>> +				RTE_BBDEV_TURBO_SOFT_OUTPUT))
> >>> +			mbuf_append(s_output_head, s_output,
> >> s_out_length);
> >>> +
> >>> +		/* Set total number of CBs in TB */
> >>> +		desc->req.cbs_in_tb = cbs_in_tb;
> >>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> >>> +		rte_memdump(stderr, "FCW", &desc->req.fcw_td,
> >>> +				sizeof(desc->req.fcw_td) - 8);
> >>> +		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> >> #endif
> >>> +
> >>> +		if (seg_total_left == 0) {
> >>> +			/* Go to the next mbuf */
> >>> +			input = input->next;
> >>> +			in_offset = 0;
> >>> +			h_output = h_output->next;
> >>> +			h_out_offset = 0;
> >>> +
> >>> +			if (check_bit(op->turbo_dec.op_flags,
> >>> +					RTE_BBDEV_TURBO_SOFT_OUTPUT))
> >> {
> >>> +				s_output = s_output->next;
> >>> +				s_out_offset = 0;
> >>> +			}
> >>> +		}
> >>> +
> >>> +		total_enqueued_cbs++;
> >>> +		current_enqueued_cbs++;
> >>> +		r++;
> >>> +	}
> >>> +
> >>> +	if (unlikely(desc == NULL))
> >>> +		return current_enqueued_cbs;
> >>> +
> >>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> >>> +	/* Check if any CBs left for processing */
> >>> +	if (mbuf_total_left != 0) {
> >>> +		rte_bbdev_log(ERR,
> >>> +				"Some date still left for processing:
> >> mbuf_total_left = %u",
> >>> +				mbuf_total_left);
> >>> +		return -EINVAL;
> >>> +	}
> >>> +#endif
> >>> +	/* Set SDone on last CB descriptor for TB mode */
> >>> +	desc->req.sdone_enable = 1;
> >>> +	desc->req.irq_enable = q->irq_enable;
> >>> +
> >>> +	return current_enqueued_cbs;
> >>> +}
> >>>
> >>>  /* Calculates number of CBs in processed encoder TB based on 'r'
> >>> and
> >> input
> >>>   * length.
> >>> @@ -1893,6 +2498,45 @@
> >>>  	return cbs_in_tb;
> >>>  }
> >>>
> >>> +/* Enqueue encode operations for ACC100 device in CB mode. */
> >>> +static uint16_t acc100_enqueue_enc_cb(struct rte_bbdev_queue_data
> >> *q_data,
> >>> +		struct rte_bbdev_enc_op **ops, uint16_t num) {
> >>> +	struct acc100_queue *q = q_data->queue_private;
> >>> +	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q-
> >>> sw_ring_head;
> >>> +	uint16_t i;
> >>> +	union acc100_dma_desc *desc;
> >>> +	int ret;
> >>> +
> >>> +	for (i = 0; i < num; ++i) {
> >>> +		/* Check if there are available space for further processing */
> >>> +		if (unlikely(avail - 1 < 0))
> >>> +			break;
> >>> +		avail -= 1;
> >>> +
> >>> +		ret = enqueue_enc_one_op_cb(q, ops[i], i);
> >>> +		if (ret < 0)
> >>> +			break;
> >>> +	}
> >>> +
> >>> +	if (unlikely(i == 0))
> >>> +		return 0; /* Nothing to enqueue */
> >>> +
> >>> +	/* Set SDone in last CB in enqueued ops for CB mode*/
> >>> +	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
> >>> +			& q->sw_ring_wrap_mask);
> >>> +	desc->req.sdone_enable = 1;
> >>> +	desc->req.irq_enable = q->irq_enable;
> >>> +
> >>> +	acc100_dma_enqueue(q, i, &q_data->queue_stats);
> >>> +
> >>> +	/* Update stats */
> >>> +	q_data->queue_stats.enqueued_count += i;
> >>> +	q_data->queue_stats.enqueue_err_count += num - i;
> >>> +	return i;
> >>> +}
> >>> +
> >>>  /* Check we can mux encode operations with common FCW */  static
> >>> inline bool  check_mux(struct rte_bbdev_enc_op **ops, uint16_t num)
> >>> { @@ -1960,6 +2604,52 @@
> >>>  	return i;
> >>>  }
> >>>
> >>> +/* Enqueue encode operations for ACC100 device in TB mode. */
> >>> +static uint16_t acc100_enqueue_enc_tb(struct rte_bbdev_queue_data
> >> *q_data,
> >>> +		struct rte_bbdev_enc_op **ops, uint16_t num) {
> >>> +	struct acc100_queue *q = q_data->queue_private;
> >>> +	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q-
> >>> sw_ring_head;
> >>> +	uint16_t i, enqueued_cbs = 0;
> >>> +	uint8_t cbs_in_tb;
> >>> +	int ret;
> >>> +
> >>> +	for (i = 0; i < num; ++i) {
> >>> +		cbs_in_tb = get_num_cbs_in_tb_enc(&ops[i]->turbo_enc);
> >>> +		/* Check if there are available space for further processing */
> >>> +		if (unlikely(avail - cbs_in_tb < 0))
> >>> +			break;
> >>> +		avail -= cbs_in_tb;
> >>> +
> >>> +		ret = enqueue_enc_one_op_tb(q, ops[i], enqueued_cbs,
> >> cbs_in_tb);
> >>> +		if (ret < 0)
> >>> +			break;
> >>> +		enqueued_cbs += ret;
> >>> +	}
> >>> +
> >> other similar functions have a (i == 0) check here.
> > ok
> >
> >>> +	acc100_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
> >>> +
> >>> +	/* Update stats */
> >>> +	q_data->queue_stats.enqueued_count += i;
> >>> +	q_data->queue_stats.enqueue_err_count += num - i;
> >>> +
> >>> +	return i;
> >>> +}
> >>> +
> >>> +/* Enqueue encode operations for ACC100 device. */ static uint16_t
> >>> +acc100_enqueue_enc(struct rte_bbdev_queue_data *q_data,
> >>> +		struct rte_bbdev_enc_op **ops, uint16_t num) {
> >>> +	if (unlikely(num == 0))
> >>> +		return 0;
> >> num == 0 check should move into the tb/cb functions
> > same comment on other patch, why not catch it early?
> >
> >>> +	if (ops[0]->turbo_enc.code_block_mode == 0)
> >>> +		return acc100_enqueue_enc_tb(q_data, ops, num);
> >>> +	else
> >>> +		return acc100_enqueue_enc_cb(q_data, ops, num); }
> >>> +
> >>>  /* Enqueue encode operations for ACC100 device. */  static uint16_t
> >>> acc100_enqueue_ldpc_enc(struct rte_bbdev_queue_data *q_data, @@
> >>> -1967,7 +2657,51 @@  {
> >>>  	if (unlikely(num == 0))
> >>>  		return 0;
> >>> -	return acc100_enqueue_ldpc_enc_cb(q_data, ops, num);
> >>> +	if (ops[0]->ldpc_enc.code_block_mode == 0)
> >>> +		return acc100_enqueue_enc_tb(q_data, ops, num);
> >>> +	else
> >>> +		return acc100_enqueue_ldpc_enc_cb(q_data, ops, num); }
> >>> +
> >>> +
> >>> +/* Enqueue decode operations for ACC100 device in CB mode */ static
> >>> +uint16_t acc100_enqueue_dec_cb(struct rte_bbdev_queue_data
> >> *q_data,
> >>> +		struct rte_bbdev_dec_op **ops, uint16_t num) {
> >> Seems like the 10th variant of a similar function could these be
> >> combined to fewer functions ?
> >>
> >> Maybe by passing in a function pointer to the enqueue_one_dec_one*
> >> that does the work ?
> > They have some variants related to the actual operation and constraints.
> > Not obvious to have a valuable refactor.
> >
> As above nothing functionally wrong, just something to consider
> 
> ok.

I agree in principle, and this was done in a number of places (i.e. acc100_dma_fill_blk_type_out(), etc.). 
I think we can look in parallel into that idea you suggested earlier of "common code" and future refactoring around that, 
notably to support future generations of devices in that family with feature overlap, and hence avoid/limit code duplication moving forward across different PMDs. 
More something for 2021. Thanks

> 
> Tom
> 
> >>> +	struct acc100_queue *q = q_data->queue_private;
> >>> +	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q-
> >>> sw_ring_head;
> >>> +	uint16_t i;
> >>> +	union acc100_dma_desc *desc;
> >>> +	int ret;
> >>> +
> >>> +	for (i = 0; i < num; ++i) {
> >>> +		/* Check if there are available space for further processing */
> >>> +		if (unlikely(avail - 1 < 0))
> >>> +			break;
> >>> +		avail -= 1;
> >>> +
> >>> +		ret = enqueue_dec_one_op_cb(q, ops[i], i);
> >>> +		if (ret < 0)
> >>> +			break;
> >>> +	}
> >>> +
> >>> +	if (unlikely(i == 0))
> >>> +		return 0; /* Nothing to enqueue */
> >>> +
> >>> +	/* Set SDone in last CB in enqueued ops for CB mode*/
> >>> +	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
> >>> +			& q->sw_ring_wrap_mask);
> >>> +	desc->req.sdone_enable = 1;
> >>> +	desc->req.irq_enable = q->irq_enable;
> >>> +
> >>> +	acc100_dma_enqueue(q, i, &q_data->queue_stats);
> >>> +
> >>> +	/* Update stats */
> >>> +	q_data->queue_stats.enqueued_count += i;
> >>> +	q_data->queue_stats.enqueue_err_count += num - i;
> >>> +
> >>> +	return i;
> >>>  }
> >>>
> >>>  /* Check we can mux encode operations with common FCW */ @@ -
> >> 2065,6
> >>> +2799,53 @@
> >>>  	return i;
> >>>  }
> >>>
> >>> +
> >>> +/* Enqueue decode operations for ACC100 device in TB mode */ static
> >>> +uint16_t acc100_enqueue_dec_tb(struct rte_bbdev_queue_data
> >> *q_data,
> >>> +		struct rte_bbdev_dec_op **ops, uint16_t num) {
> >> 11th ;)
> >>> +	struct acc100_queue *q = q_data->queue_private;
> >>> +	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q-
> >>> sw_ring_head;
> >>> +	uint16_t i, enqueued_cbs = 0;
> >>> +	uint8_t cbs_in_tb;
> >>> +	int ret;
> >>> +
> >>> +	for (i = 0; i < num; ++i) {
> >>> +		cbs_in_tb = get_num_cbs_in_tb_dec(&ops[i]->turbo_dec);
> >>> +		/* Check if there are available space for further processing */
> >>> +		if (unlikely(avail - cbs_in_tb < 0))
> >>> +			break;
> >>> +		avail -= cbs_in_tb;
> >>> +
> >>> +		ret = enqueue_dec_one_op_tb(q, ops[i], enqueued_cbs,
> >> cbs_in_tb);
> >>> +		if (ret < 0)
> >>> +			break;
> >>> +		enqueued_cbs += ret;
> >>> +	}
> >>> +
> >>> +	acc100_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
> >>> +
> >>> +	/* Update stats */
> >>> +	q_data->queue_stats.enqueued_count += i;
> >>> +	q_data->queue_stats.enqueue_err_count += num - i;
> >>> +
> >>> +	return i;
> >>> +}
> >>> +
> >>> +/* Enqueue decode operations for ACC100 device. */ static uint16_t
> >>> +acc100_enqueue_dec(struct rte_bbdev_queue_data *q_data,
> >>> +		struct rte_bbdev_dec_op **ops, uint16_t num) {
> >>> +	if (unlikely(num == 0))
> >>> +		return 0;
> >> similar move the num == 0 check to the tb/cb functions.
> > same comment
> >
> >>> +	if (ops[0]->turbo_dec.code_block_mode == 0)
> >>> +		return acc100_enqueue_dec_tb(q_data, ops, num);
> >>> +	else
> >>> +		return acc100_enqueue_dec_cb(q_data, ops, num); }
> >>> +
> >>>  /* Enqueue decode operations for ACC100 device. */  static uint16_t
> >>> acc100_enqueue_ldpc_dec(struct rte_bbdev_queue_data *q_data, @@
> >>> -2388,6 +3169,51 @@
> >>>  	return cb_idx;
> >>>  }
> >>>
> >>> +/* Dequeue encode operations from ACC100 device. */ static uint16_t
> >>> +acc100_dequeue_enc(struct rte_bbdev_queue_data *q_data,
> >>> +		struct rte_bbdev_enc_op **ops, uint16_t num) {
> >>> +	struct acc100_queue *q = q_data->queue_private;
> >>> +	uint16_t dequeue_num;
> >>> +	uint32_t avail = q->sw_ring_head - q->sw_ring_tail;
> >>> +	uint32_t aq_dequeued = 0;
> >>> +	uint16_t i;
> >>> +	uint16_t dequeued_cbs = 0;
> >>> +	struct rte_bbdev_enc_op *op;
> >>> +	int ret;
> >>> +
> >>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> >>> +	if (unlikely(ops == 0 && q == NULL))
> >> ops is a pointer so should compare with NULL
> >>
> >> The && likely needs to be ||
> >>
> >> Maybe print out a message so caller knows something wrong happened.
> > ok
> >
> >>> +		return 0;
> >>> +#endif
> >>> +
> >>> +	dequeue_num = (avail < num) ? avail : num;
> >>> +
> >>> +	for (i = 0; i < dequeue_num; ++i) {
> >>> +		op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
> >>> +			& q->sw_ring_wrap_mask))->req.op_addr;
> >>> +		if (op->turbo_enc.code_block_mode == 0)
> >>> +			ret = dequeue_enc_one_op_tb(q, &ops[i],
> >> dequeued_cbs,
> >>> +					&aq_dequeued);
> >>> +		else
> >>> +			ret = dequeue_enc_one_op_cb(q, &ops[i],
> >> dequeued_cbs,
> >>> +					&aq_dequeued);
> >>> +
> >>> +		if (ret < 0)
> >>> +			break;
> >>> +		dequeued_cbs += ret;
> >>> +	}
> >>> +
> >>> +	q->aq_dequeued += aq_dequeued;
> >>> +	q->sw_ring_tail += dequeued_cbs;
> >>> +
> >>> +	/* Update enqueue stats */
> >>> +	q_data->queue_stats.dequeued_count += i;
> >>> +
> >>> +	return i;
> >>> +}
> >>> +
> >>>  /* Dequeue LDPC encode operations from ACC100 device. */  static
> >>> uint16_t  acc100_dequeue_ldpc_enc(struct rte_bbdev_queue_data
> >> *q_data,
> >>> @@ -2426,6 +3252,52 @@
> >>>  	return dequeued_cbs;
> >>>  }
> >>>
> >>> +
> >>> +/* Dequeue decode operations from ACC100 device. */ static uint16_t
> >>> +acc100_dequeue_dec(struct rte_bbdev_queue_data *q_data,
> >>> +		struct rte_bbdev_dec_op **ops, uint16_t num) {
> >> very similar to enc function above, consider how to combine them to a
> >> single function.
> >>
> >> Tom
> >>
> >>> +	struct acc100_queue *q = q_data->queue_private;
> >>> +	uint16_t dequeue_num;
> >>> +	uint32_t avail = q->sw_ring_head - q->sw_ring_tail;
> >>> +	uint32_t aq_dequeued = 0;
> >>> +	uint16_t i;
> >>> +	uint16_t dequeued_cbs = 0;
> >>> +	struct rte_bbdev_dec_op *op;
> >>> +	int ret;
> >>> +
> >>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> >>> +	if (unlikely(ops == 0 && q == NULL))
> >>> +		return 0;
> >>> +#endif
> >>> +
> >>> +	dequeue_num = (avail < num) ? avail : num;
> >>> +
> >>> +	for (i = 0; i < dequeue_num; ++i) {
> >>> +		op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
> >>> +			& q->sw_ring_wrap_mask))->req.op_addr;
> >>> +		if (op->turbo_dec.code_block_mode == 0)
> >>> +			ret = dequeue_dec_one_op_tb(q, &ops[i],
> >> dequeued_cbs,
> >>> +					&aq_dequeued);
> >>> +		else
> >>> +			ret = dequeue_dec_one_op_cb(q_data, q, &ops[i],
> >>> +					dequeued_cbs, &aq_dequeued);
> >>> +
> >>> +		if (ret < 0)
> >>> +			break;
> >>> +		dequeued_cbs += ret;
> >>> +	}
> >>> +
> >>> +	q->aq_dequeued += aq_dequeued;
> >>> +	q->sw_ring_tail += dequeued_cbs;
> >>> +
> >>> +	/* Update enqueue stats */
> >>> +	q_data->queue_stats.dequeued_count += i;
> >>> +
> >>> +	return i;
> >>> +}
> >>> +
> >>>  /* Dequeue decode operations from ACC100 device. */  static
> >>> uint16_t acc100_dequeue_ldpc_dec(struct rte_bbdev_queue_data
> >>> *q_data, @@
> >>> -2479,6 +3351,10 @@
> >>>  	struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device);
> >>>
> >>>  	dev->dev_ops = &acc100_bbdev_ops;
> >>> +	dev->enqueue_enc_ops = acc100_enqueue_enc;
> >>> +	dev->enqueue_dec_ops = acc100_enqueue_dec;
> >>> +	dev->dequeue_enc_ops = acc100_dequeue_enc;
> >>> +	dev->dequeue_dec_ops = acc100_dequeue_dec;
> >>>  	dev->enqueue_ldpc_enc_ops = acc100_enqueue_ldpc_enc;
> >>>  	dev->enqueue_ldpc_dec_ops = acc100_enqueue_ldpc_dec;
> >>>  	dev->dequeue_ldpc_enc_ops = acc100_dequeue_ldpc_enc;
  

Patch

diff --git a/doc/guides/bbdevs/features/acc100.ini b/doc/guides/bbdevs/features/acc100.ini
index 40c7adc..642cd48 100644
--- a/doc/guides/bbdevs/features/acc100.ini
+++ b/doc/guides/bbdevs/features/acc100.ini
@@ -4,8 +4,8 @@ 
 ; Refer to default.ini for the full list of available PMD features.
 ;
 [Features]
-Turbo Decoder (4G)     = N
-Turbo Encoder (4G)     = N
+Turbo Decoder (4G)     = Y
+Turbo Encoder (4G)     = Y
 LDPC Decoder (5G)      = Y
 LDPC Encoder (5G)      = Y
 LLR/HARQ Compression   = Y
diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c b/drivers/baseband/acc100/rte_acc100_pmd.c
index e484c0a..7d4c3df 100644
--- a/drivers/baseband/acc100/rte_acc100_pmd.c
+++ b/drivers/baseband/acc100/rte_acc100_pmd.c
@@ -339,7 +339,6 @@ 
 	free_base_addresses(base_addrs, i);
 }
 
-
 /* Allocate 64MB memory used for all software rings */
 static int
 acc100_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
@@ -637,6 +636,41 @@ 
 
 	static const struct rte_bbdev_op_cap bbdev_capabilities[] = {
 		{
+			.type = RTE_BBDEV_OP_TURBO_DEC,
+			.cap.turbo_dec = {
+				.capability_flags =
+					RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
+					RTE_BBDEV_TURBO_CRC_TYPE_24B |
+					RTE_BBDEV_TURBO_HALF_ITERATION_EVEN |
+					RTE_BBDEV_TURBO_EARLY_TERMINATION |
+					RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
+					RTE_BBDEV_TURBO_MAP_DEC |
+					RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
+					RTE_BBDEV_TURBO_DEC_SCATTER_GATHER,
+				.max_llr_modulus = INT8_MAX,
+				.num_buffers_src =
+						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+				.num_buffers_hard_out =
+						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+				.num_buffers_soft_out =
+						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+			}
+		},
+		{
+			.type = RTE_BBDEV_OP_TURBO_ENC,
+			.cap.turbo_enc = {
+				.capability_flags =
+					RTE_BBDEV_TURBO_CRC_24B_ATTACH |
+					RTE_BBDEV_TURBO_RV_INDEX_BYPASS |
+					RTE_BBDEV_TURBO_RATE_MATCH |
+					RTE_BBDEV_TURBO_ENC_SCATTER_GATHER,
+				.num_buffers_src =
+						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+				.num_buffers_dst =
+						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+			}
+		},
+		{
 			.type   = RTE_BBDEV_OP_LDPC_ENC,
 			.cap.ldpc_enc = {
 				.capability_flags =
@@ -719,7 +753,6 @@ 
 #endif
 }
 
-
 static const struct rte_bbdev_ops acc100_bbdev_ops = {
 	.setup_queues = acc100_setup_queues,
 	.close = acc100_dev_close,
@@ -763,6 +796,58 @@ 
 	return tail;
 }
 
+/* Fill in a frame control word for turbo encoding. */
+static inline void
+acc100_fcw_te_fill(const struct rte_bbdev_enc_op *op, struct acc100_fcw_te *fcw)
+{
+	fcw->code_block_mode = op->turbo_enc.code_block_mode;
+	if (fcw->code_block_mode == 0) { /* For TB mode */
+		fcw->k_neg = op->turbo_enc.tb_params.k_neg;
+		fcw->k_pos = op->turbo_enc.tb_params.k_pos;
+		fcw->c_neg = op->turbo_enc.tb_params.c_neg;
+		fcw->c = op->turbo_enc.tb_params.c;
+		fcw->ncb_neg = op->turbo_enc.tb_params.ncb_neg;
+		fcw->ncb_pos = op->turbo_enc.tb_params.ncb_pos;
+
+		if (check_bit(op->turbo_enc.op_flags,
+				RTE_BBDEV_TURBO_RATE_MATCH)) {
+			fcw->bypass_rm = 0;
+			fcw->cab = op->turbo_enc.tb_params.cab;
+			fcw->ea = op->turbo_enc.tb_params.ea;
+			fcw->eb = op->turbo_enc.tb_params.eb;
+		} else {
+			/* E is set to the encoding output size when RM is
+			 * bypassed.
+			 */
+			fcw->bypass_rm = 1;
+			fcw->cab = fcw->c_neg;
+			fcw->ea = 3 * fcw->k_neg + 12;
+			fcw->eb = 3 * fcw->k_pos + 12;
+		}
+	} else { /* For CB mode */
+		fcw->k_pos = op->turbo_enc.cb_params.k;
+		fcw->ncb_pos = op->turbo_enc.cb_params.ncb;
+
+		if (check_bit(op->turbo_enc.op_flags,
+				RTE_BBDEV_TURBO_RATE_MATCH)) {
+			fcw->bypass_rm = 0;
+			fcw->eb = op->turbo_enc.cb_params.e;
+		} else {
+			/* E is set to the encoding output size when RM is
+			 * bypassed.
+			 */
+			fcw->bypass_rm = 1;
+			fcw->eb = 3 * fcw->k_pos + 12;
+		}
+	}
+
+	fcw->bypass_rv_idx1 = check_bit(op->turbo_enc.op_flags,
+			RTE_BBDEV_TURBO_RV_INDEX_BYPASS);
+	fcw->code_block_crc = check_bit(op->turbo_enc.op_flags,
+			RTE_BBDEV_TURBO_CRC_24B_ATTACH);
+	fcw->rv_idx1 = op->turbo_enc.rv_index;
+}
+
 /* Compute value of k0.
  * Based on 3GPP 38.212 Table 5.4.2.1-2
  * Starting position of different redundancy versions, k0
@@ -813,6 +898,25 @@ 
 	fcw->mcb_count = num_cb;
 }
 
+/* Fill in a frame control word for turbo decoding. */
+static inline void
+acc100_fcw_td_fill(const struct rte_bbdev_dec_op *op, struct acc100_fcw_td *fcw)
+{
+	/* Note : Early termination is always enabled for 4GUL */
+	fcw->fcw_ver = 1;
+	if (op->turbo_dec.code_block_mode == 0)
+		fcw->k_pos = op->turbo_dec.tb_params.k_pos;
+	else
+		fcw->k_pos = op->turbo_dec.cb_params.k;
+	fcw->turbo_crc_type = check_bit(op->turbo_dec.op_flags,
+			RTE_BBDEV_TURBO_CRC_TYPE_24B);
+	fcw->bypass_sb_deint = 0;
+	fcw->raw_decoder_input_on = 0;
+	fcw->max_iter = op->turbo_dec.iter_max;
+	fcw->half_iter_on = !check_bit(op->turbo_dec.op_flags,
+			RTE_BBDEV_TURBO_HALF_ITERATION_EVEN);
+}
+
 /* Fill in a frame control word for LDPC decoding. */
 static inline void
 acc100_fcw_ld_fill(const struct rte_bbdev_dec_op *op, struct acc100_fcw_ld *fcw,
@@ -1042,6 +1146,87 @@ 
 }
 
 static inline int
+acc100_dma_desc_te_fill(struct rte_bbdev_enc_op *op,
+		struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
+		struct rte_mbuf *output, uint32_t *in_offset,
+		uint32_t *out_offset, uint32_t *out_length,
+		uint32_t *mbuf_total_left, uint32_t *seg_total_left, uint8_t r)
+{
+	int next_triplet = 1; /* FCW already done */
+	uint32_t e, ea, eb, length;
+	uint16_t k, k_neg, k_pos;
+	uint8_t cab, c_neg;
+
+	desc->word0 = ACC100_DMA_DESC_TYPE;
+	desc->word1 = 0; /**< Timestamp could be disabled */
+	desc->word2 = 0;
+	desc->word3 = 0;
+	desc->numCBs = 1;
+
+	if (op->turbo_enc.code_block_mode == 0) {
+		ea = op->turbo_enc.tb_params.ea;
+		eb = op->turbo_enc.tb_params.eb;
+		cab = op->turbo_enc.tb_params.cab;
+		k_neg = op->turbo_enc.tb_params.k_neg;
+		k_pos = op->turbo_enc.tb_params.k_pos;
+		c_neg = op->turbo_enc.tb_params.c_neg;
+		e = (r < cab) ? ea : eb;
+		k = (r < c_neg) ? k_neg : k_pos;
+	} else {
+		e = op->turbo_enc.cb_params.e;
+		k = op->turbo_enc.cb_params.k;
+	}
+
+	if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_CRC_24B_ATTACH))
+		length = (k - 24) >> 3;
+	else
+		length = k >> 3;
+
+	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < length))) {
+		rte_bbdev_log(ERR,
+				"Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
+				*mbuf_total_left, length);
+		return -1;
+	}
+
+	next_triplet = acc100_dma_fill_blk_type_in(desc, input, in_offset,
+			length, seg_total_left, next_triplet);
+	if (unlikely(next_triplet < 0)) {
+		rte_bbdev_log(ERR,
+				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
+				op);
+		return -1;
+	}
+	desc->data_ptrs[next_triplet - 1].last = 1;
+	desc->m2dlen = next_triplet;
+	*mbuf_total_left -= length;
+
+	/* Set output length */
+	if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_RATE_MATCH))
+		/* Integer round up division by 8 */
+		*out_length = (e + 7) >> 3;
+	else
+		*out_length = (k >> 3) * 3 + 2;
+
+	next_triplet = acc100_dma_fill_blk_type_out(desc, output, *out_offset,
+			*out_length, next_triplet, ACC100_DMA_BLKID_OUT_ENC);
+	if (unlikely(next_triplet < 0)) {
+		rte_bbdev_log(ERR,
+				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
+				op);
+		return -1;
+	}
+	op->turbo_enc.output.length += *out_length;
+	*out_offset += *out_length;
+	desc->data_ptrs[next_triplet - 1].last = 1;
+	desc->d2mlen = next_triplet - desc->m2dlen;
+
+	desc->op_addr = op;
+
+	return 0;
+}
+
+static inline int
 acc100_dma_desc_le_fill(struct rte_bbdev_enc_op *op,
 		struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
 		struct rte_mbuf *output, uint32_t *in_offset,
@@ -1110,6 +1295,117 @@ 
 }
 
 static inline int
+acc100_dma_desc_td_fill(struct rte_bbdev_dec_op *op,
+		struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
+		struct rte_mbuf *h_output, struct rte_mbuf *s_output,
+		uint32_t *in_offset, uint32_t *h_out_offset,
+		uint32_t *s_out_offset, uint32_t *h_out_length,
+		uint32_t *s_out_length, uint32_t *mbuf_total_left,
+		uint32_t *seg_total_left, uint8_t r)
+{
+	int next_triplet = 1; /* FCW already done */
+	uint16_t k;
+	uint16_t crc24_overlap = 0;
+	uint32_t e, kw;
+
+	desc->word0 = ACC100_DMA_DESC_TYPE;
+	desc->word1 = 0; /**< Timestamp could be disabled */
+	desc->word2 = 0;
+	desc->word3 = 0;
+	desc->numCBs = 1;
+
+	if (op->turbo_dec.code_block_mode == 0) {
+		k = (r < op->turbo_dec.tb_params.c_neg)
+			? op->turbo_dec.tb_params.k_neg
+			: op->turbo_dec.tb_params.k_pos;
+		e = (r < op->turbo_dec.tb_params.cab)
+			? op->turbo_dec.tb_params.ea
+			: op->turbo_dec.tb_params.eb;
+	} else {
+		k = op->turbo_dec.cb_params.k;
+		e = op->turbo_dec.cb_params.e;
+	}
+
+	if ((op->turbo_dec.code_block_mode == 0)
+		&& !check_bit(op->turbo_dec.op_flags,
+		RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
+		crc24_overlap = 24;
+
+	/* Calculates circular buffer size.
+	 * According to 3gpp 36.212 section 5.1.4.2
+	 *   Kw = 3 * Kpi,
+	 * where:
+	 *   Kpi = nCol * nRow
+	 * where nCol is 32 and nRow can be calculated from:
+	 *   D =< nCol * nRow
+	 * where D is the size of each output from turbo encoder block (k + 4).
+	 */
+	kw = RTE_ALIGN_CEIL(k + 4, 32) * 3;
+
+	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < kw))) {
+		rte_bbdev_log(ERR,
+				"Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
+				*mbuf_total_left, kw);
+		return -1;
+	}
+
+	next_triplet = acc100_dma_fill_blk_type_in(desc, input, in_offset, kw,
+			seg_total_left, next_triplet);
+	if (unlikely(next_triplet < 0)) {
+		rte_bbdev_log(ERR,
+				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
+				op);
+		return -1;
+	}
+	desc->data_ptrs[next_triplet - 1].last = 1;
+	desc->m2dlen = next_triplet;
+	*mbuf_total_left -= kw;
+
+	next_triplet = acc100_dma_fill_blk_type_out(
+			desc, h_output, *h_out_offset,
+			k >> 3, next_triplet, ACC100_DMA_BLKID_OUT_HARD);
+	if (unlikely(next_triplet < 0)) {
+		rte_bbdev_log(ERR,
+				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
+				op);
+		return -1;
+	}
+
+	*h_out_length = ((k - crc24_overlap) >> 3);
+	op->turbo_dec.hard_output.length += *h_out_length;
+	*h_out_offset += *h_out_length;
+
+	/* Soft output */
+	if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
+		if (check_bit(op->turbo_dec.op_flags,
+				RTE_BBDEV_TURBO_EQUALIZER))
+			*s_out_length = e;
+		else
+			*s_out_length = (k * 3) + 12;
+
+		next_triplet = acc100_dma_fill_blk_type_out(desc, s_output,
+				*s_out_offset, *s_out_length, next_triplet,
+				ACC100_DMA_BLKID_OUT_SOFT);
+		if (unlikely(next_triplet < 0)) {
+			rte_bbdev_log(ERR,
+					"Mismatch between data to process and mbuf data length in bbdev_op: %p",
+					op);
+			return -1;
+		}
+
+		op->turbo_dec.soft_output.length += *s_out_length;
+		*s_out_offset += *s_out_length;
+	}
+
+	desc->data_ptrs[next_triplet - 1].last = 1;
+	desc->d2mlen = next_triplet - desc->m2dlen;
+
+	desc->op_addr = op;
+
+	return 0;
+}
+
+static inline int
 acc100_dma_desc_ld_fill(struct rte_bbdev_dec_op *op,
 		struct acc100_dma_req_desc *desc,
 		struct rte_mbuf **input, struct rte_mbuf *h_output,
@@ -1374,6 +1670,57 @@ 
 
 /* Enqueue one encode operations for ACC100 device in CB mode */
 static inline int
+enqueue_enc_one_op_cb(struct acc100_queue *q, struct rte_bbdev_enc_op *op,
+		uint16_t total_enqueued_cbs)
+{
+	union acc100_dma_desc *desc = NULL;
+	int ret;
+	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
+		seg_total_left;
+	struct rte_mbuf *input, *output_head, *output;
+
+	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
+			& q->sw_ring_wrap_mask);
+	desc = q->ring_addr + desc_idx;
+	acc100_fcw_te_fill(op, &desc->req.fcw_te);
+
+	input = op->turbo_enc.input.data;
+	output_head = output = op->turbo_enc.output.data;
+	in_offset = op->turbo_enc.input.offset;
+	out_offset = op->turbo_enc.output.offset;
+	out_length = 0;
+	mbuf_total_left = op->turbo_enc.input.length;
+	seg_total_left = rte_pktmbuf_data_len(op->turbo_enc.input.data)
+			- in_offset;
+
+	ret = acc100_dma_desc_te_fill(op, &desc->req, &input, output,
+			&in_offset, &out_offset, &out_length, &mbuf_total_left,
+			&seg_total_left, 0);
+
+	if (unlikely(ret < 0))
+		return ret;
+
+	mbuf_append(output_head, output, out_length);
+
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+	rte_memdump(stderr, "FCW", &desc->req.fcw_te,
+			sizeof(desc->req.fcw_te) - 8);
+	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
+
+	/* Check if any data left after processing one CB */
+	if (mbuf_total_left != 0) {
+		rte_bbdev_log(ERR,
+				"Some date still left after processing one CB: mbuf_total_left = %u",
+				mbuf_total_left);
+		return -EINVAL;
+	}
+#endif
+	/* One CB (one op) was successfully prepared to enqueue */
+	return 1;
+}
+
+/* Enqueue one encode operations for ACC100 device in CB mode */
+static inline int
 enqueue_ldpc_enc_n_op_cb(struct acc100_queue *q, struct rte_bbdev_enc_op **ops,
 		uint16_t total_enqueued_cbs, int16_t num)
 {
@@ -1481,78 +1828,235 @@ 
 	return 1;
 }
 
-static inline int
-harq_loopback(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
-		uint16_t total_enqueued_cbs) {
-	struct acc100_fcw_ld *fcw;
-	union acc100_dma_desc *desc;
-	int next_triplet = 1;
-	struct rte_mbuf *hq_output_head, *hq_output;
-	uint16_t harq_in_length = op->ldpc_dec.harq_combined_input.length;
-	if (harq_in_length == 0) {
-		rte_bbdev_log(ERR, "Loopback of invalid null size\n");
-		return -EINVAL;
-	}
 
-	int h_comp = check_bit(op->ldpc_dec.op_flags,
-			RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION
-			) ? 1 : 0;
-	if (h_comp == 1)
-		harq_in_length = harq_in_length * 8 / 6;
-	harq_in_length = RTE_ALIGN(harq_in_length, 64);
-	uint16_t harq_dma_length_in = (h_comp == 0) ?
-			harq_in_length :
-			harq_in_length * 6 / 8;
-	uint16_t harq_dma_length_out = harq_dma_length_in;
-	bool ddr_mem_in = check_bit(op->ldpc_dec.op_flags,
-			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE);
-	union acc100_harq_layout_data *harq_layout = q->d->harq_layout;
-	uint16_t harq_index = (ddr_mem_in ?
-			op->ldpc_dec.harq_combined_input.offset :
-			op->ldpc_dec.harq_combined_output.offset)
-			/ ACC100_HARQ_OFFSET;
+/* Enqueue one encode operations for ACC100 device in TB mode. */
+static inline int
+enqueue_enc_one_op_tb(struct acc100_queue *q, struct rte_bbdev_enc_op *op,
+		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb)
+{
+	union acc100_dma_desc *desc = NULL;
+	int ret;
+	uint8_t r, c;
+	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
+		seg_total_left;
+	struct rte_mbuf *input, *output_head, *output;
+	uint16_t current_enqueued_cbs = 0;
 
 	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
 			& q->sw_ring_wrap_mask);
 	desc = q->ring_addr + desc_idx;
-	fcw = &desc->req.fcw_ld;
-	/* Set the FCW from loopback into DDR */
-	memset(fcw, 0, sizeof(struct acc100_fcw_ld));
-	fcw->FCWversion = ACC100_FCW_VER;
-	fcw->qm = 2;
-	fcw->Zc = 384;
-	if (harq_in_length < 16 * N_ZC_1)
-		fcw->Zc = 16;
-	fcw->ncb = fcw->Zc * N_ZC_1;
-	fcw->rm_e = 2;
-	fcw->hcin_en = 1;
-	fcw->hcout_en = 1;
+	uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
+	acc100_fcw_te_fill(op, &desc->req.fcw_te);
 
-	rte_bbdev_log(DEBUG, "Loopback IN %d Index %d offset %d length %d %d\n",
-			ddr_mem_in, harq_index,
-			harq_layout[harq_index].offset, harq_in_length,
-			harq_dma_length_in);
+	input = op->turbo_enc.input.data;
+	output_head = output = op->turbo_enc.output.data;
+	in_offset = op->turbo_enc.input.offset;
+	out_offset = op->turbo_enc.output.offset;
+	out_length = 0;
+	mbuf_total_left = op->turbo_enc.input.length;
 
-	if (ddr_mem_in && (harq_layout[harq_index].offset > 0)) {
-		fcw->hcin_size0 = harq_layout[harq_index].size0;
-		fcw->hcin_offset = harq_layout[harq_index].offset;
-		fcw->hcin_size1 = harq_in_length - fcw->hcin_offset;
-		harq_dma_length_in = (fcw->hcin_size0 + fcw->hcin_size1);
-		if (h_comp == 1)
-			harq_dma_length_in = harq_dma_length_in * 6 / 8;
-	} else {
-		fcw->hcin_size0 = harq_in_length;
-	}
-	harq_layout[harq_index].val = 0;
-	rte_bbdev_log(DEBUG, "Loopback FCW Config %d %d %d\n",
-			fcw->hcin_size0, fcw->hcin_offset, fcw->hcin_size1);
-	fcw->hcout_size0 = harq_in_length;
-	fcw->hcin_decomp_mode = h_comp;
-	fcw->hcout_comp_mode = h_comp;
-	fcw->gain_i = 1;
-	fcw->gain_h = 1;
+	c = op->turbo_enc.tb_params.c;
+	r = op->turbo_enc.tb_params.r;
 
-	/* Set the prefix of descriptor. This could be done at polling */
+	while (mbuf_total_left > 0 && r < c) {
+		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
+		/* Set up DMA descriptor */
+		desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
+				& q->sw_ring_wrap_mask);
+		desc->req.data_ptrs[0].address = q->ring_addr_phys + fcw_offset;
+		desc->req.data_ptrs[0].blen = ACC100_FCW_TE_BLEN;
+
+		ret = acc100_dma_desc_te_fill(op, &desc->req, &input, output,
+				&in_offset, &out_offset, &out_length,
+				&mbuf_total_left, &seg_total_left, r);
+		if (unlikely(ret < 0))
+			return ret;
+		mbuf_append(output_head, output, out_length);
+
+		/* Set total number of CBs in TB */
+		desc->req.cbs_in_tb = cbs_in_tb;
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+		rte_memdump(stderr, "FCW", &desc->req.fcw_te,
+				sizeof(desc->req.fcw_te) - 8);
+		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
+#endif
+
+		if (seg_total_left == 0) {
+			/* Go to the next mbuf */
+			input = input->next;
+			in_offset = 0;
+			output = output->next;
+			out_offset = 0;
+		}
+
+		total_enqueued_cbs++;
+		current_enqueued_cbs++;
+		r++;
+	}
+
+	if (unlikely(desc == NULL))
+		return current_enqueued_cbs;
+
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+	/* Check if any CBs left for processing */
+	if (mbuf_total_left != 0) {
+		rte_bbdev_log(ERR,
+				"Some date still left for processing: mbuf_total_left = %u",
+				mbuf_total_left);
+		return -EINVAL;
+	}
+#endif
+
+	/* Set SDone on last CB descriptor for TB mode. */
+	desc->req.sdone_enable = 1;
+	desc->req.irq_enable = q->irq_enable;
+
+	return current_enqueued_cbs;
+}
+
+/** Enqueue one decode operations for ACC100 device in CB mode */
+static inline int
+enqueue_dec_one_op_cb(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
+		uint16_t total_enqueued_cbs)
+{
+	union acc100_dma_desc *desc = NULL;
+	int ret;
+	uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
+		h_out_length, mbuf_total_left, seg_total_left;
+	struct rte_mbuf *input, *h_output_head, *h_output,
+		*s_output_head, *s_output;
+
+	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
+			& q->sw_ring_wrap_mask);
+	desc = q->ring_addr + desc_idx;
+	acc100_fcw_td_fill(op, &desc->req.fcw_td);
+
+	input = op->turbo_dec.input.data;
+	h_output_head = h_output = op->turbo_dec.hard_output.data;
+	s_output_head = s_output = op->turbo_dec.soft_output.data;
+	in_offset = op->turbo_dec.input.offset;
+	h_out_offset = op->turbo_dec.hard_output.offset;
+	s_out_offset = op->turbo_dec.soft_output.offset;
+	h_out_length = s_out_length = 0;
+	mbuf_total_left = op->turbo_dec.input.length;
+	seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
+
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+	if (unlikely(input == NULL)) {
+		rte_bbdev_log(ERR, "Invalid mbuf pointer");
+		return -EFAULT;
+	}
+#endif
+
+	/* Set up DMA descriptor */
+	desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
+			& q->sw_ring_wrap_mask);
+
+	ret = acc100_dma_desc_td_fill(op, &desc->req, &input, h_output,
+			s_output, &in_offset, &h_out_offset, &s_out_offset,
+			&h_out_length, &s_out_length, &mbuf_total_left,
+			&seg_total_left, 0);
+
+	if (unlikely(ret < 0))
+		return ret;
+
+	/* Hard output */
+	mbuf_append(h_output_head, h_output, h_out_length);
+
+	/* Soft output */
+	if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT))
+		mbuf_append(s_output_head, s_output, s_out_length);
+
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+	rte_memdump(stderr, "FCW", &desc->req.fcw_td,
+			sizeof(desc->req.fcw_td) - 8);
+	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
+
+	/* Check if any CBs left for processing */
+	if (mbuf_total_left != 0) {
+		rte_bbdev_log(ERR,
+				"Some date still left after processing one CB: mbuf_total_left = %u",
+				mbuf_total_left);
+		return -EINVAL;
+	}
+#endif
+
+	/* One CB (one op) was successfully prepared to enqueue */
+	return 1;
+}
+
+static inline int
+harq_loopback(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
+		uint16_t total_enqueued_cbs) {
+	struct acc100_fcw_ld *fcw;
+	union acc100_dma_desc *desc;
+	int next_triplet = 1;
+	struct rte_mbuf *hq_output_head, *hq_output;
+	uint16_t harq_in_length = op->ldpc_dec.harq_combined_input.length;
+	if (harq_in_length == 0) {
+		rte_bbdev_log(ERR, "Loopback of invalid null size\n");
+		return -EINVAL;
+	}
+
+	int h_comp = check_bit(op->ldpc_dec.op_flags,
+			RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION
+			) ? 1 : 0;
+	if (h_comp == 1)
+		harq_in_length = harq_in_length * 8 / 6;
+	harq_in_length = RTE_ALIGN(harq_in_length, 64);
+	uint16_t harq_dma_length_in = (h_comp == 0) ?
+			harq_in_length :
+			harq_in_length * 6 / 8;
+	uint16_t harq_dma_length_out = harq_dma_length_in;
+	bool ddr_mem_in = check_bit(op->ldpc_dec.op_flags,
+			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE);
+	union acc100_harq_layout_data *harq_layout = q->d->harq_layout;
+	uint16_t harq_index = (ddr_mem_in ?
+			op->ldpc_dec.harq_combined_input.offset :
+			op->ldpc_dec.harq_combined_output.offset)
+			/ ACC100_HARQ_OFFSET;
+
+	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
+			& q->sw_ring_wrap_mask);
+	desc = q->ring_addr + desc_idx;
+	fcw = &desc->req.fcw_ld;
+	/* Set the FCW from loopback into DDR */
+	memset(fcw, 0, sizeof(struct acc100_fcw_ld));
+	fcw->FCWversion = ACC100_FCW_VER;
+	fcw->qm = 2;
+	fcw->Zc = 384;
+	if (harq_in_length < 16 * N_ZC_1)
+		fcw->Zc = 16;
+	fcw->ncb = fcw->Zc * N_ZC_1;
+	fcw->rm_e = 2;
+	fcw->hcin_en = 1;
+	fcw->hcout_en = 1;
+
+	rte_bbdev_log(DEBUG, "Loopback IN %d Index %d offset %d length %d %d\n",
+			ddr_mem_in, harq_index,
+			harq_layout[harq_index].offset, harq_in_length,
+			harq_dma_length_in);
+
+	if (ddr_mem_in && (harq_layout[harq_index].offset > 0)) {
+		fcw->hcin_size0 = harq_layout[harq_index].size0;
+		fcw->hcin_offset = harq_layout[harq_index].offset;
+		fcw->hcin_size1 = harq_in_length - fcw->hcin_offset;
+		harq_dma_length_in = (fcw->hcin_size0 + fcw->hcin_size1);
+		if (h_comp == 1)
+			harq_dma_length_in = harq_dma_length_in * 6 / 8;
+	} else {
+		fcw->hcin_size0 = harq_in_length;
+	}
+	harq_layout[harq_index].val = 0;
+	rte_bbdev_log(DEBUG, "Loopback FCW Config %d %d %d\n",
+			fcw->hcin_size0, fcw->hcin_offset, fcw->hcin_size1);
+	fcw->hcout_size0 = harq_in_length;
+	fcw->hcin_decomp_mode = h_comp;
+	fcw->hcout_comp_mode = h_comp;
+	fcw->gain_i = 1;
+	fcw->gain_h = 1;
+
+	/* Set the prefix of descriptor. This could be done at polling */
 	desc->req.word0 = ACC100_DMA_DESC_TYPE;
 	desc->req.word1 = 0; /**< Timestamp could be disabled */
 	desc->req.word2 = 0;
@@ -1816,6 +2320,107 @@ 
 	return current_enqueued_cbs;
 }
 
+/* Enqueue one decode operation for ACC100 device in TB mode.
+ * The transport block is split into cbs_in_tb code blocks, one DMA
+ * descriptor per CB, all sharing a single FCW written at fcw_offset.
+ * Returns the number of descriptors consumed, or a negative errno.
+ */
+static inline int
+enqueue_dec_one_op_tb(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
+		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb)
+{
+	union acc100_dma_desc *desc = NULL;
+	int ret;
+	uint8_t r, c;
+	uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
+		h_out_length, mbuf_total_left, seg_total_left;
+	struct rte_mbuf *input, *h_output_head, *h_output,
+		*s_output_head, *s_output;
+	uint16_t current_enqueued_cbs = 0;
+
+	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
+			& q->sw_ring_wrap_mask);
+	desc = q->ring_addr + desc_idx;
+	uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
+	/* The FCW is filled once and referenced by every CB descriptor */
+	acc100_fcw_td_fill(op, &desc->req.fcw_td);
+
+	input = op->turbo_dec.input.data;
+	h_output_head = h_output = op->turbo_dec.hard_output.data;
+	s_output_head = s_output = op->turbo_dec.soft_output.data;
+	in_offset = op->turbo_dec.input.offset;
+	h_out_offset = op->turbo_dec.hard_output.offset;
+	s_out_offset = op->turbo_dec.soft_output.offset;
+	h_out_length = s_out_length = 0;
+	mbuf_total_left = op->turbo_dec.input.length;
+	c = op->turbo_dec.tb_params.c;
+	r = op->turbo_dec.tb_params.r;
+
+	/* One descriptor per code block, starting from CB index r */
+	while (mbuf_total_left > 0 && r < c) {
+
+		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
+
+		/* Set up DMA descriptor */
+		desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
+				& q->sw_ring_wrap_mask);
+		desc->req.data_ptrs[0].address = q->ring_addr_phys + fcw_offset;
+		desc->req.data_ptrs[0].blen = ACC100_FCW_TD_BLEN;
+		ret = acc100_dma_desc_td_fill(op, &desc->req, &input,
+				h_output, s_output, &in_offset, &h_out_offset,
+				&s_out_offset, &h_out_length, &s_out_length,
+				&mbuf_total_left, &seg_total_left, r);
+
+		if (unlikely(ret < 0))
+			return ret;
+
+		/* Hard output */
+		mbuf_append(h_output_head, h_output, h_out_length);
+
+		/* Soft output */
+		if (check_bit(op->turbo_dec.op_flags,
+				RTE_BBDEV_TURBO_SOFT_OUTPUT))
+			mbuf_append(s_output_head, s_output, s_out_length);
+
+		/* Set total number of CBs in TB */
+		desc->req.cbs_in_tb = cbs_in_tb;
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+		rte_memdump(stderr, "FCW", &desc->req.fcw_td,
+				sizeof(desc->req.fcw_td) - 8);
+		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
+#endif
+
+		if (seg_total_left == 0) {
+			/* Go to the next mbuf */
+			input = input->next;
+			in_offset = 0;
+			h_output = h_output->next;
+			h_out_offset = 0;
+
+			if (check_bit(op->turbo_dec.op_flags,
+					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
+				s_output = s_output->next;
+				s_out_offset = 0;
+			}
+		}
+
+		total_enqueued_cbs++;
+		current_enqueued_cbs++;
+		r++;
+	}
+
+	/* Defensive check: desc is initialized above, so this should not hit */
+	if (unlikely(desc == NULL))
+		return current_enqueued_cbs;
+
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+	/* Check if any CBs left for processing */
+	if (mbuf_total_left != 0) {
+		rte_bbdev_log(ERR,
+				"Some data still left for processing: mbuf_total_left = %u",
+				mbuf_total_left);
+		return -EINVAL;
+	}
+#endif
+	/* Set SDone on last CB descriptor for TB mode */
+	desc->req.sdone_enable = 1;
+	desc->req.irq_enable = q->irq_enable;
+
+	return current_enqueued_cbs;
+}
 
 /* Calculates number of CBs in processed encoder TB based on 'r' and input
  * length.
@@ -1893,6 +2498,45 @@ 
 	return cbs_in_tb;
 }
 
+/* Enqueue encode operations for ACC100 device in CB mode.
+ * One sw ring descriptor is consumed per operation; returns the number
+ * of operations actually enqueued (may be fewer than num if the ring
+ * fills up or a per-op enqueue fails).
+ */
+static uint16_t
+acc100_enqueue_enc_cb(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_enc_op **ops, uint16_t num)
+{
+	struct acc100_queue *q = q_data->queue_private;
+	/* Free descriptors: ring depth minus in-flight (head - tail) */
+	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
+	uint16_t i;
+	union acc100_dma_desc *desc;
+	int ret;
+
+	for (i = 0; i < num; ++i) {
+		/* Check if there are available space for further processing */
+		if (unlikely(avail - 1 < 0))
+			break;
+		avail -= 1;
+
+		ret = enqueue_enc_one_op_cb(q, ops[i], i);
+		if (ret < 0)
+			break;
+	}
+
+	if (unlikely(i == 0))
+		return 0; /* Nothing to enqueue */
+
+	/* Set SDone in last CB in enqueued ops for CB mode*/
+	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
+			& q->sw_ring_wrap_mask);
+	desc->req.sdone_enable = 1;
+	desc->req.irq_enable = q->irq_enable;
+
+	/* Ring the doorbell only after all descriptors are written */
+	acc100_dma_enqueue(q, i, &q_data->queue_stats);
+
+	/* Update stats */
+	q_data->queue_stats.enqueued_count += i;
+	q_data->queue_stats.enqueue_err_count += num - i;
+	return i;
+}
+
 /* Check we can mux encode operations with common FCW */
 static inline bool
 check_mux(struct rte_bbdev_enc_op **ops, uint16_t num) {
@@ -1960,6 +2604,52 @@ 
 	return i;
 }
 
+/* Enqueue encode operations for ACC100 device in TB mode.
+ * Each op consumes as many descriptors as it has code blocks; returns
+ * the number of ops enqueued.
+ */
+static uint16_t
+acc100_enqueue_enc_tb(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_enc_op **ops, uint16_t num)
+{
+	struct acc100_queue *q = q_data->queue_private;
+	/* Free descriptors: ring depth minus in-flight (head - tail) */
+	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
+	uint16_t i, enqueued_cbs = 0;
+	uint8_t cbs_in_tb;
+	int ret;
+
+	for (i = 0; i < num; ++i) {
+		cbs_in_tb = get_num_cbs_in_tb_enc(&ops[i]->turbo_enc);
+		/* Check if there are available space for further processing */
+		if (unlikely(avail - cbs_in_tb < 0))
+			break;
+		avail -= cbs_in_tb;
+
+		ret = enqueue_enc_one_op_tb(q, ops[i], enqueued_cbs, cbs_in_tb);
+		if (ret < 0)
+			break;
+		enqueued_cbs += ret;
+	}
+
+	/* Avoid ringing the doorbell when nothing was enqueued,
+	 * consistent with the CB-mode enqueue path.
+	 */
+	if (unlikely(enqueued_cbs == 0))
+		return 0; /* Nothing to enqueue */
+
+	acc100_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
+
+	/* Update stats */
+	q_data->queue_stats.enqueued_count += i;
+	q_data->queue_stats.enqueue_err_count += num - i;
+
+	return i;
+}
+
+/* Enqueue encode operations for ACC100 device.
+ * Dispatches on code_block_mode of the first op: 0 selects transport
+ * block mode, any other value selects code block mode.
+ */
+static uint16_t
+acc100_enqueue_enc(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_enc_op **ops, uint16_t num)
+{
+	if (unlikely(num == 0))
+		return 0;
+	if (ops[0]->turbo_enc.code_block_mode != 0)
+		return acc100_enqueue_enc_cb(q_data, ops, num);
+	return acc100_enqueue_enc_tb(q_data, ops, num);
+}
+
 /* Enqueue encode operations for ACC100 device. */
 static uint16_t
 acc100_enqueue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
@@ -1967,7 +2657,51 @@ 
 {
 	if (unlikely(num == 0))
 		return 0;
-	return acc100_enqueue_ldpc_enc_cb(q_data, ops, num);
+	if (ops[0]->ldpc_enc.code_block_mode == 0)
+		return acc100_enqueue_enc_tb(q_data, ops, num);
+	else
+		return acc100_enqueue_ldpc_enc_cb(q_data, ops, num);
+}
+
+
+/* Enqueue decode operations for ACC100 device in CB mode.
+ * One sw ring descriptor is consumed per operation; returns the number
+ * of operations actually enqueued (may be fewer than num if the ring
+ * fills up or a per-op enqueue fails).
+ */
+static uint16_t
+acc100_enqueue_dec_cb(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_dec_op **ops, uint16_t num)
+{
+	struct acc100_queue *q = q_data->queue_private;
+	/* Free descriptors: ring depth minus in-flight (head - tail) */
+	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
+	uint16_t i;
+	union acc100_dma_desc *desc;
+	int ret;
+
+	for (i = 0; i < num; ++i) {
+		/* Check if there are available space for further processing */
+		if (unlikely(avail - 1 < 0))
+			break;
+		avail -= 1;
+
+		ret = enqueue_dec_one_op_cb(q, ops[i], i);
+		if (ret < 0)
+			break;
+	}
+
+	if (unlikely(i == 0))
+		return 0; /* Nothing to enqueue */
+
+	/* Set SDone in last CB in enqueued ops for CB mode*/
+	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
+			& q->sw_ring_wrap_mask);
+	desc->req.sdone_enable = 1;
+	desc->req.irq_enable = q->irq_enable;
+
+	/* Ring the doorbell only after all descriptors are written */
+	acc100_dma_enqueue(q, i, &q_data->queue_stats);
+
+	/* Update stats */
+	q_data->queue_stats.enqueued_count += i;
+	q_data->queue_stats.enqueue_err_count += num - i;
+
+	return i;
+}
 
 /* Check we can mux encode operations with common FCW */
@@ -2065,6 +2799,53 @@ 
 	return i;
 }
 
+
+/* Enqueue decode operations for ACC100 device in TB mode.
+ * Each op consumes as many descriptors as it has code blocks; returns
+ * the number of ops enqueued.
+ */
+static uint16_t
+acc100_enqueue_dec_tb(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_dec_op **ops, uint16_t num)
+{
+	struct acc100_queue *q = q_data->queue_private;
+	/* Free descriptors: ring depth minus in-flight (head - tail) */
+	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
+	uint16_t i, enqueued_cbs = 0;
+	uint8_t cbs_in_tb;
+	int ret;
+
+	for (i = 0; i < num; ++i) {
+		cbs_in_tb = get_num_cbs_in_tb_dec(&ops[i]->turbo_dec);
+		/* Check if there are available space for further processing */
+		if (unlikely(avail - cbs_in_tb < 0))
+			break;
+		avail -= cbs_in_tb;
+
+		ret = enqueue_dec_one_op_tb(q, ops[i], enqueued_cbs, cbs_in_tb);
+		if (ret < 0)
+			break;
+		enqueued_cbs += ret;
+	}
+
+	/* Avoid ringing the doorbell when nothing was enqueued,
+	 * consistent with the CB-mode enqueue path.
+	 */
+	if (unlikely(enqueued_cbs == 0))
+		return 0; /* Nothing to enqueue */
+
+	acc100_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
+
+	/* Update stats */
+	q_data->queue_stats.enqueued_count += i;
+	q_data->queue_stats.enqueue_err_count += num - i;
+
+	return i;
+}
+
+/* Enqueue decode operations for ACC100 device.
+ * Dispatches on code_block_mode of the first op: 0 selects transport
+ * block mode, any other value selects code block mode.
+ */
+static uint16_t
+acc100_enqueue_dec(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_dec_op **ops, uint16_t num)
+{
+	if (unlikely(num == 0))
+		return 0;
+	if (ops[0]->turbo_dec.code_block_mode != 0)
+		return acc100_enqueue_dec_cb(q_data, ops, num);
+	return acc100_enqueue_dec_tb(q_data, ops, num);
+}
+
 /* Enqueue decode operations for ACC100 device. */
 static uint16_t
 acc100_enqueue_ldpc_dec(struct rte_bbdev_queue_data *q_data,
@@ -2388,6 +3169,51 @@ 
 	return cb_idx;
 }
 
+/* Dequeue encode operations from ACC100 device.
+ * Walks completed descriptors from sw_ring_tail, handling each op in
+ * TB or CB mode per its code_block_mode. Returns ops dequeued.
+ */
+static uint16_t
+acc100_dequeue_enc(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_enc_op **ops, uint16_t num)
+{
+	struct acc100_queue *q = q_data->queue_private;
+	uint16_t dequeue_num;
+	uint32_t avail = q->sw_ring_head - q->sw_ring_tail;
+	uint32_t aq_dequeued = 0;
+	uint16_t i;
+	uint16_t dequeued_cbs = 0;
+	struct rte_bbdev_enc_op *op;
+	int ret;
+
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+	/* Bail out if either argument is invalid (was "&&", which only
+	 * caught the case where both were NULL at once).
+	 */
+	if (unlikely(ops == NULL || q == NULL))
+		return 0;
+#endif
+
+	dequeue_num = (avail < num) ? avail : num;
+
+	for (i = 0; i < dequeue_num; ++i) {
+		op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
+			& q->sw_ring_wrap_mask))->req.op_addr;
+		if (op->turbo_enc.code_block_mode == 0)
+			ret = dequeue_enc_one_op_tb(q, &ops[i], dequeued_cbs,
+					&aq_dequeued);
+		else
+			ret = dequeue_enc_one_op_cb(q, &ops[i], dequeued_cbs,
+					&aq_dequeued);
+
+		if (ret < 0)
+			break;
+		dequeued_cbs += ret;
+	}
+
+	q->aq_dequeued += aq_dequeued;
+	q->sw_ring_tail += dequeued_cbs;
+
+	/* Update dequeue stats */
+	q_data->queue_stats.dequeued_count += i;
+
+	return i;
+}
+
 /* Dequeue LDPC encode operations from ACC100 device. */
 static uint16_t
 acc100_dequeue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
@@ -2426,6 +3252,52 @@ 
 	return dequeued_cbs;
 }
 
+
+/* Dequeue decode operations from ACC100 device.
+ * Walks completed descriptors from sw_ring_tail, handling each op in
+ * TB or CB mode per its code_block_mode. Returns ops dequeued.
+ */
+static uint16_t
+acc100_dequeue_dec(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_dec_op **ops, uint16_t num)
+{
+	struct acc100_queue *q = q_data->queue_private;
+	uint16_t dequeue_num;
+	uint32_t avail = q->sw_ring_head - q->sw_ring_tail;
+	uint32_t aq_dequeued = 0;
+	uint16_t i;
+	uint16_t dequeued_cbs = 0;
+	struct rte_bbdev_dec_op *op;
+	int ret;
+
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+	/* Bail out if either argument is invalid (was "&&", which only
+	 * caught the case where both were NULL at once).
+	 */
+	if (unlikely(ops == NULL || q == NULL))
+		return 0;
+#endif
+
+	dequeue_num = (avail < num) ? avail : num;
+
+	for (i = 0; i < dequeue_num; ++i) {
+		op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
+			& q->sw_ring_wrap_mask))->req.op_addr;
+		if (op->turbo_dec.code_block_mode == 0)
+			ret = dequeue_dec_one_op_tb(q, &ops[i], dequeued_cbs,
+					&aq_dequeued);
+		else
+			ret = dequeue_dec_one_op_cb(q_data, q, &ops[i],
+					dequeued_cbs, &aq_dequeued);
+
+		if (ret < 0)
+			break;
+		dequeued_cbs += ret;
+	}
+
+	q->aq_dequeued += aq_dequeued;
+	q->sw_ring_tail += dequeued_cbs;
+
+	/* Update dequeue stats */
+	q_data->queue_stats.dequeued_count += i;
+
+	return i;
+}
+
 /* Dequeue decode operations from ACC100 device. */
 static uint16_t
 acc100_dequeue_ldpc_dec(struct rte_bbdev_queue_data *q_data,
@@ -2479,6 +3351,10 @@ 
 	struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device);
 
 	dev->dev_ops = &acc100_bbdev_ops;
+	dev->enqueue_enc_ops = acc100_enqueue_enc;
+	dev->enqueue_dec_ops = acc100_enqueue_dec;
+	dev->dequeue_enc_ops = acc100_dequeue_enc;
+	dev->dequeue_dec_ops = acc100_dequeue_dec;
 	dev->enqueue_ldpc_enc_ops = acc100_enqueue_ldpc_enc;
 	dev->enqueue_ldpc_dec_ops = acc100_enqueue_ldpc_dec;
 	dev->dequeue_ldpc_enc_ops = acc100_dequeue_ldpc_enc;