[v3,18/19] net/hinic: optimize RX performance
Checks
Commit Message
This patch optimizes packet receive performance
on the ARM platform.
Signed-off-by: Xiaoyun wang <cloud.wangxiaoyun@huawei.com>
---
drivers/net/hinic/hinic_pmd_rx.c | 5 +----
drivers/net/hinic/hinic_pmd_rx.h | 11 +++++++++++
2 files changed, 12 insertions(+), 4 deletions(-)
Comments
On 9/30/2019 3:00 PM, Xiaoyun wang wrote:
> This patch optimizes receive packets performance
> on arm platform.
>
> Signed-off-by: Xiaoyun wang <cloud.wangxiaoyun@huawei.com>
<...>
> @@ -28,6 +28,7 @@ struct hinic_rq_ctrl {
> u32 ctrl_fmt;
> };
>
> +#if defined(__X86_64_SSE__)
> struct hinic_rq_cqe {
> u32 status;
> u32 vlan_len;
> @@ -36,6 +37,16 @@ struct hinic_rq_cqe {
>
> u32 rsvd[4];
> };
> +#elif defined(__ARM64_NEON__)
> +struct hinic_rq_cqe {
> + u32 status;
> + u32 vlan_len;
> + u32 offload_type;
> + u32 rss_hash;
> +
> + u32 rsvd[4];
> +} __rte_cache_aligned;
> +#endif
This change makes "struct hinic_rq_cqe" exist only for 'x86_64' and 'ARM64'. If
the intention is to add '__rte_cache_aligned' for NEON only, would the following
work? It would also work for all archs:
struct hinic_rq_cqe {
u32 status;
u32 vlan_len;
u32 offload_type;
u32 rss_hash;
u32 rsvd[4];
#if defined(__ARM64_NEON__)
} __rte_cache_aligned;
#else
};
#endif
If this works, can you please send a new version with this update?
Hi Ferruh,
Thanks for your comments. I think you're right; I will fix this in patch v4. I also changed it to use the same cache-aligned structure
for both the x86-64 and ARM platforms and tested the RX performance; everything is OK.
Best Regards
Xiaoyun Wang
在 2019/9/30 23:10, Ferruh Yigit 写道:
> On 9/30/2019 3:00 PM, Xiaoyun wang wrote:
>> This patch optimizes receive packets performance
>> on arm platform.
>>
>> Signed-off-by: Xiaoyun wang <cloud.wangxiaoyun@huawei.com>
> <...>
>
>> @@ -28,6 +28,7 @@ struct hinic_rq_ctrl {
>> u32 ctrl_fmt;
>> };
>>
>> +#if defined(__X86_64_SSE__)
>> struct hinic_rq_cqe {
>> u32 status;
>> u32 vlan_len;
>> @@ -36,6 +37,16 @@ struct hinic_rq_cqe {
>>
>> u32 rsvd[4];
>> };
>> +#elif defined(__ARM64_NEON__)
>> +struct hinic_rq_cqe {
>> + u32 status;
>> + u32 vlan_len;
>> + u32 offload_type;
>> + u32 rss_hash;
>> +
>> + u32 rsvd[4];
>> +} __rte_cache_aligned;
>> +#endif
> This change makes "struct hinic_rq_cqe" only exist for 'x86_64' and 'ARM64', if
> the intention is to add '__rte_cache_aligned' for the neon, would following
> work, which will also work for all archs:
>
>
> struct hinic_rq_cqe {
> u32 status;
> u32 vlan_len;
> u32 offload_type;
> u32 rss_hash;
>
> u32 rsvd[4];
> #if defined(__ARM64_NEON__)
> } __rte_cache_aligned;
> #else
> };
> #endif
>
> If this works, can you please send a new version with this update?
>
@@ -972,13 +972,10 @@ u16 hinic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, u16 nb_pkts)
while (pkts < nb_pkts) {
/* 2. current ci is done */
rx_cqe = &rxq->rx_cqe[sw_ci];
- status = rx_cqe->status;
+ status = __atomic_load_n(&rx_cqe->status, __ATOMIC_ACQUIRE);
if (!HINIC_GET_RX_DONE_BE(status))
break;
- /* read other cqe member after status */
- rte_rmb();
-
/* convert cqe and get packet length */
hinic_rq_cqe_be_to_cpu32(&cqe, (volatile void *)rx_cqe);
vlan_len = cqe.vlan_len;
@@ -28,6 +28,7 @@ struct hinic_rq_ctrl {
u32 ctrl_fmt;
};
+#if defined(__X86_64_SSE__)
struct hinic_rq_cqe {
u32 status;
u32 vlan_len;
@@ -36,6 +37,16 @@ struct hinic_rq_cqe {
u32 rsvd[4];
};
+#elif defined(__ARM64_NEON__)
+struct hinic_rq_cqe {
+ u32 status;
+ u32 vlan_len;
+ u32 offload_type;
+ u32 rss_hash;
+
+ u32 rsvd[4];
+} __rte_cache_aligned;
+#endif
struct hinic_rq_cqe_sect {
struct hinic_sge sge;