[dpdk-dev] [PATCH] mbuf: make rearm_data address naturally aligned

Olivier Matz olivier.matz at 6wind.com
Mon May 23 13:19:46 CEST 2016


Hi,

On 05/19/2016 05:50 PM, Thomas Monjalon wrote:
> 2016-05-19 19:05, Jerin Jacob:
>> On Thu, May 19, 2016 at 12:18:57PM +0000, Ananyev, Konstantin wrote:
>>>> On Thu, May 19, 2016 at 12:20:16AM +0530, Jerin Jacob wrote:
>>>>> On Wed, May 18, 2016 at 05:43:00PM +0100, Bruce Richardson wrote:
>>>>>> On Wed, May 18, 2016 at 07:27:43PM +0530, Jerin Jacob wrote:
>>> I wonder does anyone really use mbuf port field?
>>> My though was - could we to drop it completely?
>>> Actually, after discussing it with Bruce offline, an interesting idea came out:
>>> if we'll drop port and make mbuf_prefree() to reset nb_segs=1, then
>>> we can reduce RX rearm_data to 4B. So with that layout:
>>>
>>> struct rte_mbuf {
>>>
>>>          MARKER cacheline0;
>>>
>>>         void *buf_addr;           
>>>         phys_addr_t buf_physaddr; 
>>>         uint16_t buf_len;
>>>         uint8_t nb_segs;
>>>         uint8_t reserved_1byte;   /* former port */
>>>         
>>>         MARKER32 rearm_data;
>>>         uint16_t data_off;
>>>        uint16_t refcnt;
>>>        
>>>         uint64_t ol_flags;
>>>         ...
>>>
>>> We can keep buf_len at its place and avoid 2B gap, while making rearm_data
>>> 4B long and 4B aligned.
>>
>> Couple of comments,
>> - IMO, It is good if nb_segs can move under rearm_data, as some
>> drivers(not in ixgbe may be) can write nb_segs in one shot also
>> in segmented rx handler case
>> - I think, it makes sense to keep port in mbuf so that application
>> can make use of it(Not sure what real application developers think of
>> this)
> 
> I agree we could try to remove the port id from mbuf.
> These mbuf data are a common base to pass infos between drivers and apps.
> If you need to store some data which are read and write from the app only,
> you can have use the private zone (see rte_pktmbuf_priv_size).

At the first read, I was in favor of keeping the port_id in the
mbuf. But after checking the examples and applications, I'm not
opposed to remove it. Indeed, this information could go in an
application-specific part or it could be an additional function
parameter in the application processing function.

The same question could be raised for nb_seg: it seems this info
is not used a lot, and having a 8 bits value here also prevents from
having long chains (ex: for socket buffer in a tcp stack).

Just an idea thrown in the air: if these 2 fields are removed, it
brings some room for the m->next field to go in the first cache line.
This was mentioned several times (at least [1]).

[1] http://dpdk.org/ml/archives/dev/2015-June/019182.html

By the way, the "pahole" utility gives a nice representation
of structures with the field sizes and offsets. Example on the
current rte_mbuf structure on x86_64:

$ make config T=x86_64-native-linuxapp-gcc
$ make -j4 EXTRA_CFLAGS="-O -g"
$ pahole -C rte_mbuf build/app/testpmd

struct rte_mbuf {
     MARKER                     cacheline0;           /*     0     0 */
     void *                     buf_addr;             /*     0     8 */
     phys_addr_t                buf_physaddr;         /*     8     8 */
     uint16_t                   buf_len;              /*    16     2 */
     MARKER8                    rearm_data;           /*    18     0 */
     uint16_t                   data_off;             /*    18     2 */
     union {
             rte_atomic16_t     refcnt_atomic;        /*           2 */
             uint16_t           refcnt;               /*           2 */
     };                                               /*    20     2 */
     uint8_t                    nb_segs;              /*    22     1 */
     uint8_t                    port;                 /*    23     1 */
     uint64_t                   ol_flags;             /*    24     8 */
     MARKER                     rx_descriptor_fields1; /*    32     0 */
     union {
             uint32_t           packet_type;          /*           4 */
             struct {
                     uint32_t   l2_type:4;            /*    32:28  4 */
                     uint32_t   l3_type:4;            /*    32:24  4 */
                     uint32_t   l4_type:4;            /*    32:20  4 */
                     uint32_t   tun_type:4;           /*    32:16  4 */
                     uint32_t   inner_l2_type:4;      /*    32:12  4 */
                     uint32_t   inner_l3_type:4;      /*    32: 8  4 */
                     uint32_t   inner_l4_type:4;      /*    32: 4  4 */
             };                                       /*           4 */
     };                                               /*    32     4 */
     uint32_t                   pkt_len;              /*    36     4 */
     uint16_t                   data_len;             /*    40     2 */
     uint16_t                   vlan_tci;             /*    42     2 */
     union {
             uint32_t           rss;                  /*           4 */
             struct {
                     union {
                             struct {
                                     uint16_t hash;   /*    44     2 */
                                     uint16_t id;     /*    46     2 */
                             };                       /*           4 */
                             uint32_t lo;             /*           4 */
                     };                               /*    44     4 */
                     uint32_t   hi;                   /*    48     4 */
             } fdir;                                  /*           8 */
             struct {
                     uint32_t   lo;                   /*    44     4 */
                     uint32_t   hi;                   /*    48     4 */
             } sched;                                 /*           8 */
             uint32_t           usr;                  /*           4 */
     } hash;                                          /*    44     8 */
     uint32_t                   seqn;                 /*    52     4 */
     uint16_t                   vlan_tci_outer;       /*    56     2 */

     /* XXX 6 bytes hole, try to pack */

     /* --- cacheline 1 boundary (64 bytes) --- */
     MARKER                     cacheline1;           /*    64     0 */
     union {
             void *             userdata;             /*           8 */
             uint64_t           udata64;              /*           8 */
     };                                               /*    64     8 */
     struct rte_mempool *       pool;                 /*    72     8 */
     struct rte_mbuf *          next;                 /*    80     8 */
     union {
             uint64_t           tx_offload;           /*           8 */
             struct {
                     uint64_t   l2_len:7;             /*    88:57  8 */
                     uint64_t   l3_len:9;             /*    88:48  8 */
                     uint64_t   l4_len:8;             /*    88:40  8 */
                     uint64_t   tso_segsz:16;         /*    88:24  8 */
                     uint64_t   outer_l3_len:9;       /*    88:15  8 */
                     uint64_t   outer_l2_len:7;       /*    88: 8  8 */
             };                                       /*           8 */
     };                                               /*    88     8 */
     uint16_t                   priv_size;            /*    96     2 */
     uint16_t                   timesync;             /*    98     2 */

     /* size: 128, cachelines: 2, members: 25 */
     /* sum members: 94, holes: 1, sum holes: 6 */
     /* padding: 28 */
};




More information about the dev mailing list