[PATCH] mbuf: replace GCC marker extension with C11 anonymous unions

Morten Brørup mb at smartsharesystems.com
Wed Jan 31 10:18:37 CET 2024


> From: Tyler Retzlaff [mailto:roretzla at linux.microsoft.com]
> Sent: Wednesday, 31 January 2024 00.26
> 
> Replace the use of RTE_MARKER<x> with C11 anonymous unions to improve
> code portability between toolchains.
> 
> Update use of rte_mbuf rearm_data field in net/ionic, net/sfc and
> net/virtio which were accessing field as a zero-length array.
> 
> Signed-off-by: Tyler Retzlaff <roretzla at linux.microsoft.com>
> ---

I have some comments, putting weight on code readability rather than avoiding API breakage.

We can consider my suggested API breaking changes for the next API breaking release, and keep your goal of minimal API breakage with the current changes.

> diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
> index 5688683..d731ea0 100644
> --- a/lib/mbuf/rte_mbuf_core.h
> +++ b/lib/mbuf/rte_mbuf_core.h
> @@ -464,9 +464,10 @@ enum {
>   * The generic rte_mbuf, containing a packet mbuf.
>   */
>  struct rte_mbuf {
> -	RTE_MARKER cacheline0;
> -
> -	void *buf_addr;           /**< Virtual address of segment buffer.
> */
> +	union {
> +	    void *cacheline0;
> +	    void *buf_addr;           /**< Virtual address of segment
> buffer. */
> +	};

I suppose this is the least ugly workaround for not being able to use the RTE_MARKER hack here.

>  #if RTE_IOVA_IN_MBUF
>  	/**
>  	 * Physical address of segment buffer.
> @@ -487,69 +488,77 @@ struct rte_mbuf {
>  #endif
> 
>  	/* next 8 bytes are initialised on RX descriptor rearm */
> -	RTE_MARKER64 rearm_data;
> -	uint16_t data_off;
> -
> -	/**
> -	 * Reference counter. Its size should at least equal to the size
> -	 * of port field (16 bits), to support zero-copy broadcast.
> -	 * It should only be accessed using the following functions:
> -	 * rte_mbuf_refcnt_update(), rte_mbuf_refcnt_read(), and
> -	 * rte_mbuf_refcnt_set(). The functionality of these functions
> (atomic,
> -	 * or non-atomic) is controlled by the RTE_MBUF_REFCNT_ATOMIC
> flag.
> -	 */
> -	RTE_ATOMIC(uint16_t) refcnt;
> +	union {
> +		uint64_t rearm_data;

I consider this union with uint64_t rearm_data an improvement for code readability. Using a marker here was weird.

> +		struct {
> +			uint16_t data_off;
> +
> +			/**
> +			 * Reference counter. Its size should at least equal
> to the size
> +			 * of port field (16 bits), to support zero-copy
> broadcast.
> +			 * It should only be accessed using the following
> functions:
> +			 * rte_mbuf_refcnt_update(), rte_mbuf_refcnt_read(),
> and
> +			 * rte_mbuf_refcnt_set(). The functionality of these
> functions (atomic,
> +			 * or non-atomic) is controlled by the
> RTE_MBUF_REFCNT_ATOMIC flag.
> +			 */
> +			RTE_ATOMIC(uint16_t) refcnt;
> 
> -	/**
> -	 * Number of segments. Only valid for the first segment of an
> mbuf
> -	 * chain.
> -	 */
> -	uint16_t nb_segs;
> +			/**
> +			 * Number of segments. Only valid for the first
> segment of an mbuf
> +			 * chain.
> +			 */
> +			uint16_t nb_segs;
> 
> -	/** Input port (16 bits to support more than 256 virtual ports).
> -	 * The event eth Tx adapter uses this field to specify the output
> port.
> -	 */
> -	uint16_t port;
> +			/** Input port (16 bits to support more than 256
> virtual ports).
> +			 * The event eth Tx adapter uses this field to
> specify the output port.
> +			 */
> +			uint16_t port;
> 
> -	uint64_t ol_flags;        /**< Offload features. */
> +			uint64_t ol_flags;        /**< Offload features. */

Either:
1. If the comment about 8 bytes init on rearm is correct: ol_flags should remain outside the struct and union, i.e. at top level, else
2. It would be nice to increase the size of the rearm_data variable to 16 byte, so it covers the entire struct being rearmed. (And the incorrect comment about how many bytes are being rearmed should be fixed.)

> +		};
> +	};
> 
>  	/* remaining bytes are set on RX when pulling packet from
> descriptor */
> -	RTE_MARKER rx_descriptor_fields1;
> -
> -	/*
> -	 * The packet type, which is the combination of outer/inner L2,
> L3, L4
> -	 * and tunnel types. The packet_type is about data really present
> in the
> -	 * mbuf. Example: if vlan stripping is enabled, a received vlan
> packet
> -	 * would have RTE_PTYPE_L2_ETHER and not RTE_PTYPE_L2_VLAN
> because the
> -	 * vlan is stripped from the data.
> -	 */
>  	union {
> -		uint32_t packet_type; /**< L2/L3/L4 and tunnel information.
> */
> -		__extension__
> +		void *rx_descriptor_fields1;

Instead of using void* for rx_descriptor_fields1, it would be nice to make rx_descriptor_fields1 a type of the correct size. It might need to be an uint32_t array to avoid imposing additional alignment requirements.

> +
> +		/*
> +		 * The packet type, which is the combination of outer/inner
> L2, L3, L4
> +		 * and tunnel types. The packet_type is about data really
> present in the
> +		 * mbuf. Example: if vlan stripping is enabled, a received
> vlan packet
> +		 * would have RTE_PTYPE_L2_ETHER and not RTE_PTYPE_L2_VLAN
> because the
> +		 * vlan is stripped from the data.
> +		 */
>  		struct {
> -			uint8_t l2_type:4;   /**< (Outer) L2 type. */
> -			uint8_t l3_type:4;   /**< (Outer) L3 type. */
> -			uint8_t l4_type:4;   /**< (Outer) L4 type. */
> -			uint8_t tun_type:4;  /**< Tunnel type. */
>  			union {
> -				uint8_t inner_esp_next_proto;
> -				/**< ESP next protocol type, valid if
> -				 * RTE_PTYPE_TUNNEL_ESP tunnel type is set
> -				 * on both Tx and Rx.
> -				 */
> +				uint32_t packet_type; /**< L2/L3/L4 and tunnel
> information. */
>  				__extension__
>  				struct {
> -					uint8_t inner_l2_type:4;
> -					/**< Inner L2 type. */
> -					uint8_t inner_l3_type:4;
> -					/**< Inner L3 type. */
> +					uint8_t l2_type:4;   /**< (Outer) L2
> type. */
> +					uint8_t l3_type:4;   /**< (Outer) L3
> type. */
> +					uint8_t l4_type:4;   /**< (Outer) L4
> type. */
> +					uint8_t tun_type:4;  /**< Tunnel type. */
> +					union {
> +						uint8_t inner_esp_next_proto;
> +						/**< ESP next protocol type, valid
> if
> +						 * RTE_PTYPE_TUNNEL_ESP tunnel type
> is set
> +						 * on both Tx and Rx.
> +						 */
> +						__extension__
> +						struct {
> +							uint8_t inner_l2_type:4;
> +							/**< Inner L2 type. */
> +							uint8_t inner_l3_type:4;
> +							/**< Inner L3 type. */
> +						};
> +					};
> +					uint8_t inner_l4_type:4; /**< Inner L4
> type. */
>  				};
>  			};
> -			uint8_t inner_l4_type:4; /**< Inner L4 type. */
> +			uint32_t pkt_len;         /**< Total pkt len: sum of
> all segments. */
>  		};
>  	};
> 
> -	uint32_t pkt_len;         /**< Total pkt len: sum of all
> segments. */
>  	uint16_t data_len;        /**< Amount of data in segment buffer.
> */
>  	/** VLAN TCI (CPU order), valid if RTE_MBUF_F_RX_VLAN is set. */
>  	uint16_t vlan_tci;
> @@ -595,21 +604,23 @@ struct rte_mbuf {
>  	struct rte_mempool *pool; /**< Pool from which mbuf was
> allocated. */
> 
>  	/* second cache line - fields only used in slow path or on TX */
> -	RTE_MARKER cacheline1 __rte_cache_min_aligned;
> +	union {
> +		void *cacheline1;

The __rte_cache_min_aligned cannot be removed. It provides cache line alignment for 32 bit platforms, where pointers in the first cache line only use 4 byte.

NB: The rte_mbuf structure could be optimized for 32 bit platforms by moving fields from the second cache line to the holes in the first, but that's another discussion.

> 
>  #if RTE_IOVA_IN_MBUF
> -	/**
> -	 * Next segment of scattered packet. Must be NULL in the last
> -	 * segment or in case of non-segmented packet.
> -	 */
> -	struct rte_mbuf *next;
> +		/**
> +		 * Next segment of scattered packet. Must be NULL in the
> last
> +		 * segment or in case of non-segmented packet.
> +		 */
> +		struct rte_mbuf *next;
>  #else
> -	/**
> -	 * Reserved for dynamic fields
> -	 * when the next pointer is in first cache line (i.e.
> RTE_IOVA_IN_MBUF is 0).
> -	 */
> -	uint64_t dynfield2;
> +		/**
> +		 * Reserved for dynamic fields
> +		 * when the next pointer is in first cache line (i.e.
> RTE_IOVA_IN_MBUF is 0).
> +		 */
> +		uint64_t dynfield2;
>  #endif
> +	};
> 
>  	/* fields to support TX offloads */
>  	union {
> --
> 1.8.3.1



More information about the dev mailing list