diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2011-11-14 06:05:34 +0000 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-11-14 14:13:30 -0500 |
commit | e52fcb2462ac484e6dd6e68869536609f0216938 (patch) | |
tree | 794407ff242481d5d09e3e669cabfe129d80f6df /drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | |
parent | b2b5ce9d1ccf1c45f8ac68e5d901112ab76ba199 (diff) |
bnx2x: uses build_skb() in receive path
bnx2x uses following formula to compute its rx_buf_sz :
dev->mtu + 2*L1_CACHE_BYTES + 14 + 8 + 8 + 2
Then core network adds NET_SKB_PAD and SKB_DATA_ALIGN(sizeof(struct
skb_shared_info))
Final allocated size for skb head on x86_64 (L1_CACHE_BYTES = 64,
MTU=1500) : 2112 bytes : SLUB/SLAB round this to 4096 bytes.
Since skb truesize is then bigger than SK_MEM_QUANTUM, we have lot of
false sharing because of mem_reclaim in UDP stack.
One possible way to half truesize is to reduce the need by 64 bytes
(2112 -> 2048 bytes)
Instead of allocating a full cache line at the end of packet for
alignment, we can use the fact that skb_shared_info sits at the end of
skb->head, and we can use this room, if we convert bnx2x to new
build_skb() infrastructure.
skb_shared_info will be initialized after hardware finished its
transfert, so we can eventually overwrite the final padding.
Using build_skb() also reduces cache line misses in the driver, since we
use cache hot skb instead of cold ones. Number of in-flight sk_buff
structures is lower, they are recycled while still hot.
Performance results :
(820.000 pps on a rx UDP monothread benchmark, instead of 720.000 pps)
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Eilon Greenstein <eilong@broadcom.com>
CC: Ben Hutchings <bhutchings@solarflare.com>
CC: Tom Herbert <therbert@google.com>
CC: Jamal Hadi Salim <hadi@mojatatu.com>
CC: Stephen Hemminger <shemminger@vyatta.com>
CC: Thomas Graf <tgraf@infradead.org>
CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Acked-by: Eilon Greenstein <eilong@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/broadcom/bnx2x/bnx2x.h')
-rw-r--r-- | drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 30 |
1 files changed, 23 insertions, 7 deletions
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 4b90b518d6a6..0f7b7a463eba 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -293,8 +293,13 @@ enum { #define FCOE_TXQ_IDX(bp) (MAX_ETH_TXQ_IDX(bp)) /* fast path */ +/* + * This driver uses new build_skb() API : + * RX ring buffer contains pointer to kmalloc() data only, + * skb are built only after Hardware filled the frame. + */ struct sw_rx_bd { - struct sk_buff *skb; + u8 *data; DEFINE_DMA_UNMAP_ADDR(mapping); }; @@ -424,8 +429,8 @@ union host_hc_status_block { struct bnx2x_agg_info { /* - * First aggregation buffer is an skb, the following - are pages. - * We will preallocate the skbs for each aggregation when + * First aggregation buffer is a data buffer, the following - are pages. + * We will preallocate the data buffer for each aggregation when * we open the interface and will replace the BD at the consumer * with this one when we receive the TPA_START CQE in order to * keep the Rx BD ring consistent. @@ -439,6 +444,7 @@ struct bnx2x_agg_info { u16 parsing_flags; u16 vlan_tag; u16 len_on_bd; + u32 rxhash; }; #define Q_STATS_OFFSET32(stat_name) \ @@ -1187,10 +1193,20 @@ struct bnx2x { #define ETH_MAX_JUMBO_PACKET_SIZE 9600 /* Max supported alignment is 256 (8 shift) */ -#define BNX2X_RX_ALIGN_SHIFT ((L1_CACHE_SHIFT < 8) ? \ - L1_CACHE_SHIFT : 8) - /* FW use 2 Cache lines Alignment for start packet and size */ -#define BNX2X_FW_RX_ALIGN (2 << BNX2X_RX_ALIGN_SHIFT) +#define BNX2X_RX_ALIGN_SHIFT min(8, L1_CACHE_SHIFT) + + /* FW uses 2 Cache lines Alignment for start packet and size + * + * We assume skb_build() uses sizeof(struct skb_shared_info) bytes + * at the end of skb->data, to avoid wasting a full cache line. + * This reduces memory use (skb->truesize). + */ +#define BNX2X_FW_RX_ALIGN_START (1UL << BNX2X_RX_ALIGN_SHIFT) + +#define BNX2X_FW_RX_ALIGN_END \ + max(1UL << BNX2X_RX_ALIGN_SHIFT, \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) + #define BNX2X_PXP_DRAM_ALIGN (BNX2X_RX_ALIGN_SHIFT - 5) struct host_sp_status_block *def_status_blk; |