aboutsummaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2011-11-14 06:05:34 +0000
committerDavid S. Miller <davem@davemloft.net>2011-11-14 14:13:30 -0500
commite52fcb2462ac484e6dd6e68869536609f0216938 (patch)
tree794407ff242481d5d09e3e669cabfe129d80f6df /drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
parentb2b5ce9d1ccf1c45f8ac68e5d901112ab76ba199 (diff)
bnx2x: uses build_skb() in receive path
bnx2x uses following formula to compute its rx_buf_sz : dev->mtu + 2*L1_CACHE_BYTES + 14 + 8 + 8 + 2 Then core network adds NET_SKB_PAD and SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) Final allocated size for skb head on x86_64 (L1_CACHE_BYTES = 64, MTU=1500) : 2112 bytes : SLUB/SLAB round this to 4096 bytes. Since skb truesize is then bigger than SK_MEM_QUANTUM, we have lot of false sharing because of mem_reclaim in UDP stack. One possible way to half truesize is to reduce the need by 64 bytes (2112 -> 2048 bytes) Instead of allocating a full cache line at the end of packet for alignment, we can use the fact that skb_shared_info sits at the end of skb->head, and we can use this room, if we convert bnx2x to new build_skb() infrastructure. skb_shared_info will be initialized after hardware finished its transfert, so we can eventually overwrite the final padding. Using build_skb() also reduces cache line misses in the driver, since we use cache hot skb instead of cold ones. Number of in-flight sk_buff structures is lower, they are recycled while still hot. Performance results : (820.000 pps on a rx UDP monothread benchmark, instead of 720.000 pps) Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> CC: Eilon Greenstein <eilong@broadcom.com> CC: Ben Hutchings <bhutchings@solarflare.com> CC: Tom Herbert <therbert@google.com> CC: Jamal Hadi Salim <hadi@mojatatu.com> CC: Stephen Hemminger <shemminger@vyatta.com> CC: Thomas Graf <tgraf@infradead.org> CC: Herbert Xu <herbert@gondor.apana.org.au> CC: Jeff Kirsher <jeffrey.t.kirsher@intel.com> Acked-by: Eilon Greenstein <eilong@broadcom.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/broadcom/bnx2x/bnx2x.h')
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x.h30
1 files changed, 23 insertions, 7 deletions
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 4b90b518d6a6..0f7b7a463eba 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -293,8 +293,13 @@ enum {
#define FCOE_TXQ_IDX(bp) (MAX_ETH_TXQ_IDX(bp))
/* fast path */
+/*
+ * This driver uses new build_skb() API :
+ * RX ring buffer contains pointer to kmalloc() data only,
+ * skb are built only after Hardware filled the frame.
+ */
struct sw_rx_bd {
- struct sk_buff *skb;
+ u8 *data;
DEFINE_DMA_UNMAP_ADDR(mapping);
};
@@ -424,8 +429,8 @@ union host_hc_status_block {
struct bnx2x_agg_info {
/*
- * First aggregation buffer is an skb, the following - are pages.
- * We will preallocate the skbs for each aggregation when
+ * First aggregation buffer is a data buffer, the following - are pages.
+ * We will preallocate the data buffer for each aggregation when
* we open the interface and will replace the BD at the consumer
* with this one when we receive the TPA_START CQE in order to
* keep the Rx BD ring consistent.
@@ -439,6 +444,7 @@ struct bnx2x_agg_info {
u16 parsing_flags;
u16 vlan_tag;
u16 len_on_bd;
+ u32 rxhash;
};
#define Q_STATS_OFFSET32(stat_name) \
@@ -1187,10 +1193,20 @@ struct bnx2x {
#define ETH_MAX_JUMBO_PACKET_SIZE 9600
/* Max supported alignment is 256 (8 shift) */
-#define BNX2X_RX_ALIGN_SHIFT ((L1_CACHE_SHIFT < 8) ? \
- L1_CACHE_SHIFT : 8)
- /* FW use 2 Cache lines Alignment for start packet and size */
-#define BNX2X_FW_RX_ALIGN (2 << BNX2X_RX_ALIGN_SHIFT)
+#define BNX2X_RX_ALIGN_SHIFT min(8, L1_CACHE_SHIFT)
+
+ /* FW uses 2 Cache lines Alignment for start packet and size
+ *
+ * We assume skb_build() uses sizeof(struct skb_shared_info) bytes
+ * at the end of skb->data, to avoid wasting a full cache line.
+ * This reduces memory use (skb->truesize).
+ */
+#define BNX2X_FW_RX_ALIGN_START (1UL << BNX2X_RX_ALIGN_SHIFT)
+
+#define BNX2X_FW_RX_ALIGN_END \
+ max(1UL << BNX2X_RX_ALIGN_SHIFT, \
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
#define BNX2X_PXP_DRAM_ALIGN (BNX2X_RX_ALIGN_SHIFT - 5)
struct host_sp_status_block *def_status_blk;