From: Doug Ledford <dledford@redhat.com> Date: Mon, 15 Jun 2009 09:52:44 -0400 Subject: [infiniband] mlx4_en: update to ofed 1.4.1 final bits Message-id: 61047fb4fcb44e79e5eb669d5de2e44aae5f2eac.1245072810.git.dledford@redhat.com O-Subject: [Patch RHEL5.4 09/16] [mlx4_en] update to ofed 1.4.1 final bits Bugzilla: 506097 Signed-off-by: Doug Ledford <dledford@redhat.com> diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile index f5487c6..9b60070 100644 --- a/drivers/net/mlx4/Makefile +++ b/drivers/net/mlx4/Makefile @@ -11,4 +11,4 @@ mlx4_core-y := alloc.o catas.o cmd.o cq.o eq.o fw.o icm.o intf.o main.o mcg.o \ obj-$(CONFIG_MLX4_EN) += mlx4_en.o mlx4_en-y := en_main.o en_tx.o en_rx.o en_params.o en_port.o en_cq.o \ - en_resources.o en_netdev.o en_frag.o + en_resources.o en_netdev.o en_frag.o en_lro.o diff --git a/drivers/net/mlx4/en_cq.c b/drivers/net/mlx4/en_cq.c index 8e04633..440633a 100644 --- a/drivers/net/mlx4/en_cq.c +++ b/drivers/net/mlx4/en_cq.c @@ -62,6 +62,10 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv, } cq->ring = ring; cq->is_tx = mode; + if (priv->rx_ring[ring].use_frags) + cq->process_cq = mlx4_en_process_rx_cq; + else + cq->process_cq = mlx4_en_process_rx_cq_skb; spin_lock_init(&cq->lock); err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres, diff --git a/drivers/net/mlx4/en_lro.c b/drivers/net/mlx4/en_lro.c new file mode 100644 index 0000000..6a9e897 --- /dev/null +++ b/drivers/net/mlx4/en_lro.c @@ -0,0 +1,540 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <net/tcp.h> +#include <linux/if_vlan.h> +#include <linux/delay.h> + +#include "mlx4_en.h" + +/* LRO hash function - using sum of source and destination port LSBs is + * good enough */ +#define LRO_INDEX(th, size) \ + ((*((u8*) &th->source + 1) + *((u8*) &th->dest + 1)) & (size - 1)) + +/* #define CONFIG_MLX4_EN_DEBUG_LRO */ + +#ifdef CONFIG_MLX4_EN_DEBUG_LRO +static void mlx4_en_lro_validate(struct mlx4_en_priv* priv, struct mlx4_en_lro *lro) +{ + int i; + int size, size2; + struct sk_buff *skb = lro->skb; + skb_frag_t *frags; + int len, len2; + int cur_skb = 0; + + /* Sum fragment sizes of first skb */ + len = skb->len; + size = skb_headlen(skb); + frags = skb_shinfo(skb)->frags; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + size += frags[i].size; + } + + /* Add in fragments of linked skb's */ + skb = skb_shinfo(skb)->frag_list; + while (skb) { + cur_skb++; + len2 = skb->len; + if (skb_headlen(skb)) { + mlx4_err(priv->mdev, "Bad LRO format: non-zero headlen " + "in fraglist (skb:%d)\n", cur_skb); + return; + } + + size2 = 0; + frags = skb_shinfo(skb)->frags; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + size2 += frags[i].size; + } + + if (size2 != len2) { + mlx4_err(priv->mdev, "Bad skb size:%d in LRO fraglist. 
" + "Expected:%d (skb:%d)\n", size2, len2, cur_skb); + return; + } + size += size2; + skb = skb->next; + } + + if (size != len) + mlx4_err(priv->mdev, "Bad LRO size:%d expected:%d\n", size, len); +} +#endif /* MLX4_EN_DEBUG_LRO */ + +static void mlx4_en_lro_flush_single(struct mlx4_en_priv* priv, + struct mlx4_en_rx_ring* ring, struct mlx4_en_lro *lro) +{ + struct sk_buff *skb = lro->skb; + struct iphdr *iph = (struct iphdr *) skb->data; + struct tcphdr *th = (struct tcphdr *)(iph + 1); + unsigned int headlen = skb_headlen(skb); + __wsum tcp_hdr_csum; + u32 *ts; + + /* Update IP length and checksum */ + iph->tot_len = htons(lro->tot_len); + iph->check = 0; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + + /* Update latest TCP ack, window, psh, and timestamp */ + th->ack_seq = lro->ack_seq; + th->window = lro->window; + th->psh = !!lro->psh; + if (lro->has_timestamp) { + ts = (u32 *) (th + 1); + ts[1] = htonl(lro->tsval); + ts[2] = lro->tsecr; + } + th->check = 0; + tcp_hdr_csum = csum_partial((u8 *)th, th->doff << 2, 0); + lro->data_csum = csum_add(lro->data_csum, tcp_hdr_csum); + th->check = csum_tcpudp_magic(iph->saddr, iph->daddr, + lro->tot_len - (iph->ihl << 2), + IPPROTO_TCP, lro->data_csum); + + /* Update skb */ + skb->len = lro->tot_len; + skb->data_len = lro->tot_len - headlen; + skb->truesize = skb->len + sizeof(struct sk_buff); + skb_shinfo(skb)->gso_size = lro->mss; + +#ifdef CONFIG_MLX4_EN_DEBUG_LRO + mlx4_en_lro_validate(priv, lro); +#endif /* CONFIG_MLX4_EN_DEBUG_LRO */ + + /* Push it up the stack */ + if (priv->vlgrp && lro->has_vlan) + vlan_hwaccel_receive_skb(skb, priv->vlgrp, + be16_to_cpu(lro->vlan_prio)); + else + netif_receive_skb(skb); + priv->dev->last_rx = jiffies; + + /* Increment stats */ + priv->port_stats.lro_flushed++; + + /* Move session back to the free list */ + hlist_del(&lro->node); + hlist_del(&lro->flush_node); + hlist_add_head(&lro->node, &ring->lro_free); +} + +void mlx4_en_lro_flush(struct mlx4_en_priv* priv, 
struct mlx4_en_rx_ring *ring, u8 all) +{ + struct mlx4_en_lro *lro; + struct hlist_node *node, *tmp; + + hlist_for_each_entry_safe(lro, node, tmp, &ring->lro_flush, flush_node) { + if (all || time_after(jiffies, lro->expires)) + mlx4_en_lro_flush_single(priv, ring, lro); + } +} + +static inline int mlx4_en_lro_append(struct mlx4_en_priv *priv, + struct mlx4_en_lro *lro, + struct mlx4_en_rx_desc *rx_desc, + struct skb_frag_struct *skb_frags, + struct mlx4_en_rx_alloc *page_alloc, + unsigned int data_len, + int hlen) +{ + struct sk_buff *skb = lro->skb_last; + struct skb_shared_info *info; + struct skb_frag_struct *frags_copy; + int nr_frags; + + if (skb_shinfo(skb)->nr_frags + priv->num_frags > MAX_SKB_FRAGS) + return -ENOMEM; + + info = skb_shinfo(skb); + + /* Copy fragments from descriptor ring to skb */ + frags_copy = info->frags + info->nr_frags; + nr_frags = mlx4_en_complete_rx_desc(priv, rx_desc, skb_frags, + frags_copy, + page_alloc, + data_len + hlen); + if (!nr_frags) { + mlx4_dbg(DRV, priv, "Failed completing rx desc during LRO append\n"); + return -ENOMEM; + } + + /* Skip over headers */ + frags_copy[0].page_offset += hlen; + + if (nr_frags == 1) + frags_copy[0].size = data_len; + else { + /* Adjust size of last fragment to match packet length. 
+ * Note: if this fragment is also the first one, the + * operation is completed in the next line */ + frags_copy[nr_frags - 1].size = hlen + data_len - + priv->frag_info[nr_frags - 1].frag_prefix_size; + + /* Adjust size of first fragment */ + frags_copy[0].size -= hlen; + } + + /* Update skb bookkeeping */ + skb->len += data_len; + skb->data_len += data_len; + info->nr_frags += nr_frags; + return 0; +} + +static inline struct mlx4_en_lro *mlx4_en_lro_find_session(struct mlx4_en_dev *mdev, + struct mlx4_en_rx_ring *ring, + struct iphdr *iph, + struct tcphdr *th) +{ + struct mlx4_en_lro *lro; + struct hlist_node *node; + int index = LRO_INDEX(th, mdev->profile.num_lro); + struct hlist_head *list = &ring->lro_hash[index]; + + hlist_for_each_entry(lro, node, list, node) { + if (lro->sport_dport == *((u32*) &th->source) && + lro->saddr == iph->saddr && + lro->daddr == iph->daddr) + return lro; + } + return NULL; +} + +static inline struct mlx4_en_lro *mlx4_en_lro_alloc_session(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring) +{ + return hlist_empty(&ring->lro_free) ? 
NULL : + hlist_entry(ring->lro_free.first, struct mlx4_en_lro, node); +} + +static __wsum mlx4_en_lro_tcp_data_csum(struct iphdr *iph, + struct tcphdr *th, int len) +{ + __wsum tcp_csum; + __wsum tcp_hdr_csum; + __wsum tcp_ps_hdr_csum; + + tcp_csum = ~csum_unfold(th->check); + tcp_hdr_csum = csum_partial((u8 *)th, th->doff << 2, tcp_csum); + + tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, + len + (th->doff << 2), + IPPROTO_TCP, 0); + + return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum), + tcp_ps_hdr_csum); +} + +int mlx4_en_lro_rx(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, + struct mlx4_en_rx_desc *rx_desc, + struct skb_frag_struct *skb_frags, + unsigned int length, + struct mlx4_cqe *cqe) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_lro *lro; + struct sk_buff *skb; + struct iphdr *iph; + struct tcphdr *th; + dma_addr_t dma; + int tcp_hlen; + int tcp_data_len; + int hlen; + u16 ip_len; + void *va; + u32 *ts; + u32 seq; + u32 tsval = (u32) ~0UL; + u32 tsecr = 0; + u32 ack_seq; + u16 window; + + /* This packet is eligible for LRO if it is: + * - DIX Ethernet (type interpretation) + * - TCP/IP (v4) + * - without IP options + * - not an IP fragment */ + if (!mlx4_en_can_lro(cqe->status)) + return -1; + + /* Get pointer to TCP header. 
We already know that the packet is DIX Ethernet/IPv4/TCP + * with no VLAN (HW stripped it) and no IP options */ + va = page_address(skb_frags[0].page) + skb_frags[0].page_offset; + iph = va + ETH_HLEN; + th = (struct tcphdr *)(iph + 1); + + /* Synchronize headers for processing */ + dma = be64_to_cpu(rx_desc->data[0].addr); +#define MAX_LRO_HEADER (ETH_HLEN + \ + sizeof(*iph) + \ + sizeof(*th) + \ + TCPOLEN_TSTAMP_ALIGNED) + dma_sync_single_range_for_cpu(&mdev->pdev->dev, dma, 0, + MAX_LRO_HEADER, DMA_FROM_DEVICE); + + /* We only handle aligned timestamp options */ + tcp_hlen = (th->doff << 2); + if (tcp_hlen == sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) { + ts = (u32*) (th + 1); + if (unlikely(*ts != htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP))) + goto sync_device; + tsval = ntohl(ts[1]); + tsecr = ts[2]; + } else if (tcp_hlen != sizeof(*th)) + goto sync_device; + + + /* At this point we know we have a TCP packet that is likely to be + * eligible for LRO. 
Therefore, see now if we have an outstanding + * session that corresponds to this packet so we could flush it if + * something still prevents LRO */ + lro = mlx4_en_lro_find_session(mdev, ring, iph, th); + + /* ensure no bits set besides ack or psh */ + if (th->fin || th->syn || th->rst || th->urg || th->ece || + th->cwr || !th->ack) { + if (lro) { + /* First flush session to keep packets in-order */ + mlx4_en_lro_flush_single(priv, ring, lro); + } + goto sync_device; + } + + /* Get ip length and verify that the frame is big enough */ + ip_len = ntohs(iph->tot_len); + if (unlikely(length < ETH_HLEN + ip_len)) { + mlx4_warn(mdev, "Cannot LRO - ip payload exceeds frame!\n"); + goto sync_device; + } + + /* Get TCP payload length */ + tcp_data_len = ip_len - tcp_hlen - sizeof(struct iphdr); + seq = ntohl(th->seq); + if (!tcp_data_len) + goto flush_session; + + if (lro) { + /* Check VLAN tag */ + if (cqe->vlan_my_qpn & MLX4_CQE_VLAN_PRESENT_MASK) { + if (cqe->sl_vid != lro->vlan_prio || !lro->has_vlan) { + mlx4_en_lro_flush_single(priv, ring, lro); + goto sync_device; + } + } else if (lro->has_vlan) { + mlx4_en_lro_flush_single(priv, ring, lro); + goto sync_device; + } + + /* Check sequence number */ + if (unlikely(seq != lro->next_seq)) { + mlx4_en_lro_flush_single(priv, ring, lro); + goto sync_device; + } + + /* If the cumulative IP length is over 64K, flush and start + * a new session */ + if (lro->tot_len + tcp_data_len > 0xffff) { + mlx4_en_lro_flush_single(priv, ring, lro); + goto new_session; + } + + /* Check timestamps */ + if (tcp_hlen != sizeof(*th)) { + if (unlikely(lro->tsval > tsval || !tsecr)) + goto sync_device; + } + + window = th->window; + ack_seq = th->ack_seq; + if (likely(tcp_data_len)) { + /* Append the data! 
*/ + hlen = ETH_HLEN + sizeof(struct iphdr) + tcp_hlen; + if (mlx4_en_lro_append(priv, lro, rx_desc, skb_frags, + ring->page_alloc, + tcp_data_len, hlen)) { + mlx4_en_lro_flush_single(priv, ring, lro); + goto sync_device; + } + } else { + /* No data */ + dma_sync_single_range_for_device(&mdev->dev->pdev->dev, dma, + 0, MAX_LRO_HEADER, + DMA_FROM_DEVICE); + } + + /* Update session */ + lro->psh |= th->psh; + lro->next_seq += tcp_data_len; + lro->data_csum = csum_block_add(lro->data_csum, + mlx4_en_lro_tcp_data_csum(iph, th, + tcp_data_len), + lro->tot_len); + lro->tot_len += tcp_data_len; + lro->tsval = tsval; + lro->tsecr = tsecr; + lro->ack_seq = ack_seq; + lro->window = window; + if (tcp_data_len > lro->mss) + lro->mss = tcp_data_len; + priv->port_stats.lro_aggregated++; + if (th->psh) + mlx4_en_lro_flush_single(priv, ring, lro); + return 0; + } + +new_session: + if (th->psh) + goto sync_device; + lro = mlx4_en_lro_alloc_session(priv, ring); + if (lro) { + skb = mlx4_en_rx_skb(priv, rx_desc, skb_frags, ring->page_alloc, + ETH_HLEN + ip_len); + if (skb) { + int index; + + /* Add in the skb */ + lro->skb = skb; + lro->skb_last = skb; + skb->protocol = eth_type_trans(skb, priv->dev); + skb->ip_summed = CHECKSUM_UNNECESSARY; + + /* Initialize session */ + lro->saddr = iph->saddr; + lro->daddr = iph->daddr; + lro->sport_dport = *((u32*) &th->source); + + lro->next_seq = seq + tcp_data_len; + lro->tot_len = ip_len; + lro->psh = th->psh; + lro->ack_seq = th->ack_seq; + lro->window = th->window; + lro->mss = tcp_data_len; + lro->data_csum = mlx4_en_lro_tcp_data_csum(iph, th, + tcp_data_len); + + /* Handle vlans */ + if (cqe->vlan_my_qpn & MLX4_CQE_VLAN_PRESENT_MASK) { + lro->vlan_prio = cqe->sl_vid; + lro->has_vlan = 1; + } else + lro->has_vlan = 0; + + /* Handle timestamps */ + if (tcp_hlen != sizeof(*th)) { + lro->tsval = tsval; + lro->tsecr = tsecr; + lro->has_timestamp = 1; + } else { + lro->tsval = (u32) ~0UL; + lro->has_timestamp = 0; + } + + /* Activate this 
session */ + lro->expires = jiffies + HZ / 25; + hlist_del(&lro->node); + index = LRO_INDEX(th, mdev->profile.num_lro); + + hlist_add_head(&lro->node, &ring->lro_hash[index]); + hlist_add_head(&lro->flush_node, &ring->lro_flush); + priv->port_stats.lro_aggregated++; + return 0; + } else { + /* Packet is dropped because we were not able to allocate new + * page for fragments */ + dma_sync_single_range_for_device(&mdev->pdev->dev, dma, + 0, MAX_LRO_HEADER, + DMA_FROM_DEVICE); + return 0; + } + } else { + priv->port_stats.lro_no_desc++; + } + +flush_session: + if (lro) + mlx4_en_lro_flush_single(priv, ring, lro); +sync_device: + dma_sync_single_range_for_device(&mdev->pdev->dev, dma, 0, + MAX_LRO_HEADER, DMA_FROM_DEVICE); + return -1; +} + +void mlx4_en_lro_destroy(struct mlx4_en_rx_ring *ring) +{ + struct mlx4_en_lro *lro; + struct hlist_node *node, *tmp; + + hlist_for_each_entry_safe(lro, node, tmp, &ring->lro_free, node) { + hlist_del(&lro->node); + kfree(lro); + } + kfree(ring->lro_hash); +} + +int mlx4_en_lro_init(struct mlx4_en_rx_ring *ring, int num_lro) +{ + struct mlx4_en_lro *lro; + int i; + + INIT_HLIST_HEAD(&ring->lro_free); + INIT_HLIST_HEAD(&ring->lro_flush); + ring->lro_hash = kmalloc(sizeof(struct hlist_head) * num_lro, + GFP_KERNEL); + if (!ring->lro_hash) + return -ENOMEM; + + for (i = 0; i < num_lro; i++) { + INIT_HLIST_HEAD(&ring->lro_hash[i]); + lro = kzalloc(sizeof(struct mlx4_en_lro), GFP_KERNEL); + if (!lro) { + mlx4_en_lro_destroy(ring); + return -ENOMEM; + } + INIT_HLIST_NODE(&lro->node); + INIT_HLIST_NODE(&lro->flush_node); + hlist_add_head(&lro->node, &ring->lro_free); + } + return 0; +} + + diff --git a/drivers/net/mlx4/en_main.c b/drivers/net/mlx4/en_main.c index b669fb2..1bf6be2 100644 --- a/drivers/net/mlx4/en_main.c +++ b/drivers/net/mlx4/en_main.c @@ -183,7 +183,7 @@ static void *mlx4_en_add(struct mlx4_dev *dev) mdev->workqueue = create_singlethread_workqueue("mlx4_en"); if (!mdev->workqueue) { err = -ENOMEM; - goto err_close_nic; + 
goto err_mr; } /* At this stage all non-port specific tasks are complete: @@ -216,9 +216,8 @@ err_free_netdev: flush_workqueue(mdev->workqueue); /* Stop event queue before we drop down to release shared SW state */ - -err_close_nic: destroy_workqueue(mdev->workqueue); + err_mr: mlx4_mr_free(dev, &mdev->mr); err_uar: diff --git a/drivers/net/mlx4/en_netdev.c b/drivers/net/mlx4/en_netdev.c index e961481..450b918 100644 --- a/drivers/net/mlx4/en_netdev.c +++ b/drivers/net/mlx4/en_netdev.c @@ -352,11 +352,9 @@ static void mlx4_en_tx_timeout(struct net_device *dev) if (netif_msg_timer(priv)) mlx4_warn(mdev, "Tx timeout called on port:%d\n", priv->port); - if (netif_carrier_ok(dev)) { - priv->port_stats.tx_timeout++; - mlx4_dbg(DRV, priv, "Scheduling watchdog\n"); - queue_work(mdev->workqueue, &priv->watchdog_task); - } + priv->port_stats.tx_timeout++; + mlx4_dbg(DRV, priv, "Scheduling watchdog\n"); + queue_work(mdev->workqueue, &priv->watchdog_task); } @@ -586,7 +584,7 @@ int mlx4_en_start_port(struct net_device *dev) err = mlx4_en_activate_cq(priv, cq); if (err) { mlx4_err(mdev, "Failed activating Rx CQ\n"); - goto rx_err; + goto cq_err; } for (j = 0; j < cq->size; j++) cq->buf[j].owner_sr_opcode = MLX4_CQE_OWNER_MASK; @@ -770,9 +768,14 @@ static void mlx4_en_restart(struct work_struct *work) struct net_device *dev = priv->dev; mlx4_dbg(DRV, priv, "Watchdog task called for port %d\n", priv->port); - mlx4_en_stop_port(dev); - if (mlx4_en_start_port(dev)) - mlx4_err(mdev, "Failed restarting port %d\n", priv->port); + + mutex_lock(&mdev->state_lock); + if (priv->port_up) { + mlx4_en_stop_port(dev); + if (mlx4_en_start_port(dev)) + mlx4_err(mdev, "Failed restarting port %d\n", priv->port); + } + mutex_unlock(&mdev->state_lock); } @@ -873,14 +876,15 @@ int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) /* Create rx Rings */ for (i = 0; i < priv->rx_ring_num; i++) { - if (mlx4_en_create_cq(priv, &priv->rx_cq[i], - prof->rx_ring_size, i, RX)) - goto err; - if (i > 0) 
priv->rx_ring[i].use_frags = 1; else priv->rx_ring[i].use_frags = 0; + + if (mlx4_en_create_cq(priv, &priv->rx_cq[i], + prof->rx_ring_size, i, RX)) + goto err; + if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i], prof->rx_ring_size)) goto err; diff --git a/drivers/net/mlx4/en_params.c b/drivers/net/mlx4/en_params.c index 8ac0c02..0b2b7c0 100644 --- a/drivers/net/mlx4/en_params.c +++ b/drivers/net/mlx4/en_params.c @@ -96,21 +96,6 @@ int mlx4_en_get_profile(struct mlx4_en_dev *mdev) * Ethtool support */ -static void mlx4_en_update_lro_stats(struct mlx4_en_priv *priv) -{ - int i; - - priv->port_stats.lro_aggregated = 0; - priv->port_stats.lro_flushed = 0; - priv->port_stats.lro_no_desc = 0; - - for (i = 0; i < priv->rx_ring_num; i++) { - priv->port_stats.lro_aggregated += priv->rx_ring[i].lro.stats.aggregated; - priv->port_stats.lro_flushed += priv->rx_ring[i].lro.stats.flushed; - priv->port_stats.lro_no_desc += priv->rx_ring[i].lro.stats.no_desc; - } -} - static void mlx4_en_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) { @@ -217,8 +202,6 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev, spin_lock_bh(&priv->stats_lock); - mlx4_en_update_lro_stats(priv); - for (i = 0; i < NUM_MAIN_STATS; i++) data[index++] = ((unsigned long *) &priv->stats)[i]; for (i = 0; i < NUM_PORT_STATS; i++) diff --git a/drivers/net/mlx4/en_rx.c b/drivers/net/mlx4/en_rx.c index 93d8e02..cc27524 100644 --- a/drivers/net/mlx4/en_rx.c +++ b/drivers/net/mlx4/en_rx.c @@ -52,18 +52,6 @@ static void mlx4_en_srq_event(struct mlx4_srq *srq, enum mlx4_event type) return; } -static int mlx4_en_get_frag_header(struct skb_frag_struct *frags, void **mac_hdr, - void **ip_hdr, void **tcpudp_hdr, - u64 *hdr_flags, void *priv) -{ - *mac_hdr = page_address(frags->page) + frags->page_offset; - *ip_hdr = *mac_hdr + ETH_HLEN; - *tcpudp_hdr = (struct tcphdr *)(*ip_hdr + sizeof(struct iphdr)); - *hdr_flags = LRO_IPV4 | LRO_TCP; - - return 0; -} - static int 
mlx4_en_alloc_frag(struct mlx4_en_priv *priv, struct mlx4_en_rx_desc *rx_desc, struct skb_frag_struct *skb_frags, @@ -262,17 +250,9 @@ static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv) err = mlx4_en_prepare_rx_desc_skb(priv, ring, ring->actual_size); if (err) { - if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) { - mlx4_err(mdev, "Failed to allocate " - "enough rx buffers\n"); - return -ENOMEM; - } else { - if (netif_msg_rx_err(priv)) - mlx4_warn(mdev, - "Only %d buffers allocated\n", - ring->actual_size); - goto out; - } + mlx4_err(mdev, "Failed to allocate " + "enough rx buffers\n"); + return -ENOMEM; } ring->actual_size++; ring->prod++; @@ -439,23 +419,11 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, } ring->buf = ring->wqres.buf.direct.buf; - /* Configure lro mngr */ - memset(&ring->lro, 0, sizeof(struct net_lro_mgr)); - ring->lro.dev = priv->dev; - ring->lro.features = LRO_F_NAPI; - ring->lro.frag_align_pad = NET_IP_ALIGN; - ring->lro.ip_summed = CHECKSUM_UNNECESSARY; - ring->lro.ip_summed_aggr = CHECKSUM_UNNECESSARY; - ring->lro.max_desc = mdev->profile.num_lro; - ring->lro.max_aggr = MAX_SKB_FRAGS; - ring->lro.lro_arr = kzalloc(mdev->profile.num_lro * - sizeof(struct net_lro_desc), - GFP_KERNEL); - if (!ring->lro.lro_arr) { - mlx4_err(mdev, "Failed to allocate lro array\n"); + /* Allocate LRO sessions */ + if (mdev->profile.num_lro && mlx4_en_lro_init(ring, mdev->profile.num_lro)) { + mlx4_err(mdev, "Failed allocating lro sessions\n"); goto err_map; } - ring->lro.get_frag_header = mlx4_en_get_frag_header; return 0; @@ -507,6 +475,7 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) err = mlx4_en_init_allocator(priv, ring); if (err) { mlx4_err(mdev, "Failed initializing ring allocator\n"); + ring_ind--; goto err_allocator; } } else { @@ -541,6 +510,7 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) &ring->wqres.mtt, ring->wqres.db.dma, &ring->srq); if (err){ mlx4_err(mdev, "Failed to allocate srq\n"); + ring_ind--; goto 
err_srq; } ring->srq.event = mlx4_en_srq_event; @@ -576,7 +546,8 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, { struct mlx4_en_dev *mdev = priv->mdev; - kfree(ring->lro.lro_arr); + if (mdev->profile.num_lro) + mlx4_en_lro_destroy(ring); mlx4_en_unmap_buffer(&ring->wqres.buf); mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); vfree(ring->rx_info); @@ -688,6 +659,10 @@ struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, used_frags = mlx4_en_complete_rx_desc(priv, rx_desc, skb_frags, skb_shinfo(skb)->frags, page_alloc, length); + if (unlikely(!used_frags)) { + kfree_skb(skb); + return NULL; + } skb_shinfo(skb)->nr_frags = used_frags; /* Copy headers into the skb linear buffer */ @@ -761,7 +736,7 @@ mlx4_en_get_rx_skb(struct mlx4_en_priv *priv, struct sk_buff *skb; dma_addr_t dma; - if (length <= SMALL_PACKET_SIZE) { + if (length <= MLX4_EN_SMALL_PKT_SIZE) { skb = dev_alloc_skb(length + NET_IP_ALIGN); if (unlikely(!skb)) return NULL; @@ -894,11 +869,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud struct mlx4_cqe *cqe; struct mlx4_en_rx_ring *ring = &priv->rx_ring[cq->ring]; struct skb_frag_struct *skb_frags; - struct skb_frag_struct lro_frags[MLX4_EN_MAX_RX_FRAGS]; struct mlx4_en_rx_desc *rx_desc; struct sk_buff *skb; int index; - int nr; unsigned int length; int polled = 0; int ip_summed; @@ -938,38 +911,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && (cqe->checksum == 0xffff)) { priv->port_stats.rx_chksum_good++; - /* This packet is eligible for LRO if it is: - * - DIX Ethernet (type interpretation) - * - TCP/IP (v4) - * - without IP options - * - not an IP fragment */ - if (mlx4_en_can_lro(cqe->status) && - dev->features & NETIF_F_LRO) { - - nr = mlx4_en_complete_rx_desc( - priv, rx_desc, - skb_frags, lro_frags, - ring->page_alloc, length); - if (!nr) - goto next; - - if (priv->vlgrp && (cqe->vlan_my_qpn 
& - MLX4_CQE_VLAN_PRESENT_MASK)) { - lro_vlan_hwaccel_receive_frags( - &ring->lro, lro_frags, - length, length, - priv->vlgrp, - be16_to_cpu(cqe->sl_vid), - NULL, 0); - } else - lro_receive_frags(&ring->lro, - lro_frags, - length, - length, - NULL, 0); - + if (mdev->profile.num_lro && + !mlx4_en_lro_rx(priv, ring, rx_desc, + skb_frags, length, cqe)) goto next; - } /* LRO not possible, complete processing here */ ip_summed = CHECKSUM_UNNECESSARY; @@ -1010,13 +955,15 @@ next: if (++polled == budget) { /* We are here because we reached the NAPI budget - * flush only pending LRO sessions */ - lro_flush_all(&ring->lro); + if (mdev->profile.num_lro) + mlx4_en_lro_flush(priv, ring, 0); goto out; } } /* If CQ is empty flush all LRO sessions unconditionally */ - lro_flush_all(&ring->lro); + if (mdev->profile.num_lro) + mlx4_en_lro_flush(priv, ring, 1); out: AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled); @@ -1052,13 +999,9 @@ int mlx4_en_poll_rx_cq(struct net_device *poll_dev, int *budget) struct net_device *dev = cq->dev; struct mlx4_en_priv *priv = netdev_priv(dev); int done; - int work = min(*budget, poll_dev->quota); - - if (priv->rx_ring[cq->ring].use_frags) - done = mlx4_en_process_rx_cq(dev, cq, work); - else - done = mlx4_en_process_rx_cq_skb(dev, cq, work); + int work = min(*budget, poll_dev->quota); + done = cq->process_cq(dev, cq, work); dev->quota -= done; *budget -= done; diff --git a/drivers/net/mlx4/mlx4_en.h b/drivers/net/mlx4/mlx4_en.h index 6952b10..d31e5e9 100644 --- a/drivers/net/mlx4/mlx4_en.h +++ b/drivers/net/mlx4/mlx4_en.h @@ -38,7 +38,7 @@ #include <linux/list.h> #include <linux/mutex.h> #include <linux/netdevice.h> -#include <linux/inet_lro.h> +#include <net/checksum.h> #include <linux/mlx4/device.h> #include <linux/mlx4/qp.h> @@ -49,7 +49,7 @@ #include "en_port.h" #define DRV_NAME "mlx4_en" -#define DRV_VERSION "1.4.1-RC7" +#define DRV_VERSION "1.4.1" #define DRV_RELDATE "April 2009" @@ -124,7 +124,7 @@ enum { #define MLX4_EN_MIN_RX_SIZE 
(MLX4_EN_ALLOC_SIZE / SMP_CACHE_BYTES) #define MLX4_EN_MIN_TX_SIZE (4096 / TXBB_SIZE) -#define MLX4_EN_SMALL_PKT_SIZE 128 +#define MLX4_EN_SMALL_PKT_SIZE 64 #define MLX4_EN_TX_HASH_SIZE 256 #define MLX4_EN_TX_HASH_MASK (MLX4_EN_TX_HASH_SIZE - 1) #define MLX4_EN_NUM_HASH_RINGS 8 @@ -283,11 +283,42 @@ struct mlx4_en_rx_desc { struct mlx4_wqe_data_seg data[0]; }; +struct mlx4_en_lro { + struct hlist_node node; + struct hlist_node flush_node; + + /* Id fields come first: */ + u32 saddr; + u32 daddr; + u32 sport_dport; + u32 next_seq; + u16 tot_len; + u8 psh; + + u32 tsval; + u32 tsecr; + u32 ack_seq; + u16 window; + __be16 vlan_prio; + u16 has_vlan; + u16 has_timestamp; + u16 mss; + __wsum data_csum; + + unsigned long expires; + struct sk_buff *skb; + struct sk_buff *skb_last; +}; + + struct mlx4_en_rx_ring { struct mlx4_srq srq; struct mlx4_hwq_resources wqres; struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS]; - struct net_lro_mgr lro; + struct mlx4_en_lro lro; + struct hlist_head *lro_hash; + struct hlist_head lro_free; + struct hlist_head lro_flush; u32 size ; /* number of Rx descs*/ u32 actual_size; u32 size_mask; @@ -337,6 +368,7 @@ struct mlx4_en_cq { enum cq_type is_tx; u16 moder_time; u16 moder_cnt; + int (*process_cq)(struct net_device *, struct mlx4_en_cq *, int); struct mlx4_cqe *buf; #define MLX4_EN_OPCODE_ERROR 0x1e }; @@ -593,6 +625,15 @@ struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, void mlx4_en_set_default_rss_map(struct mlx4_en_priv *priv, struct mlx4_en_rss_map *rss_map, int num_entries, int num_rings); + +void mlx4_en_lro_flush(struct mlx4_en_priv* priv, struct mlx4_en_rx_ring *ring, u8 all); +int mlx4_en_lro_rx(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, + struct mlx4_en_rx_desc *rx_desc, + struct skb_frag_struct *skb_frags, + unsigned int length, struct mlx4_cqe *cqe); +void mlx4_en_lro_destroy(struct mlx4_en_rx_ring *ring); +int mlx4_en_lro_init(struct mlx4_en_rx_ring *ring, int num_lro); + void 
mlx4_en_set_prio_map(struct mlx4_en_priv *priv, u16 *prio_map, u32 ring_num); int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv); void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv);