From: Doug Ledford <dledford@redhat.com>
Date: Tue, 14 Apr 2009 15:23:29 -0400
Subject: [openib] mlx4: Update mlx4_ib and mlx4_core, add mlx4_en
Message-id: 1239737023-31222-3-git-send-email-dledford@redhat.com
O-Subject: [Patch RHEL5.4 02/16] [mlx4] Update mlx4_ib and mlx4_core, add mlx4_en driver
Bugzilla: 456525 477065

Resync the mlx4_ib driver with upstream. The changes visible in this part
of the update include: CQ resize support (gated on firmware 2.5.0) with new
CQ buffer allocation helpers, doorbell records moved from mlx4_ib into
mlx4_core (mlx4_db_alloc/mlx4_db_free), fast register MR and local
invalidate support, hugetlb-aware user MR registration, IPoIB checksum
offload and LSO support, correct LID change/client reregister event
dispatch, and a rework of the diagnostic counter sysfs attributes.

Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index b816fbc..ed68324 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -36,6 +37,9 @@
 #include "mlx4_ib.h"
 #include "user.h"
 
+/* Which firmware version adds support for Resize CQ */
+#define MLX4_FW_VER_RESIZE_CQ	mlx4_fw_ver(2, 5, 0)
+
 static void mlx4_ib_cq_comp(struct mlx4_cq *cq)
 {
 	struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;
@@ -64,13 +68,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
 
 static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
 {
-	int offset = n * sizeof (struct mlx4_cqe);
-
-	if (buf->buf.nbufs == 1)
-		return buf->buf.u.direct.buf + offset;
-	else
-		return buf->buf.u.page_list[offset >> PAGE_SHIFT].buf +
-			(offset & (PAGE_SIZE - 1));
+	return mlx4_buf_offset(&buf->buf, n * sizeof (struct mlx4_cqe));
 }
 
 static void *get_cqe(struct mlx4_ib_cq *cq, int n)
@@ -95,18 +93,75 @@ int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
 {
 	struct mlx4_ib_cq *mcq = to_mcq(cq);
 	struct mlx4_ib_dev *dev = to_mdev(cq->device);
-	struct mlx4_cq_context *context;
+
+	return mlx4_cq_modify(dev->dev, &mcq->mcq, cq_count, cq_period);
+}
+
+static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int nent)
+{
 	int err;
 
-	context = kzalloc(sizeof *context, GFP_KERNEL);
-	if (!context)
-		return -ENOMEM;
+	err = mlx4_buf_alloc(dev->dev, nent * sizeof(struct mlx4_cqe),
+			     PAGE_SIZE * 2, &buf->buf);
+
+	if (err)
+		goto out;
+
+	err = mlx4_mtt_init(dev->dev, buf->buf.npages, buf->buf.page_shift,
+			    &buf->mtt);
+	if (err)
+		goto err_buf;
+
+	err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf);
+	if (err)
+		goto err_mtt;
+
+	return 0;
+
+err_mtt:
+	mlx4_mtt_cleanup(dev->dev, &buf->mtt);
+
+err_buf:
+	mlx4_buf_free(dev->dev, nent * sizeof(struct mlx4_cqe),
+		      &buf->buf);
+
+out:
+	return err;
+}
+
+static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int cqe)
+{
+	mlx4_buf_free(dev->dev, (cqe + 1) * sizeof(struct mlx4_cqe), &buf->buf);
+}
+
+static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context,
+			       struct mlx4_ib_cq_buf *buf, struct ib_umem **umem,
+			       u64 buf_addr, int cqe)
+{
+	int err;
+
+	*umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe),
+			    IB_ACCESS_LOCAL_WRITE, 1);
+	if (IS_ERR(*umem))
+		return PTR_ERR(*umem);
+
+	err = mlx4_mtt_init(dev->dev, ib_umem_page_count(*umem),
+			    ilog2((*umem)->page_size), &buf->mtt);
+	if (err)
+		goto err_buf;
+
+	err = mlx4_ib_umem_write_mtt(dev, &buf->mtt, *umem);
+	if (err)
+		goto err_mtt;
 
-	context->cq_period = cpu_to_be16(cq_period);
-	context->cq_max_count = cpu_to_be16(cq_count);
-	err = mlx4_cq_modify(dev->dev, &mcq->mcq, context, 1);
+	return 0;
+
+err_mtt:
+	mlx4_mtt_cleanup(dev->dev, &buf->mtt);
+
+err_buf:
+	ib_umem_release(*umem);
-	kfree(context);
 
 	return err;
 }
 
@@ -117,7 +172,6 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
 	struct mlx4_ib_dev *dev = to_mdev(ibdev);
 	struct mlx4_ib_cq *cq;
 	struct mlx4_uar *uar;
-	int buf_size;
 	int err;
 
 	if (entries < 1 || entries > dev->dev->caps.max_cqes) {
@@ -131,8 +185,10 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
 	entries = roundup_pow_of_two(entries + 1);
 	cq->ibcq.cqe = entries - 1;
-	buf_size = entries * sizeof (struct mlx4_cqe);
+	mutex_init(&cq->resize_mutex);
 	spin_lock_init(&cq->lock);
+	cq->resize_buf = NULL;
+	cq->resize_umem = NULL;
 
 	if (context) {
 		struct mlx4_ib_create_cq ucmd;
@@ -142,21 +198,10 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
 			goto err_cq;
 		}
 
-		cq->umem = ib_umem_get_dmasync(context, ucmd.buf_addr, buf_size,
-					       IB_ACCESS_LOCAL_WRITE);
-		if (IS_ERR(cq->umem)) {
-			err = PTR_ERR(cq->umem);
-			goto err_cq;
-		}
-
-		err = mlx4_mtt_init(dev->dev, ib_umem_page_count(cq->umem),
-				    ilog2(cq->umem->page_size), &cq->buf.mtt);
-		if (err)
-			goto err_buf;
-
-		err = mlx4_ib_umem_write_mtt(dev, &cq->buf.mtt, cq->umem);
+		err = mlx4_ib_get_cq_umem(dev, context, &cq->buf, &cq->umem,
+					  ucmd.buf_addr, entries);
 		if (err)
-			goto err_mtt;
+			goto err_cq;
 
 		err = mlx4_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
 					  &cq->db);
@@ -165,7 +210,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
 
 		uar = &to_mucontext(context)->uar;
 	} else {
-		err = mlx4_ib_db_alloc(dev, &cq->db, 1);
+		err = mlx4_db_alloc(dev->dev, &cq->db, 1);
 		if (err)
 			goto err_cq;
 
@@ -174,25 +219,17 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
 		*cq->mcq.set_ci_db = 0;
 		*cq->mcq.arm_db    = 0;
 
-		if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &cq->buf.buf)) {
-			err = -ENOMEM;
-			goto err_db;
-		}
-
-		err = mlx4_mtt_init(dev->dev, cq->buf.buf.npages, cq->buf.buf.page_shift,
-				    &cq->buf.mtt);
-		if (err)
-			goto err_buf;
-
-		err = mlx4_buf_write_mtt(dev->dev, &cq->buf.mtt, &cq->buf.buf);
+		err = mlx4_ib_alloc_cq_buf(dev, &cq->buf, entries);
 		if (err)
-			goto err_mtt;
+			goto err_db;
 
 		uar = &dev->priv_uar;
 	}
 
 	err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
-			    cq->db.dma, &cq->mcq);
+			    cq->db.dma, &cq->mcq,
+			    vector == IB_CQ_VECTOR_LEAST_ATTACHED ?
+			    MLX4_LEAST_ATTACHED_VECTOR : vector, 0);
 	if (err)
 		goto err_dbmap;
 
@@ -214,16 +251,14 @@ err_dbmap:
 err_mtt:
 	mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt);
 
-err_buf:
 	if (context)
-		ib_umem_release_dmasync(cq->umem);
+		ib_umem_release(cq->umem);
 	else
-		mlx4_buf_free(dev->dev, entries * sizeof (struct mlx4_cqe),
-			      &cq->buf.buf);
+		mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
 
 err_db:
 	if (!context)
-		mlx4_ib_db_free(dev, &cq->db);
+		mlx4_db_free(dev->dev, &cq->db);
 
 err_cq:
 	kfree(cq);
@@ -231,6 +266,179 @@ err_cq:
 	return ERR_PTR(err);
 }
 
+static int mlx4_alloc_resize_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq,
+				 int entries)
+{
+	int err;
+
+	if (cq->resize_buf)
+		return -EBUSY;
+
+	cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
+	if (!cq->resize_buf)
+		return -ENOMEM;
+
+	err = mlx4_ib_alloc_cq_buf(dev, &cq->resize_buf->buf, entries);
+	if (err) {
+		kfree(cq->resize_buf);
+		cq->resize_buf = NULL;
+		return err;
+	}
+
+	cq->resize_buf->cqe = entries - 1;
+
+	return 0;
+}
+
+static int mlx4_alloc_resize_umem(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq,
+				  int entries, struct ib_udata *udata)
+{
+	struct mlx4_ib_resize_cq ucmd;
+	int err;
+
+	if (cq->resize_umem)
+		return -EBUSY;
+
+	if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
+		return -EFAULT;
+
+	cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
+	if (!cq->resize_buf)
+		return -ENOMEM;
+
+	err = mlx4_ib_get_cq_umem(dev, cq->umem->context, &cq->resize_buf->buf,
+				  &cq->resize_umem, ucmd.buf_addr, entries);
+	if (err) {
+		kfree(cq->resize_buf);
+		cq->resize_buf = NULL;
+		return err;
+	}
+
+	cq->resize_buf->cqe = entries - 1;
+
+	return 0;
+}
+
+static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq)
+{
+	u32 i;
+
+	i = cq->mcq.cons_index;
+	while (get_sw_cqe(cq, i & cq->ibcq.cqe))
+		++i;
+
+	return i - cq->mcq.cons_index;
+}
+
+static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
+{
+	struct mlx4_cqe *cqe, *new_cqe;
+	int i;
+
+	i = cq->mcq.cons_index;
+	cqe = get_cqe(cq, i & cq->ibcq.cqe);
+	while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) {
+		new_cqe = get_cqe_from_buf(&cq->resize_buf->buf,
+					   (i + 1) & cq->resize_buf->cqe);
+		memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe));
+		new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) |
+			(((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0);
+		cqe = get_cqe(cq, ++i & cq->ibcq.cqe);
+	}
+	++cq->mcq.cons_index;
+}
+
+int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
+{
+	struct mlx4_ib_dev *dev = to_mdev(ibcq->device);
+	struct mlx4_ib_cq *cq = to_mcq(ibcq);
+	struct mlx4_mtt mtt;
+	int outst_cqe;
+	int err;
+
+	if (dev->dev->caps.fw_ver < MLX4_FW_VER_RESIZE_CQ)
+		return -ENOSYS;
+
+	mutex_lock(&cq->resize_mutex);
+
+	if (entries < 1 || entries > dev->dev->caps.max_cqes) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	entries = roundup_pow_of_two(entries + 1);
+	if (entries == ibcq->cqe + 1) {
+		err = 0;
+		goto out;
+	}
+
+	if (ibcq->uobject) {
+		err = mlx4_alloc_resize_umem(dev, cq, entries, udata);
+		if (err)
+			goto out;
+	} else {
+		/* Can't be smaller than the number of outstanding CQEs */
+		outst_cqe = mlx4_ib_get_outstanding_cqes(cq);
+		if (entries < outst_cqe + 1) {
+			err = 0;
+			goto out;
+		}
+
+		err = mlx4_alloc_resize_buf(dev, cq, entries);
+		if (err)
+			goto out;
+	}
+
+	mtt = cq->buf.mtt;
+	err = mlx4_cq_resize(dev->dev, &cq->mcq, entries, &cq->resize_buf->buf.mtt);
+	if (err)
+		goto err_buf;
+
+	mlx4_mtt_cleanup(dev->dev, &mtt);
+	if (ibcq->uobject) {
+		cq->buf      = cq->resize_buf->buf;
+		cq->ibcq.cqe = cq->resize_buf->cqe;
+		ib_umem_release(cq->umem);
+		cq->umem     = cq->resize_umem;
+
+		kfree(cq->resize_buf);
+		cq->resize_buf = NULL;
+		cq->resize_umem = NULL;
+	} else {
+		spin_lock_irq(&cq->lock);
+		if (cq->resize_buf) {
+			mlx4_ib_cq_resize_copy_cqes(cq);
+			mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
+			cq->buf      = cq->resize_buf->buf;
+			cq->ibcq.cqe = cq->resize_buf->cqe;
+
+			kfree(cq->resize_buf);
+			cq->resize_buf = NULL;
+		}
+		spin_unlock_irq(&cq->lock);
+	}
+
+	goto out;
+
+err_buf:
+	mlx4_mtt_cleanup(dev->dev, &cq->resize_buf->buf.mtt);
+	if (!ibcq->uobject)
+		mlx4_ib_free_cq_buf(dev, &cq->resize_buf->buf,
+				    cq->resize_buf->cqe);
+
+	kfree(cq->resize_buf);
+	cq->resize_buf = NULL;
+
+	if (cq->resize_umem) {
+		ib_umem_release(cq->resize_umem);
+		cq->resize_umem = NULL;
+	}
+
+out:
+	mutex_unlock(&cq->resize_mutex);
+	return err;
+}
+
 int mlx4_ib_destroy_cq(struct ib_cq *cq)
 {
 	struct mlx4_ib_dev *dev = to_mdev(cq->device);
@@ -241,11 +449,10 @@ int mlx4_ib_destroy_cq(struct ib_cq *cq)
 	if (cq->uobject) {
 		mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db);
-		ib_umem_release_dmasync(mcq->umem);
+		ib_umem_release(mcq->umem);
 	} else {
-		mlx4_buf_free(dev->dev, (cq->cqe + 1) * sizeof (struct mlx4_cqe),
-			      &mcq->buf.buf);
-		mlx4_ib_db_free(dev, &mcq->db);
+		mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe);
+		mlx4_db_free(dev->dev, &mcq->db);
 	}
 
 	kfree(mcq);
@@ -324,6 +531,20 @@ static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe,
 	wc->vendor_err = cqe->vendor_err_syndrome;
 }
 
+static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum)
+{
+	return ((status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
+				      MLX4_CQE_STATUS_IPV4F |
+				      MLX4_CQE_STATUS_IPV4OPT |
+				      MLX4_CQE_STATUS_IPV6 |
+				      MLX4_CQE_STATUS_IPOK)) ==
+		cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
+			    MLX4_CQE_STATUS_IPOK)) &&
+		(status & cpu_to_be16(MLX4_CQE_STATUS_UDP |
+				      MLX4_CQE_STATUS_TCP)) &&
+		checksum == cpu_to_be16(0xffff);
+}
+
 static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
 			    struct mlx4_ib_qp **cur_qp,
 			    struct ib_wc *wc)
@@ -334,14 +555,10 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
 	struct mlx4_ib_srq *srq;
 	int is_send;
 	int is_error;
-	u16 wqe_ctr;
 	u32 g_mlpath_rqpn;
-	__be32 status;
-
-#define CSUM_MASK_BITS cpu_to_be32(0x13c00000)
-#define CSUM_VAL_BITS  cpu_to_be32(0x10400000)
-#define CSUM_MASK2_BITS cpu_to_be32(0x0c000000)
+	u16 wqe_ctr;
 
+repoll:
 	cqe = next_cqe_sw(cq);
 	if (!cqe)
 		return -EAGAIN;
@@ -364,18 +581,34 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
 		return -EINVAL;
 	}
 
+	/* Resize CQ in progress */
+	if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_RESIZE)) {
+		if (cq->resize_buf) {
+			struct mlx4_ib_dev *dev = to_mdev(cq->ibcq.device);
+
+			mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
+			cq->buf      = cq->resize_buf->buf;
+			cq->ibcq.cqe = cq->resize_buf->cqe;
+
+			kfree(cq->resize_buf);
+			cq->resize_buf = NULL;
+		}
+
+		goto repoll;
+	}
+
 	if (!*cur_qp ||
-	    (be32_to_cpu(cqe->my_qpn) & 0xffffff) != (*cur_qp)->mqp.qpn) {
+	    (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) != (*cur_qp)->mqp.qpn) {
 		/*
 		 * We do not have to take the QP table lock here,
 		 * because CQs will be locked while QPs are removed
 		 * from the table.
 		 */
 		mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev,
-				       be32_to_cpu(cqe->my_qpn));
+				       be32_to_cpu(cqe->vlan_my_qpn));
 		if (unlikely(!mqp)) {
 			printk(KERN_WARNING "CQ %06x with entry for unknown QPN %06x\n",
-			       cq->mcq.cqn, be32_to_cpu(cqe->my_qpn) & 0xffffff);
+			       cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK);
 			return -EINVAL;
 		}
 
@@ -421,6 +654,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
 		case MLX4_OPCODE_SEND_IMM:
 			wc->wc_flags |= IB_WC_WITH_IMM;
 		case MLX4_OPCODE_SEND:
+		case MLX4_OPCODE_SEND_INVAL:
 			wc->opcode = IB_WC_SEND;
 			break;
 		case MLX4_OPCODE_RDMA_READ:
@@ -441,39 +675,46 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
 		case MLX4_OPCODE_LSO:
 			wc->opcode = IB_WC_LSO;
 			break;
+		case MLX4_OPCODE_FMR:
+			wc->opcode = IB_WC_FAST_REG_MR;
+			break;
+		case MLX4_OPCODE_LOCAL_INVAL:
+			wc->opcode = IB_WC_LOCAL_INV;
+			break;
 		}
 	} else {
 		wc->byte_len = be32_to_cpu(cqe->byte_cnt);
 
 		switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
 		case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
-			wc->opcode   = IB_WC_RECV_RDMA_WITH_IMM;
-			wc->wc_flags = IB_WC_WITH_IMM;
-			wc->imm_data = cqe->immed_rss_invalid;
+			wc->opcode	= IB_WC_RECV_RDMA_WITH_IMM;
+			wc->wc_flags	= IB_WC_WITH_IMM;
+			wc->ex.imm_data = cqe->immed_rss_invalid;
+			break;
+		case MLX4_RECV_OPCODE_SEND_INVAL:
+			wc->opcode	= IB_WC_RECV;
+			wc->wc_flags	= IB_WC_WITH_INVALIDATE;
+			wc->ex.invalidate_rkey = be32_to_cpu(cqe->immed_rss_invalid);
			break;
 		case MLX4_RECV_OPCODE_SEND:
 			wc->opcode   = IB_WC_RECV;
 			wc->wc_flags = 0;
 			break;
 		case MLX4_RECV_OPCODE_SEND_IMM:
-			wc->opcode   = IB_WC_RECV;
-			wc->wc_flags = IB_WC_WITH_IMM;
-			wc->imm_data = cqe->immed_rss_invalid;
+			wc->opcode	= IB_WC_RECV;
+			wc->wc_flags	= IB_WC_WITH_IMM;
+			wc->ex.imm_data = cqe->immed_rss_invalid;
 			break;
 		}
 
 		wc->slid	   = be16_to_cpu(cqe->rlid);
-		wc->sl		   = cqe->sl >> 4;
+		wc->sl		   = be16_to_cpu(cqe->sl_vid) >> 12;
 		g_mlpath_rqpn	   = be32_to_cpu(cqe->g_mlpath_rqpn);
 		wc->src_qp	   = g_mlpath_rqpn & 0xffffff;
 		wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
-		wc->wc_flags	  |= g_mlpath_rqpn & 0x80000000 ?
-			IB_WC_GRH : 0;
+		wc->wc_flags	  |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0;
 		wc->pkey_index     = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
-		status = cqe->ipoib_status;
-		wc->csum_ok = (status & CSUM_MASK_BITS) == CSUM_VAL_BITS &&
-			      (status & CSUM_MASK2_BITS) &&
-			      cqe->checksum == 0xffff;
+		wc->csum_ok = mlx4_ib_ipoib_csum_ok(cqe->status, cqe->checksum);
 	}
 
 	return 0;
@@ -541,7 +782,7 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
 	 */
 	while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
 		cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
-		if ((be32_to_cpu(cqe->my_qpn) & 0xffffff) == qpn) {
+		if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
 			if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
 				mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index));
 			++nfreed;
diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c
index 1c36087..8aee423 100644
--- a/drivers/infiniband/hw/mlx4/doorbell.c
+++ b/drivers/infiniband/hw/mlx4/doorbell.c
@@ -34,124 +34,6 @@
 
 #include "mlx4_ib.h"
 
-struct mlx4_ib_db_pgdir {
-	struct list_head	list;
-	DECLARE_BITMAP(order0, MLX4_IB_DB_PER_PAGE);
-	DECLARE_BITMAP(order1, MLX4_IB_DB_PER_PAGE / 2);
-	unsigned long	       *bits[2];
-	__be32		       *db_page;
-	dma_addr_t		db_dma;
-};
-
-static struct mlx4_ib_db_pgdir *mlx4_ib_alloc_db_pgdir(struct mlx4_ib_dev *dev)
-{
-	struct mlx4_ib_db_pgdir *pgdir;
-
-	pgdir = kzalloc(sizeof *pgdir, GFP_KERNEL);
-	if (!pgdir)
-		return NULL;
-
-	bitmap_fill(pgdir->order1, MLX4_IB_DB_PER_PAGE / 2);
-	pgdir->bits[0] = pgdir->order0;
-	pgdir->bits[1] = pgdir->order1;
-	pgdir->db_page = dma_alloc_coherent(dev->ib_dev.dma_device,
-					    PAGE_SIZE, &pgdir->db_dma,
-					    GFP_KERNEL);
-	if (!pgdir->db_page) {
-		kfree(pgdir);
-		return NULL;
-	}
-
-	return pgdir;
-}
-
-static int mlx4_ib_alloc_db_from_pgdir(struct mlx4_ib_db_pgdir *pgdir,
-				       struct mlx4_ib_db *db, int order)
-{
-	int o;
-	int i;
-
-	for (o = order; o <= 1; ++o) {
-		i = find_first_bit(pgdir->bits[o], MLX4_IB_DB_PER_PAGE >> o);
-		if (i < MLX4_IB_DB_PER_PAGE >> o)
-			goto found;
-	}
-
-	return -ENOMEM;
-
-found:
-	clear_bit(i, pgdir->bits[o]);
-
-	i <<= o;
-
-	if (o > order)
-		set_bit(i ^ 1, pgdir->bits[order]);
-
-	db->u.pgdir = pgdir;
-	db->index   = i;
-	db->db      = pgdir->db_page + db->index;
-	db->dma     = pgdir->db_dma  + db->index * 4;
-	db->order   = order;
-
-	return 0;
-}
-
-int mlx4_ib_db_alloc(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db, int order)
-{
-	struct mlx4_ib_db_pgdir *pgdir;
-	int ret = 0;
-
-	mutex_lock(&dev->pgdir_mutex);
-
-	list_for_each_entry(pgdir, &dev->pgdir_list, list)
-		if (!mlx4_ib_alloc_db_from_pgdir(pgdir, db, order))
-			goto out;
-
-	pgdir = mlx4_ib_alloc_db_pgdir(dev);
-	if (!pgdir) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	list_add(&pgdir->list, &dev->pgdir_list);
-
-	/* This should never fail -- we just allocated an empty page: */
-	WARN_ON(mlx4_ib_alloc_db_from_pgdir(pgdir, db, order));
-
-out:
-	mutex_unlock(&dev->pgdir_mutex);
-
-	return ret;
-}
-
-void mlx4_ib_db_free(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db)
-{
-	int o;
-	int i;
-
-	mutex_lock(&dev->pgdir_mutex);
-
-	o = db->order;
-	i = db->index;
-
-	if (db->order == 0 && test_bit(i ^ 1, db->u.pgdir->order0)) {
-		clear_bit(i ^ 1, db->u.pgdir->order0);
-		++o;
-	}
-
-	i >>= o;
-	set_bit(i, db->u.pgdir->bits[o]);
-
-	if (bitmap_full(db->u.pgdir->order1, MLX4_IB_DB_PER_PAGE / 2)) {
-		dma_free_coherent(dev->ib_dev.dma_device, PAGE_SIZE,
-				  db->u.pgdir->db_page, db->u.pgdir->db_dma);
-		list_del(&db->u.pgdir->list);
-		kfree(db->u.pgdir);
-	}
-
-	mutex_unlock(&dev->pgdir_mutex);
-}
-
 struct mlx4_ib_user_db_page {
 	struct list_head	list;
 	struct ib_umem	       *umem;
@@ -160,7 +42,7 @@ struct mlx4_ib_user_db_page {
 };
 
 int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
-			struct mlx4_ib_db *db)
+			struct mlx4_db *db)
 {
 	struct mlx4_ib_user_db_page *page;
 	struct ib_umem_chunk *chunk;
@@ -181,7 +63,7 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
 	page->user_virt = (virt & PAGE_MASK);
 	page->refcnt    = 0;
 	page->umem      = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
-				      PAGE_SIZE, 0);
+				      PAGE_SIZE, 0, 0);
 	if (IS_ERR(page->umem)) {
 		err = PTR_ERR(page->umem);
 		kfree(page);
@@ -202,7 +84,7 @@ out:
 	return err;
 }
 
-void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_ib_db *db)
+void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db)
 {
 	mutex_lock(&context->db_page_mutex);
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index ee72ebe..0f0d945 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -147,7 +147,8 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
  * Snoop SM MADs for port info and P_Key table sets, so we can
  * synthesize LID change and P_Key change events.
  */
-static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad)
+static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad,
+		      u16 prev_lid)
 {
 	struct ib_event event;
 
@@ -157,6 +158,7 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad)
 		if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) {
 			struct ib_port_info *pinfo =
 				(struct ib_port_info *) ((struct ib_smp *) mad)->data;
+			u16 lid = be16_to_cpu(pinfo->lid);
 
 			update_sm_ah(to_mdev(ibdev), port_num,
 				     be16_to_cpu(pinfo->sm_lid),
@@ -165,12 +167,15 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad)
 			event.device	       = ibdev;
 			event.element.port_num = port_num;
 
-			if(pinfo->clientrereg_resv_subnetto & 0x80)
+			if (pinfo->clientrereg_resv_subnetto & 0x80) {
 				event.event = IB_EVENT_CLIENT_REREGISTER;
-			else
+				ib_dispatch_event(&event);
+			}
+			if (prev_lid != lid) {
 				event.event = IB_EVENT_LID_CHANGE;
+				ib_dispatch_event(&event);
+			}
 
-			ib_dispatch_event(&event);
 		}
 
 		if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) {
@@ -228,8 +233,9 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
 			struct ib_wc *in_wc, struct ib_grh *in_grh,
 			struct ib_mad *in_mad, struct ib_mad *out_mad)
 {
-	u16 slid;
+	u16 slid, prev_lid = 0;
 	int err;
+	struct ib_port_attr pattr;
 
 	slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
 
@@ -263,6 +269,13 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
 	} else
 		return IB_MAD_RESULT_SUCCESS;
 
+	if ((in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+	     in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
+	    in_mad->mad_hdr.method == IB_MGMT_METHOD_SET &&
+	    in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
+	    !ib_query_port(ibdev, port_num, &pattr))
+		prev_lid = pattr.lid;
+
 	err = mlx4_MAD_IFC(to_mdev(ibdev),
 			   mad_flags & IB_MAD_IGNORE_MKEY,
 			   mad_flags & IB_MAD_IGNORE_BKEY,
@@ -271,7 +284,7 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
 		return IB_MAD_RESULT_FAILURE;
 
 	if (!out_mad->mad_hdr.status) {
-		smp_snoop(ibdev, port_num, in_mad);
+		smp_snoop(ibdev, port_num, in_mad, prev_lid);
 		node_desc_override(ibdev, out_mad);
 	}
 
@@ -298,7 +311,7 @@ int mlx4_ib_mad_init(struct mlx4_ib_dev *dev)
 	int p, q;
 	int ret;
 
-	for (p = 0; p < dev->dev->caps.num_ports; ++p)
+	for (p = 0; p < dev->num_ports; ++p)
 		for (q = 0; q <= 1; ++q) {
 			agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
 						      q ? IB_QPT_GSI : IB_QPT_SMI,
@@ -314,7 +327,7 @@ int mlx4_ib_mad_init(struct mlx4_ib_dev *dev)
 	return 0;
 
 err:
-	for (p = 0; p < dev->dev->caps.num_ports; ++p)
+	for (p = 0; p < dev->num_ports; ++p)
 		for (q = 0; q <= 1; ++q)
 			if (dev->send_agent[p][q])
 				ib_unregister_mad_agent(dev->send_agent[p][q]);
@@ -327,7 +340,7 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
 	struct ib_mad_agent *agent;
 	int p, q;
 
-	for (p = 0; p < dev->dev->caps.num_ports; ++p) {
+	for (p = 0; p < dev->num_ports; ++p) {
 		for (q = 0; q <= 1; ++q) {
 			agent = dev->send_agent[p][q];
 			dev->send_agent[p][q] = NULL;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 2d9c39a..4a9dfc2 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -46,7 +47,7 @@
 
 #define DRV_NAME	MLX4_IB_DRV_NAME
 #define DRV_VERSION	"1.0"
-#define DRV_RELDATE	"February 28, 2008"
+#define DRV_RELDATE	"April 4, 2008"
 
 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
@@ -61,7 +62,7 @@ MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
 
 #endif /* CONFIG_MLX4_DEBUG */
 
-static const char mlx4_ib_version[] __devinitdata =
+static const char mlx4_ib_version[] =
 	DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
 	DRV_VERSION " (" DRV_RELDATE ")\n";
 
@@ -99,7 +100,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
 	props->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT |
 		IB_DEVICE_PORT_ACTIVE_EVENT |
 		IB_DEVICE_SYS_IMAGE_GUID |
-		IB_DEVICE_RC_RNR_NAK_GEN;
+		IB_DEVICE_RC_RNR_NAK_GEN |
+		IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
 	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
 		props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
 	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
@@ -109,9 +111,17 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
 	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT)
 		props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
 	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
-		props->device_cap_flags |= IB_DEVICE_IP_CSUM;
+		props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
 	if (dev->dev->caps.max_gso_sz)
-		props->device_cap_flags |= IB_DEVICE_TCP_TSO;
+		props->device_cap_flags |= IB_DEVICE_UD_TSO;
+	if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
+		props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
+	if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
+	    (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
+	    (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
+		props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_RAW_ETY)
+		props->max_raw_ethy_qp = dev->ib_dev.phys_port_cnt;
 
 	props->vendor_id	   = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
 		0xffffff;
@@ -135,6 +145,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
 	props->max_srq		   = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs;
 	props->max_srq_wr	   = dev->dev->caps.max_srq_wqes - 1;
 	props->max_srq_sge	   = dev->dev->caps.max_srq_sge;
+	props->max_fast_reg_page_list_len = PAGE_SIZE / sizeof (u64);
 	props->local_ca_ack_delay  = dev->dev->caps.local_ca_ack_delay;
 	props->atomic_cap	   = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
 		IB_ATOMIC_HCA : IB_ATOMIC_NONE;
@@ -343,8 +354,14 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
 	int err;
 
 	resp.qp_tab_size      = dev->dev->caps.num_qps;
-	resp.bf_reg_size      = dev->dev->caps.bf_reg_size;
-	resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+
+	if (mlx4_wc_enabled()) {
+		resp.bf_reg_size      = dev->dev->caps.bf_reg_size;
+		resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+	} else {
+		resp.bf_reg_size      = 0;
+		resp.bf_regs_per_page = 0;
+	}
 
 	context = kmalloc(sizeof *context, GFP_KERNEL);
 	if (!context)
@@ -446,7 +463,9 @@ static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
 static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 {
 	return mlx4_multicast_attach(to_mdev(ibqp->device)->dev,
-				     &to_mqp(ibqp)->mqp, gid->raw);
+				     &to_mqp(ibqp)->mqp, gid->raw,
+				     !!(to_mqp(ibqp)->flags &
+					MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK));
 }
 
 static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
@@ -529,108 +548,87 @@ static struct class_device_attribute *mlx4_class_attributes[] = {
 };
 
 /*
- * create 2 functions (show, store) and a class_device_attribute struct
- * pointing to the functions for _name
- */
-#define CLASS_DEVICE_DIAG_CLR_RPRT_ATTR(_name, _offset, _in_mod)	\
-static ssize_t store_rprt_##_name(struct class_device *cdev,		\
-				  const char *buf, size_t length) {	\
-	return store_diag_rprt(cdev, buf, length, _offset, _in_mod);	\
-}									\
-static ssize_t show_rprt_##_name(struct class_device *cdev, char *buf) { \
-	return show_diag_rprt(cdev, buf, _offset, _in_mod);		\
-}									\
-static CLASS_DEVICE_ATTR(_name, S_IRUGO | S_IWUGO,			\
-			 show_rprt_##_name, store_rprt_##_name);
-
-/*
- * create show function and a class_device_attribute struct pointing to
+ * create show function and a device_attribute struct pointing to
  * the function for _name
  */
-#define CLASS_DEVICE_DIAG_RPRT_ATTR(_name, _offset, _in_mod)		\
-static ssize_t show_rprt_##_name(struct class_device *cdev, char *buf){ \
-	return show_diag_rprt(cdev, buf, _offset, _in_mod);		\
-}									\
+#define DEVICE_DIAG_RPRT_ATTR(_name, _offset, _op_mod)		\
+static ssize_t show_rprt_##_name(struct class_device *cdev,	\
				 char *buf){			\
	return show_diag_rprt(cdev, buf, _offset, _op_mod);	\
}								\
 static CLASS_DEVICE_ATTR(_name, S_IRUGO, show_rprt_##_name, NULL);
 
-static ssize_t show_diag_rprt(struct class_device *cdev, char *buf,
-			      int offset, int in_mod)
+#define MLX4_DIAG_RPRT_CLEAR_DIAGS 3
+
+static ssize_t show_diag_rprt(struct class_device *cdev, char *buf,
+			      u32 offset, u8 op_modifier)
 {
-	size_t ret = -1;
+	ssize_t ret;
 	u32 counter_offset = offset;
 	u32 diag_counter = 0;
 	struct mlx4_ib_dev *dev = container_of(cdev, struct mlx4_ib_dev,
 					       ib_dev.class_dev);
 
-	/* clear counters file, can't read it */
-	if(offset < 0)
-		return sprintf(buf,"This file is write only\n");
-
-	ret = mlx4_query_diag_counters(dev->dev, 1, in_mod, &counter_offset,
-				       &diag_counter);
-	if (ret < 0)
-	{
-		sprintf(buf,"Operation failed\n");
+
+	ret = mlx4_query_diag_counters(dev->dev, 1, op_modifier,
+				       &counter_offset, &diag_counter);
+	if (ret)
 		return ret;
-	}
 
 	return sprintf(buf,"%d\n", diag_counter);
 }
 
-/* the store function is used for counter clear */
-static ssize_t store_diag_rprt(struct class_device *cdev,
-			       const char *buf, size_t length,
-			       int offset, int in_mod)
+static ssize_t clear_diag_counters(struct class_device *cdev,
+				   const char *buf, size_t length)
 {
-	size_t ret = -1;
-	u32 counter_offset = 0;
-	u32 diag_counter;
+	ssize_t ret;
 	struct mlx4_ib_dev *dev = container_of(cdev, struct mlx4_ib_dev,
 					       ib_dev.class_dev);
 
-	ret = mlx4_query_diag_counters(dev->dev, 1, in_mod, &counter_offset,
-				       &diag_counter);
+	ret = mlx4_query_diag_counters(dev->dev, 0, MLX4_DIAG_RPRT_CLEAR_DIAGS,
+				       NULL, NULL);
 	if (ret)
 		return ret;
 
 	return length;
 }
 
-CLASS_DEVICE_DIAG_RPRT_ATTR(rq_num_lle , 0x00, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(sq_num_lle , 0x04, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(rq_num_lqpoe , 0x08, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(sq_num_lqpoe , 0x0C, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(rq_num_leeoe , 0x10, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(sq_num_leeoe , 0x14, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(rq_num_lpe , 0x18, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(sq_num_lpe , 0x1C, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(rq_num_wrfe , 0x20, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(sq_num_wrfe , 0x24, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(sq_num_mwbe , 0x2C, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(sq_num_bre , 0x34, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(rq_num_lae , 0x38, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(sq_num_rire , 0x44, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(rq_num_rire , 0x48, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(sq_num_rae , 0x4C, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(rq_num_rae , 0x50, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(sq_num_roe , 0x54, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(sq_num_tree , 0x5C, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(sq_num_rree , 0x64, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(rq_num_rnr , 0x68, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(sq_num_rnr , 0x6C, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(sq_num_rabrte , 0x7C, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(sq_num_ieecne , 0x84, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(sq_num_ieecse , 0x8C, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(rq_num_oos , 0x100, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(sq_num_oos , 0x104, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(rq_num_mce , 0x108, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(rq_num_rsync , 0x110, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(sq_num_rsync , 0x114, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(rq_num_udsdprd , 0x118, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(rq_num_ucsdprd , 0x120, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(num_cqovf , 0x1A0, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(num_eqovf , 0x1A4, 2);
-CLASS_DEVICE_DIAG_RPRT_ATTR(num_baddb , 0x1A8, 2);
-CLASS_DEVICE_DIAG_CLR_RPRT_ATTR(clear_diag , -1  , 3);
+DEVICE_DIAG_RPRT_ATTR(rq_num_lle , 0x00, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_lle , 0x04, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_lqpoe , 0x08, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_lqpoe , 0x0C, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_leeoe , 0x10, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_leeoe , 0x14, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_lpe , 0x18, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_lpe , 0x1C, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_wrfe , 0x20, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_wrfe , 0x24, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_mwbe , 0x2C, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_bre , 0x34, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_lae , 0x38, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_rire , 0x44, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_rire , 0x48, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_rae , 0x4C, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_rae , 0x50, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_roe , 0x54, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_tree , 0x5C, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_rree , 0x64, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_rnr , 0x68, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_rnr , 0x6C, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_rabrte , 0x7C, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_ieecne , 0x84, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_ieecse , 0x8C, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_oos , 0x100, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_oos , 0x104, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_mce , 0x108, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_rsync , 0x110, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_rsync , 0x114, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_udsdprd , 0x118, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_ucsdprd , 0x120, 2);
+DEVICE_DIAG_RPRT_ATTR(num_cqovf , 0x1A0, 2);
+DEVICE_DIAG_RPRT_ATTR(num_eqovf , 0x1A4, 2);
+DEVICE_DIAG_RPRT_ATTR(num_baddb , 0x1A8, 2);
+
+static CLASS_DEVICE_ATTR(clear_diag, S_IWUGO, NULL, clear_diag_counters);
 
 static struct attribute *diag_rprt_attrs[] = {
 	&class_device_attr_rq_num_lle.attr,
@@ -679,9 +677,24 @@ static struct attribute_group diag_counters_group = {
 
 static void *mlx4_ib_add(struct mlx4_dev *dev)
 {
+	static int mlx4_ib_version_printed;
 	struct mlx4_ib_dev *ibdev;
+	int num_ports = 0;
 	int i;
 
+	if (!mlx4_ib_version_printed) {
+		printk(KERN_INFO "%s", mlx4_ib_version);
+		++mlx4_ib_version_printed;
+	}
+
+	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
+		num_ports++;
+
+	/* No point in registering a device with no ports */
+	if (num_ports == 0)
+		return NULL;
+
 	ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
 	if (!ibdev) {
 		dev_err(&dev->pdev->dev, "Device struct alloc failed\n");
@@ -699,16 +712,15 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 		goto err_uar;
 	MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
 
-	INIT_LIST_HEAD(&ibdev->pgdir_list);
-	mutex_init(&ibdev->pgdir_mutex);
-
 	ibdev->dev = dev;
 
 	strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
 	ibdev->ib_dev.owner		= THIS_MODULE;
 	ibdev->ib_dev.node_type		= RDMA_NODE_IB_CA;
-	ibdev->ib_dev.phys_port_cnt	= dev->caps.num_ports;
-	ibdev->ib_dev.num_comp_vectors	= 1;
+	ibdev->ib_dev.local_dma_lkey	= dev->caps.reserved_lkey;
+	ibdev->num_ports		= num_ports;
+	ibdev->ib_dev.phys_port_cnt	= ibdev->num_ports;
+	ibdev->ib_dev.num_comp_vectors	= dev->caps.num_comp_vectors;
 	ibdev->ib_dev.dma_device	= &dev->pdev->dev;
 
 	ibdev->ib_dev.uverbs_abi_ver	= MLX4_IB_UVERBS_ABI_VERSION;
@@ -722,6 +734,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 		(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
 		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
 		(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
 		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
 		(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
 		(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
@@ -761,12 +774,16 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	ibdev->ib_dev.post_recv		= mlx4_ib_post_recv;
 	ibdev->ib_dev.create_cq		= mlx4_ib_create_cq;
 	ibdev->ib_dev.modify_cq		= mlx4_ib_modify_cq;
+	ibdev->ib_dev.resize_cq		= mlx4_ib_resize_cq;
 	ibdev->ib_dev.destroy_cq	= mlx4_ib_destroy_cq;
 	ibdev->ib_dev.poll_cq		= mlx4_ib_poll_cq;
 	ibdev->ib_dev.req_notify_cq	= mlx4_ib_arm_cq;
 	ibdev->ib_dev.get_dma_mr	= mlx4_ib_get_dma_mr;
 	ibdev->ib_dev.reg_user_mr	= mlx4_ib_reg_user_mr;
 	ibdev->ib_dev.dereg_mr		= mlx4_ib_dereg_mr;
+	ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr;
+	ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list;
+	ibdev->ib_dev.free_fast_reg_page_list  = mlx4_ib_free_fast_reg_page_list;
 	ibdev->ib_dev.attach_mcast	= mlx4_ib_mcg_attach;
 	ibdev->ib_dev.detach_mcast	= mlx4_ib_mcg_detach;
 	ibdev->ib_dev.process_mad	= mlx4_ib_process_mad;
@@ -776,11 +793,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	ibdev->ib_dev.unmap_fmr		= mlx4_ib_unmap_fmr;
 	ibdev->ib_dev.dealloc_fmr	= mlx4_ib_fmr_dealloc;
 
-	if (ibdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
-		ibdev->ib_dev.flags |= IB_DEVICE_IP_CSUM;
-	if (ibdev->dev->caps.max_gso_sz)
-		ibdev->ib_dev.flags |= IB_DEVICE_TCP_TSO;
-
 	if (init_node_data(ibdev))
 		goto err_map;
 
@@ -800,13 +812,10 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	}
 
 	if(sysfs_create_group(&ibdev->ib_dev.class_dev.kobj,
 			      &diag_counters_group))
-		goto err_diag;
+		goto err_reg;
 
 	return ibdev;
 
-err_diag:
-	ib_unregister_device(&ibdev->ib_dev);
-
 err_reg:
 	ib_unregister_device(&ibdev->ib_dev);
 
@@ -830,13 +839,17 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
 	struct mlx4_ib_dev *ibdev = ibdev_ptr;
 	int p;
 
-	sysfs_remove_group(&ibdev->ib_dev.class_dev.kobj, &diag_counters_group);
+	if (!ibdev->num_ports)
+		return;
 
-	for (p = 1; p <= dev->caps.num_ports; ++p)
-		mlx4_CLOSE_PORT(dev, p);
+	sysfs_remove_group(&ibdev->ib_dev.class_dev.kobj, &diag_counters_group);
 
 	mlx4_ib_mad_cleanup(ibdev);
 	ib_unregister_device(&ibdev->ib_dev);
+
+	for (p = 1; p <= ibdev->num_ports; ++p)
+		mlx4_CLOSE_PORT(dev, p);
+
 	iounmap(ibdev->uar_map);
 	mlx4_uar_free(dev, &ibdev->priv_uar);
 	mlx4_pd_free(dev, ibdev->priv_pdn);
@@ -844,18 +857,24 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
 }
 
 static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
-			  enum mlx4_dev_event event, int subtype,
-			  int port)
+			  enum mlx4_dev_event event, int port)
 {
 	struct ib_event ibev;
+	struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
+
+	if (port > ibdev->num_ports || !ibdev->num_ports)
+		return;
 
 	switch (event) {
-	case MLX4_EVENT_TYPE_PORT_CHANGE:
-		ibev.event = subtype == MLX4_PORT_CHANGE_SUBTYPE_ACTIVE ?
-			IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+	case MLX4_DEV_EVENT_PORT_UP:
+		ibev.event = IB_EVENT_PORT_ACTIVE;
+		break;
+
+	case MLX4_DEV_EVENT_PORT_DOWN:
+		ibev.event = IB_EVENT_PORT_ERR;
 		break;
 
-	case MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR:
+	case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
 		ibev.event = IB_EVENT_DEVICE_FATAL;
 		break;
 
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index f86a19d..10a23e9 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2006, 2007 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -63,30 +64,12 @@ extern int mlx4_ib_debug_level;
 #endif /* CONFIG_MLX4_DEBUG */
 
 enum {
-	MLX4_IB_DB_PER_PAGE	= PAGE_SIZE / 4
-};
-
-enum {
 	MLX4_IB_SQ_MIN_WQE_SHIFT = 6
 };
 
 #define MLX4_IB_SQ_HEADROOM(shift)	((2048 >> (shift)) + 1)
 #define MLX4_IB_SQ_MAX_SPARE		(MLX4_IB_SQ_HEADROOM(MLX4_IB_SQ_MIN_WQE_SHIFT))
 
-struct mlx4_ib_db_pgdir;
-struct mlx4_ib_user_db_page;
-
-struct mlx4_ib_db {
-	__be32			*db;
-	union {
-		struct mlx4_ib_db_pgdir	    *pgdir;
-		struct mlx4_ib_user_db_page *user_page;
-	} u;
-	dma_addr_t		dma;
-	int			index;
-	int			order;
-};
-
 struct mlx4_ib_ucontext {
 	struct ib_ucontext	ibucontext;
 	struct mlx4_uar		uar;
@@ -104,13 +87,21 @@ struct mlx4_ib_cq_buf {
 	struct mlx4_mtt		mtt;
 };
 
+struct mlx4_ib_cq_resize {
+	struct mlx4_ib_cq_buf	buf;
+	int			cqe;
+};
+
 struct mlx4_ib_cq {
 	struct ib_cq		ibcq;
 	struct mlx4_cq		mcq;
 	struct mlx4_ib_cq_buf	buf;
-	struct mlx4_ib_db	db;
+	struct mlx4_ib_cq_resize *resize_buf;
+	struct mlx4_db		db;
 	spinlock_t		lock;
+	struct mutex		resize_mutex;
 	struct ib_umem	       *umem;
+	struct ib_umem	       *resize_umem;
 };
 
 struct mlx4_ib_mr {
@@ -119,6 +110,11 @@ struct mlx4_ib_mr {
 	struct ib_umem	       *umem;
 };
 
+struct mlx4_ib_fast_reg_page_list {
+	struct ib_fast_reg_page_list	ibfrpl;
+	dma_addr_t			map;
+};
+
 struct mlx4_ib_fmr {
 	struct ib_fmr           ibfmr;
 	struct mlx4_fmr         mfmr;
@@ -136,8 +132,9 @@ struct mlx4_ib_wq {
 	unsigned		tail;
 };
 
-enum qp_flags {
-	MLX4_QP_LSO = 1 << 0
+enum mlx4_ib_qp_flags {
+	MLX4_IB_QP_LSO				= 1 << 0,
+	MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK	= 1 << 1,
 };
 
 struct mlx4_ib_qp {
@@ -145,7 +142,7 @@ struct mlx4_ib_qp {
 	struct mlx4_qp		mqp;
 	struct mlx4_buf		buf;
 
-	struct mlx4_ib_db	db;
+	struct mlx4_db		db;
 	struct mlx4_ib_wq	rq;
 
 	u32			doorbell_qpn;
@@ -159,20 +156,20 @@ struct mlx4_ib_qp {
 	struct mlx4_mtt		mtt;
 	int			buf_size;
 	struct mutex		mutex;
+	u32			flags;
 	u8			port;
 	u8			alt_port;
 	u8			atomic_rd_en;
 	u8			resp_depth;
 	u8			sq_no_prefetch;
 	u8			state;
-	int			flags;
 };
 
 struct mlx4_ib_srq {
 	struct ib_srq		ibsrq;
 	struct mlx4_srq		msrq;
 	struct mlx4_buf		buf;
-	struct mlx4_ib_db	db;
+	struct mlx4_db		db;
 	u64		       *wrid;
 	spinlock_t		lock;
 	int			head;
@@ -191,11 +188,9 @@ struct mlx4_ib_ah {
 struct mlx4_ib_dev {
 	struct ib_device	ib_dev;
 	struct mlx4_dev	       *dev;
+	int			num_ports;
 	void __iomem	       *uar_map;
 
-	struct list_head	pgdir_list;
-	struct mutex		pgdir_mutex;
-
 	struct mlx4_uar		priv_uar;
 	u32			priv_pdn;
 	MLX4_DECLARE_DOORBELL_LOCK(uar_lock);
@@ -237,6 +232,11 @@ static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr)
 	return container_of(ibmr, struct mlx4_ib_mr, ibmr);
 }
 
+static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl)
+{
+	return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl);
+}
+
 static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
 {
 	return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr);
@@ -266,11 +266,9 @@ static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah)
 	return container_of(ibah, struct mlx4_ib_ah, ibah);
 }
 
-int mlx4_ib_db_alloc(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db, int order);
-void mlx4_ib_db_free(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db);
 int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
-			struct mlx4_ib_db *db);
-void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_ib_db *db);
+			struct mlx4_db *db);
+void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db);
 
 struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc);
 int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
@@ -279,8 +277,14 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				  u64 virt_addr, int access_flags,
 				  struct ib_udata *udata);
 int mlx4_ib_dereg_mr(struct ib_mr *mr);
+struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
+					int max_page_list_len);
+struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
+							       int page_list_len);
+void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
 
 int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
+int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
 struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector,
 				struct ib_ucontext *context,
 				struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index fe2c2e9..ae52cd4 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -118,6 +119,70 @@ out:
 	return err;
 }
 
+static int handle_hugetlb_user_mr(struct ib_pd *pd, struct mlx4_ib_mr *mr,
+				  u64 start, u64 virt_addr, int access_flags)
+{
+#if defined(CONFIG_HUGETLB_PAGE) && !defined(__powerpc__) && !defined(__ia64__)
+	struct mlx4_ib_dev *dev = to_mdev(pd->device);
+	struct ib_umem_chunk *chunk;
+	unsigned dsize;
+	dma_addr_t daddr;
+	unsigned cur_size = 0;
+	dma_addr_t uninitialized_var(cur_addr);
+	int n;
+	struct ib_umem *umem = mr->umem;
+	u64 *arr;
+	int err = 0;
+	int i;
+	int j = 0;
+	int off = start & (HPAGE_SIZE - 1);
+
+	n = DIV_ROUND_UP(off + umem->length, HPAGE_SIZE);
+	arr = kmalloc(n * sizeof *arr, GFP_KERNEL);
+	if (!arr)
+		return -ENOMEM;
+
+	list_for_each_entry(chunk, &umem->chunk_list, list)
+		for (i = 0; i < chunk->nmap; ++i) {
+			daddr = sg_dma_address(&chunk->page_list[i]);
+			dsize = sg_dma_len(&chunk->page_list[i]);
+			if (!cur_size) {
+				cur_addr = daddr;
+				cur_size = dsize;
+			} else if (cur_addr + cur_size != daddr) {
+				err = -EINVAL;
+				goto out;
+			} else
+				cur_size += dsize;
+
+			if (cur_size > HPAGE_SIZE) {
+				err = -EINVAL;
+				goto out;
+			} else if (cur_size == HPAGE_SIZE) {
+				cur_size = 0;
+				arr[j++] = cur_addr;
+			}
+		}
+
+	if (cur_size) {
+		arr[j++] = cur_addr;
+	}
+
+	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, umem->length,
+			    convert_access(access_flags), n, HPAGE_SHIFT, &mr->mmr);
+	if (err)
+		goto out;
+
+	err = mlx4_write_mtt(dev->dev, &mr->mmr.mtt, 0, n, arr);
+
+out:
+	kfree(arr);
+	return err;
+#else
+	return -ENOSYS;
+#endif
+}
+
 struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				  u64 virt_addr, int access_flags,
 				  struct ib_udata *udata)
@@ -132,23 +197,27 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	if (!mr)
 		return ERR_PTR(-ENOMEM);
 
-	mr->umem = ib_umem_get(pd->uobject->context, start, length, access_flags);
+	mr->umem = ib_umem_get(pd->uobject->context, start, length,
+			       access_flags, 0);
 	if (IS_ERR(mr->umem)) {
 		err = PTR_ERR(mr->umem);
 		goto err_free;
 	}
 
-	n = ib_umem_page_count(mr->umem);
-	shift = ilog2(mr->umem->page_size);
+	if (!mr->umem->hugetlb ||
+	    handle_hugetlb_user_mr(pd, mr, start, virt_addr, access_flags)) {
+		n = ib_umem_page_count(mr->umem);
+		shift = ilog2(mr->umem->page_size);
-	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
-			    convert_access(access_flags), n, shift, &mr->mmr);
-	if (err)
-		goto err_umem;
+		err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
+				    convert_access(access_flags), n, shift, &mr->mmr);
+		if (err)
+			goto err_umem;
 
-	err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
-	if (err)
-		goto err_mr;
+		err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
+		if (err)
+			goto err_mr;
+	}
 
 	err = mlx4_mr_enable(dev->dev, &mr->mmr);
 	if (err)
@@ -182,6 +251,79 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
 	return 0;
 }
 
+struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
+					int max_page_list_len)
+{
+	struct mlx4_ib_dev *dev = to_mdev(pd->device);
+	struct mlx4_ib_mr *mr;
+	int err;
+
+	mr = kmalloc(sizeof *mr, GFP_KERNEL);
+	if (!mr)
+		return ERR_PTR(-ENOMEM);
+
+	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
+			    max_page_list_len, 0, &mr->mmr);
+	if (err)
+		goto err_free;
+
+	err = mlx4_mr_enable(dev->dev, &mr->mmr);
+	if (err)
+		goto err_mr;
+
+	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
+	mr->umem = NULL;
+
+	return &mr->ibmr;
+
+err_mr:
+	mlx4_mr_free(dev->dev, &mr->mmr);
+
+err_free:
+	kfree(mr);
+	return ERR_PTR(err);
+}
+
+struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
+							       int page_list_len)
+{
+	struct mlx4_ib_dev *dev = to_mdev(ibdev);
+	struct mlx4_ib_fast_reg_page_list *mfrpl;
+	int size = page_list_len * sizeof (u64);
+
+	if (size > PAGE_SIZE)
+		return ERR_PTR(-EINVAL);
+
+	mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL);
+	if (!mfrpl)
+		return ERR_PTR(-ENOMEM);
+
+	mfrpl->ibfrpl.page_list = dma_alloc_coherent(&dev->dev->pdev->dev,
+						     size, &mfrpl->map,
+						     GFP_KERNEL);
+	if (!mfrpl->ibfrpl.page_list)
+		goto err_free;
+
+	WARN_ON(mfrpl->map & 0x3f);
+
+	return &mfrpl->ibfrpl;
+
+err_free:
+	kfree(mfrpl);
+	return ERR_PTR(-ENOMEM);
+}
+
+void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
+{
+	struct mlx4_ib_dev *dev = to_mdev(page_list->device);
+	struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
+	int size = page_list->max_page_list_len * sizeof (u64);
+
+	dma_free_coherent(&dev->dev->pdev->dev, size, page_list->page_list,
+			  mfrpl->map);
+	kfree(mfrpl);
+}
+
 struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
 				 struct ib_fmr_attr *fmr_attr)
 {
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 21580ab..c5a2fc3 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -31,6 +32,7 @@
  */
 
 #include <linux/log2.h>
+
 #include <rdma/ib_cache.h>
 #include <rdma/ib_pack.h>
 
@@ -52,7 +54,8 @@ enum {
 	/*
 	 * Largest possible UD header: send with GRH and immediate data.
 	 */
-	MLX4_IB_UD_HEADER_SIZE = 72
+	MLX4_IB_UD_HEADER_SIZE = 72,
+	MLX4_IB_MAX_RAW_ETY_HDR_SIZE = 12
 };
 
 struct mlx4_ib_sqp {
@@ -77,6 +80,9 @@ static const __be32 mlx4_ib_opcode[] = {
 	[IB_WR_RDMA_READ]		= __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ),
 	[IB_WR_ATOMIC_CMP_AND_SWP]	= __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
 	[IB_WR_ATOMIC_FETCH_AND_ADD]	= __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
+	[IB_WR_SEND_WITH_INV]		= __constant_cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
+	[IB_WR_LOCAL_INV]		= __constant_cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
+	[IB_WR_FAST_REG_MR]		= __constant_cpu_to_be32(MLX4_OPCODE_FMR),
 };
 
 static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
@@ -98,11 +104,7 @@ static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
 
 static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
 {
-	if (BITS_PER_LONG == 64 || qp->buf.nbufs == 1)
-		return qp->buf.u.direct.buf + offset;
-	else
-		return qp->buf.u.page_list[offset >> PAGE_SHIFT].buf +
-			(offset & (PAGE_SIZE - 1));
+	return mlx4_buf_offset(&qp->buf, offset);
 }
 
 static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n)
@@ -118,15 +120,15 @@ static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)
 /*
  * Stamp a SQ WQE so that it is invalid if prefetched by marking the
  * first four bytes of every 64 byte chunk with
- * 0x7FFFFFF | (invalid_ownership_value << 31).
+ * 0x7FFFFFF | (invalid_ownership_value << 31).
  *
- * When max WR is than or equal to the WQE size,
- * as an optimization, we can stamp WQE with 0xffffffff,
- * and skip the very first chunk of the WQE.
+ * When the max work request size is less than or equal to the WQE
+ * basic block size, as an optimization, we can stamp all WQEs with
+ * 0xffffffff, and skip the very first chunk of each WQE.
  */
 static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size)
 {
-	u32 *wqe;
+	__be32 *wqe;
 	int i;
 	int s;
 	int ind;
@@ -135,11 +137,11 @@ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size)
 	struct mlx4_wqe_ctrl_seg *ctrl;
 
 	if (qp->sq_max_wqes_per_wr > 1) {
-		s = roundup(size, 1 << qp->sq.wqe_shift);
+		s = roundup(size, 1U << qp->sq.wqe_shift);
 		for (i = 0; i < s; i += 64) {
 			ind = (i >> qp->sq.wqe_shift) + n;
-			stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) :
-				cpu_to_be32(0xffffffff);
+			stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) :
+					cpu_to_be32(0xffffffff);
 			buf = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
 			wqe = buf + (i & ((1 << qp->sq.wqe_shift) - 1));
 			*wqe = stamp;
@@ -149,7 +151,7 @@ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size)
 		s = (ctrl->fence_size & 0x3f) << 4;
 		for (i = 64; i < s; i += 64) {
 			wqe = buf + i;
-			*wqe = 0xffffffff;
+			*wqe = cpu_to_be32(0xffffffff);
 		}
 	}
 }
@@ -180,9 +182,8 @@ static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size)
 	ctrl->srcrb_flags = 0;
 	ctrl->fence_size = size / 16;
 	/*
-	 * Make sure descriptor is fully written before
-	 * setting ownership bit (because HW can start
-	 * executing as soon as we do).
+	 * Make sure descriptor is fully written before setting ownership bit
+	 * (because HW can start executing as soon as we do).
 	 */
 	wmb();
 
@@ -249,7 +250,7 @@ static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
 	}
 }
 
-static int send_wqe_overhead(enum ib_qp_type type)
+static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
 {
 	/*
 	 * UD WQEs must have a datagram segment.
@@ -260,7 +261,8 @@ static int send_wqe_overhead(enum ib_qp_type type)
 	switch (type) {
 	case IB_QPT_UD:
 		return sizeof (struct mlx4_wqe_ctrl_seg) +
-			sizeof (struct mlx4_wqe_datagram_seg);
+			sizeof (struct mlx4_wqe_datagram_seg) +
+			((flags & MLX4_IB_QP_LSO) ? 64 : 0);
 	case IB_QPT_UC:
 		return sizeof (struct mlx4_wqe_ctrl_seg) +
 			sizeof (struct mlx4_wqe_raddr_seg);
@@ -279,6 +281,12 @@ static int send_wqe_overhead(enum ib_qp_type type)
 			ALIGN(4 + sizeof (struct mlx4_wqe_inline_seg),
 			      sizeof (struct mlx4_wqe_data_seg));
 
+	case IB_QPT_RAW_ETY:
+		return sizeof(struct mlx4_wqe_ctrl_seg) +
+			ALIGN(MLX4_IB_MAX_RAW_ETY_HDR_SIZE +
+			      sizeof(struct mlx4_wqe_inline_seg),
+			      sizeof(struct mlx4_wqe_data_seg));
+
 	default:
 		return sizeof (struct mlx4_wqe_ctrl_seg);
 	}
@@ -334,20 +342,16 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
 	return 0;
 }
 
-static int set_kernel_sq_size(struct mlx4_ib_dev *dev,
-			      struct ib_qp_init_attr *init_attr,
-			      struct mlx4_ib_qp *qp)
+static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
+			      enum ib_qp_type type, struct mlx4_ib_qp *qp)
 {
-	struct ib_qp_cap *cap = &init_attr->cap;
-	enum ib_qp_type type = init_attr->qp_type;
 	int s;
-	int reserve = 0;
 
 	/* Sanity check SQ size before proceeding */
 	if (cap->max_send_wr  > (dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE) ||
 	    cap->max_send_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg) ||
-	    cap->max_inline_data + send_wqe_overhead(type) +
+	    cap->max_inline_data + send_wqe_overhead(type, qp->flags) +
 	    sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz) {
 		mlx4_ib_dbg("Requested SQ resources exceed device maxima");
 		return -EINVAL;
@@ -363,51 +367,59 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev,
 		return -EINVAL;
 	}
 
-	if (qp->flags & MLX4_QP_LSO)
-		reserve = 64;
+	if (type == IB_QPT_RAW_ETY &&
+	    cap->max_send_sge + 1 > dev->dev->caps.max_sq_sg) {
+		mlx4_ib_dbg("No space for RAW ETY hdr");
+		return -EINVAL;
+	}
 
-	s = max(cap->max_send_sge * sizeof (struct mlx4_wqe_data_seg) + reserve,
+	s = max(cap->max_send_sge * sizeof (struct mlx4_wqe_data_seg),
 		cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg)) +
-		send_wqe_overhead(type);
+		send_wqe_overhead(type, qp->flags);
+
+	if (s > dev->dev->caps.max_sq_desc_sz)
+		return -EINVAL;
 
 	/*
-	 * Hermon supports shrinking wqe, such that a single WR can include
-	 * multiple units of wqe_shift. This way, WRs can differ in size, and
-	 * do not have to be a power of 2 in size, saving memory and speeding up
-	 * send WR posting. Unfortunately, if we do this wqe_index field in CQE
-	 * can't be used to look up the WR ID anymore, so do this only if
-	 * selective signalling is off.
+	 * Hermon supports shrinking WQEs, such that a single work
+	 * request can include multiple units of 1 << wqe_shift.  This
+	 * way, work requests can differ in size, and do not have to
+	 * be a power of 2 in size, saving memory and speeding up send
+	 * WR posting.  Unfortunately, if we do this then the
+	 * wqe_index field in CQEs can't be used to look up the WR ID
+	 * anymore, so we do this only if selective signaling is off.
 	 *
-	 * Further, on 32-bit platforms, we can't use vmap to make
-	 * the QP buffer virtually contigious. Thus we have to use
+	 * Further, on 32-bit platforms, we can't use vmap() to make
+	 * the QP buffer virtually contiguous.  Thus we have to use
 	 * constant-sized WRs to make sure a WR is always fully within
 	 * a single page-sized chunk.
 	 *
-	 * Finally, we use NOP opcode to avoid wrap-around in the middle of WR.
-	 * We set NEC bit to avoid getting completions with error for NOP WRs.
-	 * Since NEC is only supported starting with firmware 2.2.232,
-	 * we use constant-sized WRs for older firmware.
+	 * Finally, we use NOP work requests to pad the end of the
+	 * work queue, to avoid wrap-around in the middle of WR.  We
+	 * set NEC bit to avoid getting completions with error for
+	 * these NOP WRs, but since NEC is only supported starting
+	 * with firmware 2.2.232, we use constant-sized WRs for older
+	 * firmware.
 	 *
-	 * And, since MLX QPs only support SEND, we use constant-sized WRs in this
-	 * case.
+	 * And, since MLX QPs only support SEND, we use constant-sized
+	 * WRs in this case.
 	 *
-	 * We look for the smallest value of wqe_shift such that the resulting
-	 * number of wqes does not exceed device capabilities.
+	 * We look for the smallest value of wqe_shift such that the
+	 * resulting number of wqes does not exceed device
+	 * capabilities.
 	 *
-	 * We set WQE size to at least 64 bytes, this way stamping invalidates each WQE.
+	 * We set WQE size to at least 64 bytes, this way stamping
+	 * invalidates each WQE.
 	 */
 	if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
 	    qp->sq_signal_bits && BITS_PER_LONG == 64 &&
-	    type != IB_QPT_SMI && type != IB_QPT_GSI)
+	    type != IB_QPT_SMI && type != IB_QPT_GSI && type != IB_QPT_RAW_ETY)
 		qp->sq.wqe_shift = ilog2(64);
 	else
 		qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s));
 
 	for (;;) {
-		if (1 << qp->sq.wqe_shift > dev->dev->caps.max_sq_desc_sz)
-			return -EINVAL;
-
-		qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s, 1 << qp->sq.wqe_shift);
+		qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s, 1U << qp->sq.wqe_shift);
 
 		/*
 		 * We need to leave 2 KB + 1 WR of headroom in the SQ to
@@ -427,8 +439,10 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev,
 		++qp->sq.wqe_shift;
 	}
 
-	qp->sq.max_gs = ((qp->sq_max_wqes_per_wr << qp->sq.wqe_shift) - reserve -
-			 send_wqe_overhead(type)) / sizeof (struct mlx4_wqe_data_seg);
+	qp->sq.max_gs = (min(dev->dev->caps.max_sq_desc_sz,
+			     (qp->sq_max_wqes_per_wr << qp->sq.wqe_shift)) -
+			 send_wqe_overhead(type, qp->flags)) /
+		sizeof (struct mlx4_wqe_data_seg);
 
 	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
 		(qp->sq.wqe_cnt << qp->sq.wqe_shift);
@@ -442,7 +456,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev,
 	cap->max_send_wr  = qp->sq.max_post =
 		(qp->sq.wqe_cnt - qp->sq_spare_wqes) / qp->sq_max_wqes_per_wr;
-	cap->max_send_sge =min(qp->sq.max_gs,
+	cap->max_send_sge = min(qp->sq.max_gs,
 		min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg));
 	/* We don't support inline sends for kernel QPs (yet) */
@@ -484,22 +498,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 	spin_lock_init(&qp->rq.lock);
 
 	qp->state	 = IB_QPS_RESET;
-	qp->atomic_rd_en = 0;
-	qp->resp_depth   = 0;
-
-	qp->rq.head	    = 0;
-	qp->rq.tail	    = 0;
-	qp->sq.head	    = 0;
-	qp->sq.tail	    = 0;
-	qp->sq_next_wqe     = 0;
-
 	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
 		qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
-	else
-		qp->sq_signal_bits = 0;
-
-	if (init_attr->create_flags & QP_CREATE_LSO)
-		qp->flags |= MLX4_QP_LSO;
 
 	err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, !!init_attr->srq, qp);
 	if (err)
@@ -520,7 +520,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 			goto err;
 
 		qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
-				       qp->buf_size, 0);
+				       qp->buf_size, 0, 0);
 		if (IS_ERR(qp->umem)) {
 			err = PTR_ERR(qp->umem);
 			mlx4_ib_dbg("ib_umem_get error (%d)", err);
struct ib_pd *pd, } else { qp->sq_no_prefetch = 0; - err = set_kernel_sq_size(dev, init_attr, qp); + if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) + qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; + + if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) + qp->flags |= MLX4_IB_QP_LSO; + + err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp); if (err) goto err; if (!init_attr->srq) { - err = mlx4_ib_db_alloc(dev, &qp->db, 0); + err = mlx4_db_alloc(dev->dev, &qp->db, 0); if (err) goto err; @@ -590,10 +596,17 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, } } - err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp); + if (!sqpn) + err = mlx4_qp_reserve_range(dev->dev, 1, 1, &sqpn); if (err) goto err_wrid; + err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp); + if (err) { + mlx4_qp_release_range(dev->dev, sqpn, 1); + goto err_wrid; + } + /* * Hardware wants QPN written in big-endian order (after * shifting) for send doorbell. Precompute this value to save @@ -626,7 +639,7 @@ err_buf: err_db: if (!pd->uobject && !init_attr->srq) - mlx4_ib_db_free(dev, &qp->db); + mlx4_db_free(dev->dev, &qp->db); err: return err; @@ -700,6 +713,10 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, mlx4_ib_unlock_cqs(send_cq, recv_cq); mlx4_qp_free(dev->dev, &qp->mqp); + + if (!is_sqp(dev, qp)) + mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); + mlx4_mtt_cleanup(dev->dev, &qp->mtt); if (is_user) { @@ -712,7 +729,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, kfree(qp->rq.wrid); mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); if (!qp->ibqp.srq) - mlx4_ib_db_free(dev, &qp->db); + mlx4_db_free(dev->dev, &qp->db); } } @@ -725,12 +742,24 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp; int err; + /* + * We only support LSO and multicast loopback blocking, and + * only for kernel UD QPs. + */ + if (init_attr->create_flags & ~(IB_QP_CREATE_IPOIB_UD_LSO | + IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)) + return ERR_PTR(-EINVAL); + + if (init_attr->create_flags && + (pd->uobject || init_attr->qp_type != IB_QPT_UD)) + return ERR_PTR(-EINVAL); + switch (init_attr->qp_type) { case IB_QPT_RC: case IB_QPT_UC: case IB_QPT_UD: { - qp = kmalloc(sizeof *qp, GFP_KERNEL); + qp = kzalloc(sizeof *qp, GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); @@ -744,6 +773,9 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, break; } + case IB_QPT_RAW_ETY: + if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_RAW_ETY)) + return ERR_PTR(-ENOSYS); case IB_QPT_SMI: case IB_QPT_GSI: { @@ -753,7 +785,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, return ERR_PTR(-EINVAL); } - sqp = kmalloc(sizeof *sqp, GFP_KERNEL); + sqp = kzalloc(sizeof *sqp, GFP_KERNEL); if (!sqp) return ERR_PTR(-ENOMEM); @@ -761,7 +793,8 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, err = create_qp_common(dev, pd, init_attr, udata, dev->dev->caps.sqp_start + - (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) + + (init_attr->qp_type == IB_QPT_RAW_ETY ? 4 : + (init_attr->qp_type == IB_QPT_SMI ? 
0 : 2)) + init_attr->port_num - 1, qp); if (err) { @@ -775,7 +808,6 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, break; } default: - /* Don't support raw QPs */ mlx4_ib_dbg("Invalid QP type requested for create_qp (%d)", init_attr->qp_type); return ERR_PTR(-EINVAL); @@ -808,6 +840,7 @@ static int to_mlx4_st(enum ib_qp_type type) case IB_QPT_RC: return MLX4_QP_ST_RC; case IB_QPT_UC: return MLX4_QP_ST_UC; case IB_QPT_UD: return MLX4_QP_ST_UD; + case IB_QPT_RAW_ETY: case IB_QPT_SMI: case IB_QPT_GSI: return MLX4_QP_ST_MLX; default: return -1; @@ -932,12 +965,16 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } } - if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) + if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI || + ibqp->qp_type == IB_QPT_RAW_ETY) context->mtu_msgmax = (IB_MTU_4096 << 5) | 11; - else if (ibqp->qp_type == IB_QPT_UD) - context->mtu_msgmax = (IB_MTU_4096 << 5) | - ilog2(dev->dev->caps.max_gso_sz); - else if (attr_mask & IB_QP_PATH_MTU) { + else if (ibqp->qp_type == IB_QPT_UD) { + if (qp->flags & MLX4_IB_QP_LSO) + context->mtu_msgmax = (IB_MTU_4096 << 5) | + ilog2(dev->dev->caps.max_gso_sz); + else + context->mtu_msgmax = (IB_MTU_4096 << 5) | 12; + } else if (attr_mask & IB_QP_PATH_MTU) { if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) { printk(KERN_ERR "path MTU (%u) is invalid\n", attr->path_mtu); @@ -998,7 +1035,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (attr_mask & IB_QP_ALT_PATH) { if (attr->alt_port_num == 0 || - attr->alt_port_num > dev->dev->caps.num_ports) { + attr->alt_port_num > dev->num_ports) { mlx4_ib_dbg("qpn 0x%x: invalid alternate port num (%d)", ibqp->qp_num, attr->alt_port_num); goto out; @@ -1026,6 +1063,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pdn); context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); + /* Set "fast registration enabled" for all kernel QPs */ + if (!qp->ibqp.uobject) + context->params1 |= cpu_to_be32(1 << 11); + if (attr_mask & IB_QP_RNR_RETRY) { context->params1 |= cpu_to_be32(attr->rnr_retry << 13); optpar |= MLX4_QP_OPTPAR_RNR_RETRY; @@ -1086,7 +1127,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR && (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI || - ibqp->qp_type == IB_QPT_UD)) { + ibqp->qp_type == IB_QPT_UD || ibqp->qp_type == IB_QPT_RAW_ETY)) { context->pri_path.sched_queue = (qp->port - 1) << 6; if (is_qp0(dev, qp)) context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE; @@ -1100,6 +1141,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, else sqd_event = 0; + if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + context->rlkey |= (1 << 4); + /* * Before passing a kernel QP to the HW, make sure that the * ownership bits of the send queue are set and the SQ @@ -1113,7 +1157,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, for (i = 0; i < qp->sq.wqe_cnt; ++i) { ctrl = get_send_wqe(qp, i); ctrl->owner_opcode = cpu_to_be32(1 << 31); - if (!(qp->sq_max_wqes_per_wr > 1)) + if (qp->sq_max_wqes_per_wr == 1) ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4); stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift); @@ -1179,23 +1223,6 @@ out: return err; } -static const struct ib_qp_attr mlx4_ib_qp_attr = { .port_num = 1 }; -static const int mlx4_ib_qp_attr_mask_table[IB_QPT_UD + 1] = { - [IB_QPT_UD] = (IB_QP_PKEY_INDEX | - IB_QP_PORT | - IB_QP_QKEY), - [IB_QPT_UC] = (IB_QP_PKEY_INDEX | - 
IB_QP_PORT | - IB_QP_ACCESS_FLAGS), - [IB_QPT_RC] = (IB_QP_PKEY_INDEX | - IB_QP_PORT | - IB_QP_ACCESS_FLAGS), - [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | - IB_QP_QKEY), - [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | - IB_QP_QKEY), -}; - int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { @@ -1218,7 +1245,7 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } if ((attr_mask & IB_QP_PORT) && - (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) { + (attr->port_num == 0 || attr->port_num > dev->num_ports)) { mlx4_ib_dbg("qpn 0x%x: invalid port number (%d) specified " "for transition %d to %d. qp_type %d", ibqp->qp_num, attr->port_num, cur_state, @@ -1260,15 +1287,6 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } - if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) { - err = __mlx4_ib_modify_qp(ibqp, &mlx4_ib_qp_attr, - mlx4_ib_qp_attr_mask_table[ibqp->qp_type], - IB_QPS_RESET, IB_QPS_INIT); - if (err) - goto out; - cur_state = IB_QPS_INIT; - } - err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); out: @@ -1276,8 +1294,51 @@ out: return err; } +static int build_raw_ety_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, + void *wqe, unsigned *mlx_seg_len) +{ + int payload = 0; + int header_size, packet_length; + struct mlx4_wqe_mlx_seg *mlx = wqe; + struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; + u32 *lrh = wqe + sizeof *mlx + sizeof *inl; + int i; + + /* Only IB_WR_SEND is supported */ + if (wr->opcode != IB_WR_SEND) + return -EINVAL; + + for (i = 0; i < wr->num_sge; ++i) + payload += wr->sg_list[i].length; + + header_size = IB_LRH_BYTES + 4; /* LRH + RAW_HEADER (32 bits) */ + + /* headers + payload and round up */ + packet_length = (header_size + payload + 3) / 4; + + mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); + + mlx->flags |= cpu_to_be32(MLX4_WQE_MLX_ICRC | + (wr->wr.raw_ety.lrh->service_level << 8)); + + mlx->rlid = wr->wr.raw_ety.lrh->destination_lid; + + wr->wr.raw_ety.lrh->packet_length = cpu_to_be16(packet_length); + + ib_lrh_header_pack(wr->wr.raw_ety.lrh, lrh); + lrh += IB_LRH_BYTES / 4; /* LRH size is a dword multiple */ + *lrh = cpu_to_be32(wr->wr.raw_ety.eth_type); + + inl->byte_count = cpu_to_be32(1 << 31 | header_size); + + *mlx_seg_len = + ALIGN(sizeof(struct mlx4_wqe_inline_seg) + header_size, 16); + + return 0; +} + static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, - void *wqe) + void *wqe, unsigned *mlx_seg_len) { struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev; struct mlx4_wqe_mlx_seg *mlx = wqe; @@ -1326,7 +1387,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, case IB_WR_SEND_WITH_IMM: sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; sqp->ud_header.immediate_present = 1; - sqp->ud_header.immediate_data = wr->imm_data; + sqp->ud_header.immediate_data = wr->ex.imm_data; break; default: mlx4_ib_dbg("special QP type %d: invalid wr opcode 0x%x", @@ -1400,7 +1461,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, i = 2; } - return ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); + *mlx_seg_len = + ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); + return 0; } static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq) @@ -1420,6 +1483,44 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq return cur + nreq >= 
wq->max_post; } +static __be32 convert_access(int acc) +{ + return (acc & IB_ACCESS_REMOTE_ATOMIC ? cpu_to_be32(MLX4_WQE_FMR_PERM_ATOMIC) : 0) | + (acc & IB_ACCESS_REMOTE_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_WRITE) : 0) | + (acc & IB_ACCESS_REMOTE_READ ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_READ) : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0) | + cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ); +} + +static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr) +{ + struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list); + int i; + + for (i = 0; i < wr->wr.fast_reg.page_list_len; ++i) + wr->wr.fast_reg.page_list->page_list[i] = + cpu_to_be64(wr->wr.fast_reg.page_list->page_list[i] | + MLX4_MTT_FLAG_PRESENT); + + fseg->flags = convert_access(wr->wr.fast_reg.access_flags); + fseg->mem_key = cpu_to_be32(wr->wr.fast_reg.rkey); + fseg->buf_list = cpu_to_be64(mfrpl->map); + fseg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start); + fseg->reg_len = cpu_to_be64(wr->wr.fast_reg.length); + fseg->offset = 0; /* XXX -- is this just for ZBVA? */ + fseg->page_size = cpu_to_be32(wr->wr.fast_reg.page_shift); + fseg->reserved[0] = 0; + fseg->reserved[1] = 0; +} + +static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey) +{ + iseg->flags = 0; + iseg->mem_key = cpu_to_be32(rkey); + iseg->guest_id = 0; + iseg->pa = 0; +} + static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, u64 remote_addr, u32 rkey) { @@ -1493,28 +1594,46 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) dseg->addr = cpu_to_be64(sg->addr); } -static int build_lso_seg(struct mlx4_lso_seg *wqe, struct ib_send_wr *wr, - struct mlx4_ib_qp *qp, int *lso_seg_len) +static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr, + struct mlx4_ib_qp *qp, unsigned *lso_seg_len, __be32 *lso_hdr_sz) { - int halign; + unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16); - halign = ALIGN(wr->wr.ud.hlen, 16); - if (unlikely(!(qp->flags & MLX4_QP_LSO) && wr->num_sge > qp->sq.max_gs - (halign >> 4))) - return -EINVAL; + /* + * This is a temporary limitation and will be removed in + * a forthcoming FW release: + */ + if (unlikely(halign > 64)) + return -EINVAL; - memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen); + if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) && + wr->num_sge > qp->sq.max_gs - (halign >> 4))) + return -EINVAL; - /* make sure LSO header is written before - overwriting stamping */ - wmb(); + memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen); - wqe->mss_hdr_size = cpu_to_be32(((wr->wr.ud.mss - wr->wr.ud.hlen) - << 16) | wr->wr.ud.hlen); + *lso_hdr_sz = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 | + wr->wr.ud.hlen); *lso_seg_len = halign; return 0; } +static __be32 send_ieth(struct ib_send_wr *wr) +{ + switch (wr->opcode) { + case IB_WR_SEND_WITH_IMM: + case IB_WR_RDMA_WRITE_WITH_IMM: + return wr->ex.imm_data; + + case IB_WR_SEND_WITH_INV: + return cpu_to_be32(wr->ex.invalidate_rkey); + + default: + return 0; + } +} + int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -1528,7 +1647,11 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, unsigned ind; int uninitialized_var(stamp); int uninitialized_var(size); + unsigned uninitialized_var(seglen); int i; + __be32 *lso_wqe; + __be32 uninitialized_var(lso_hdr_sz); + spin_lock_irqsave(&qp->sq.lock, flags); @@ -1563,11 +1686,7 @@ int 
mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) | qp->sq_signal_bits; - if (wr->opcode == IB_WR_SEND_WITH_IMM || - wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) - ctrl->imm = wr->imm_data; - else - ctrl->imm = 0; + ctrl->imm = send_ieth(wr); wqe += sizeof *ctrl; size = sizeof *ctrl / 16; @@ -1599,6 +1718,18 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, size += sizeof (struct mlx4_wqe_raddr_seg) / 16; break; + case IB_WR_LOCAL_INV: + set_local_inv_seg(wqe, wr->ex.invalidate_rkey); + wqe += sizeof (struct mlx4_wqe_local_inval_seg); + size += sizeof (struct mlx4_wqe_local_inval_seg) / 16; + break; + + case IB_WR_FAST_REG_MR: + set_fmr_seg(wqe, wr); + wqe += sizeof (struct mlx4_wqe_fmr_seg); + size += sizeof (struct mlx4_wqe_fmr_seg) / 16; + break; + default: /* No extra segments required for sends */ break; @@ -1611,30 +1742,44 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, size += sizeof (struct mlx4_wqe_datagram_seg) / 16; if (wr->opcode == IB_WR_LSO) { - int hlen; - - err = build_lso_seg(wqe, wr, qp, &hlen); - if (err) { + err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz); + if (unlikely(err)) { *bad_wr = wr; goto out; } - wqe += hlen; - size += hlen >> 4; + lso_wqe = (__be32 *) wqe; + wqe += seglen; + dseg = wqe; + dseg += wr->num_sge - 1; + size += (seglen / 16) + wr->num_sge * + (sizeof (struct mlx4_wqe_data_seg) / 16); + for (i = wr->num_sge - 1; i >= 0; --i, --dseg) + set_data_seg(dseg, wr->sg_list + i); + *lso_wqe = lso_hdr_sz; + goto lso_continue; } - break; case IB_QPT_SMI: case IB_QPT_GSI: - err = build_mlx_header(to_msqp(qp), wr, ctrl); - if (err < 0) { + err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen); + if (unlikely(err)) { *bad_wr = wr; goto out; } - wqe += err; - size += err / 16; + wqe += seglen; + size += seglen / 16; + break; - err = 0; + case IB_QPT_RAW_ETY: + err = build_raw_ety_header(to_msqp(qp), wr, ctrl, + &seglen); + if (unlikely(err)) { + *bad_wr = wr; + goto out; + } + wqe += seglen; + size += seglen / 16; break; default: @@ -1662,6 +1807,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, for (i = wr->num_sge - 1; i >= 0; --i, --dseg) set_data_seg(dseg, wr->sg_list + i); +lso_continue: ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ? MLX4_WQE_CTRL_FENCE : 0) | size; @@ -1681,7 +1827,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, (ind & qp->sq.wqe_cnt ? 
cpu_to_be32(1 << 31) : 0); stamp = ind + qp->sq_spare_wqes; - ind += DIV_ROUND_UP(size * 16, 1 << qp->sq.wqe_shift); + ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift); /* * We can improve latency by not stamping the last @@ -1866,7 +2012,9 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr struct mlx4_ib_qp *qp = to_mqp(ibqp); struct mlx4_qp_context context; int mlx4_state; - int err; + int err = 0; + + mutex_lock(&qp->mutex); if (qp->state == IB_QPS_RESET) { qp_attr->qp_state = IB_QPS_RESET; @@ -1874,12 +2022,15 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr } err = mlx4_qp_query(dev->dev, &qp->mqp, &context); - if (err) - return -EINVAL; + if (err) { + err = -EINVAL; + goto out; + } mlx4_state = be32_to_cpu(context.flags) >> 28; - qp_attr->qp_state = to_ib_qp_state(mlx4_state); + qp->state = to_ib_qp_state(mlx4_state); + qp_attr->qp_state = qp->state; qp_attr->path_mtu = context.mtu_msgmax >> 5; qp_attr->path_mig_state = to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3); @@ -1938,6 +2089,15 @@ done: qp_init_attr->cap = qp_attr->cap; - return 0; + qp_init_attr->create_flags = 0; + if (qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) + qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; + + if (qp->flags & MLX4_IB_QP_LSO) + qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO; + +out: + mutex_unlock(&qp->mutex); + return err; } diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index a1ad4cc..688b05f 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -38,13 +39,7 @@ static void *get_wqe(struct mlx4_ib_srq *srq, int n) { - int offset = n << srq->msrq.wqe_shift; - - if (srq->buf.nbufs == 1) - return srq->buf.u.direct.buf + offset; - else - return srq->buf.u.page_list[offset >> PAGE_SHIFT].buf + - (offset & (PAGE_SIZE - 1)); + return mlx4_buf_offset(&srq->buf, n << srq->msrq.wqe_shift); } static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type) @@ -119,7 +114,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, } srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, - buf_size, 0); + buf_size, 0, 0); if (IS_ERR(srq->umem)) { err = PTR_ERR(srq->umem); goto err_srq; @@ -139,7 +134,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, if (err) goto err_mtt; } else { - err = mlx4_ib_db_alloc(dev, &srq->db, 0); + err = mlx4_db_alloc(dev->dev, &srq->db, 0); if (err) goto err_srq; @@ -210,7 +205,7 @@ err_buf: err_db: if (!pd->uobject) - mlx4_ib_db_free(dev, &srq->db); + mlx4_db_free(dev->dev, &srq->db); err_srq: kfree(srq); @@ -281,7 +276,7 @@ int mlx4_ib_destroy_srq(struct ib_srq *srq) kfree(msrq->wrid); mlx4_buf_free(dev->dev, msrq->msrq.max << msrq->msrq.wqe_shift, &msrq->buf); - mlx4_ib_db_free(dev, &msrq->db); + mlx4_db_free(dev->dev, &msrq->db); } kfree(msrq); diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h index e2d11be..13beede 100644 --- a/drivers/infiniband/hw/mlx4/user.h +++ b/drivers/infiniband/hw/mlx4/user.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. 
* * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mlx4/wc.c b/drivers/infiniband/hw/mlx4/wc.c index 70b9c8f..8f9f12d 100644 --- a/drivers/infiniband/hw/mlx4/wc.c +++ b/drivers/infiniband/hw/mlx4/wc.c @@ -33,18 +33,17 @@ #include <linux/pci.h> #include "wc.h" -static u32 old_pat_lo[NR_CPUS] = {0}; -static u32 old_pat_hi[NR_CPUS] = {0}; -static unsigned int wc_enabled = 0; +#if defined(__i386__) || defined(__x86_64__) #define MLX4_PAT_MASK (0xFFFFF8FF) #define MLX4_PAT_MOD (0x00000100) #define MLX4_WC_FLAGS (_PAGE_PWT) - -#if defined(__i386__) || defined(__x86_64__) - #define X86_MSR_PAT_OFFSET 0x277 +static unsigned int wc_enabled = 0; +static u32 old_pat_lo[NR_CPUS] = {0}; +static u32 old_pat_hi[NR_CPUS] = {0}; + /* Returns non-zero if we have a chipset write-combining problem */ static int have_wc_errata(void) { @@ -187,6 +186,33 @@ int mlx4_wc_enabled(void) return wc_enabled; } +#elif defined(CONFIG_PPC64) + +static unsigned int wc_enabled = 0; + +int mlx4_enable_wc(void) +{ + wc_enabled = 1; + return 0; +} + +void mlx4_disable_wc(void) +{ + wc_enabled = 0; +} + +pgprot_t pgprot_wc(pgprot_t _prot) +{ + return wc_enabled ? __pgprot((pgprot_val(_prot) | _PAGE_NO_CACHE) & + ~(pgprot_t)_PAGE_GUARDED) : + pgprot_noncached(_prot); +} + +int mlx4_wc_enabled(void) +{ + return wc_enabled; +} + #else /* !(defined(__i386__) || defined(__x86_64__)) */ int mlx4_enable_wc(void){ return 0; } diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 786902e..89add4f 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -2517,6 +2517,15 @@ config BNX2X To compile this driver as a module, choose M here: the module will be called bnx2x. This is recommended. +config MLX4_EN + tristate "Mellanox Technologies 10Gbit Ethernet support" + depends on PCI && INET + select MLX4_CORE + select INET_LRO + help + This driver supports Mellanox Technologies ConnectX Ethernet + devices. + config MLX4_CORE tristate depends on PCI diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile index 3af97af..d3ecd67 100644 --- a/drivers/net/mlx4/Makefile +++ b/drivers/net/mlx4/Makefile @@ -1,9 +1,8 @@ -INFINIBANDINCLUDE := -Idrivers/infiniband/include \ - $(if $(KBUILD_SRC),-I$(srctree)/drivers/infiniband/include) - -export CPPFLAGS := $(INFINIBANDINCLUDE) $(CPPFLAGS) - obj-$(CONFIG_MLX4_CORE) += mlx4_core.o mlx4_core-y := alloc.o catas.o cmd.o cq.o eq.o fw.o icm.o intf.o main.o mcg.o \ - mr.o pd.o profile.o qp.o reset.o srq.o + mr.o pd.o profile.o qp.o reset.o srq.o port.o sense.o + +obj-$(CONFIG_MLX4_EN) += mlx4_en.o + +mlx4_en-y := en_main.o en_tx.o en_rx.o en_params.o en_port.o en_cq.o en_resources.o en_netdev.o en_frag.o diff --git a/drivers/net/mlx4/alloc.c b/drivers/net/mlx4/alloc.c index 561dfb5..5538db1 100644 --- a/drivers/net/mlx4/alloc.c +++ b/drivers/net/mlx4/alloc.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -46,13 +47,16 @@ u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap) obj = find_next_zero_bit(bitmap->table, bitmap->max, bitmap->last); if (obj >= bitmap->max) { - bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask; + bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top) + & bitmap->mask; obj = find_first_zero_bit(bitmap->table, bitmap->max); } if (obj < bitmap->max) { set_bit(obj, bitmap->table); - bitmap->last = (obj + 1) & (bitmap->max - 1); + bitmap->last = (obj + 1); + if (bitmap->last == bitmap->max) + bitmap->last = 0; obj |= bitmap->top; } else obj = -1; @@ -64,16 +68,86 @@ u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap) void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj) { - obj &= bitmap->max - 1; + mlx4_bitmap_free_range(bitmap, obj, 1); +} + +static unsigned long find_aligned_range(unsigned long *bitmap, + u32 start, u32 nbits, + int len, int align) +{ + unsigned long end, i; + +again: + start = ALIGN(start, align); + while ((start < nbits) && test_bit(start, bitmap)) + start += align; + if (start >= nbits) + return -1; + + end = start+len; + if (end > nbits) + return -1; + for (i = start+1; i < end; i++) { + if (test_bit(i, bitmap)) { + start = i+1; + goto again; + } + } + return start; +} + +u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align) +{ + u32 obj, i; + + if (likely(cnt == 1 && align == 1)) + return mlx4_bitmap_alloc(bitmap); + + spin_lock(&bitmap->lock); + + obj = find_aligned_range(bitmap->table, bitmap->last, + bitmap->max, cnt, align); + if (obj >= bitmap->max) { + bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top) + & bitmap->mask; + obj = find_aligned_range(bitmap->table, 0, bitmap->max, + cnt, align); + } + + if (obj < bitmap->max) { + for (i = 0; i < cnt; i++) + set_bit(obj+i, bitmap->table); + if (obj == bitmap->last) { + bitmap->last = (obj + cnt); + if (bitmap->last >= bitmap->max) + bitmap->last = 0; + } + obj |= bitmap->top; + } else + obj = -1; + + spin_unlock(&bitmap->lock); + + return obj; +} + +void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt) +{ + u32 i; + + obj &= bitmap->max + bitmap->reserved_top - 1; spin_lock(&bitmap->lock); - clear_bit(obj, bitmap->table); + for (i = 0; i < cnt; i++) + clear_bit(obj+i, bitmap->table); bitmap->last = min(bitmap->last, obj); - bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask; + bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top) + & bitmap->mask; spin_unlock(&bitmap->lock); } -int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, u32 reserved) +int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, + u32 reserved_bot, u32 reserved_top) { int i; @@ -83,14 +157,16 @@ int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, u32 reserved bitmap->last = 0; bitmap->top = 0; - bitmap->max = num; + bitmap->max = num - reserved_top; bitmap->mask = mask; + bitmap->reserved_top = reserved_top; spin_lock_init(&bitmap->lock); - bitmap->table = kzalloc(BITS_TO_LONGS(num) * sizeof (long), GFP_KERNEL); + bitmap->table = kzalloc(BITS_TO_LONGS(bitmap->max) * + sizeof (long), GFP_KERNEL); if (!bitmap->table) return -ENOMEM; - for (i = 0; i < reserved; ++i) + for (i = 0; i < reserved_bot; ++i) set_bit(i, bitmap->table); return 0; @@ -117,40 +193,40 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, buf->nbufs = 1; buf->npages = 1; buf->page_shift = get_order(size) + PAGE_SHIFT; - buf->u.direct.buf = 
dma_alloc_coherent(&dev->pdev->dev, + buf->direct.buf = dma_alloc_coherent(&dev->pdev->dev, size, &t, GFP_KERNEL); - if (!buf->u.direct.buf) + if (!buf->direct.buf) return -ENOMEM; - buf->u.direct.map = t; + buf->direct.map = t; while (t & ((1 << buf->page_shift) - 1)) { --buf->page_shift; buf->npages *= 2; } - memset(buf->u.direct.buf, 0, size); + memset(buf->direct.buf, 0, size); } else { int i; buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE; buf->npages = buf->nbufs; buf->page_shift = PAGE_SHIFT; - buf->u.page_list = kzalloc(buf->nbufs * sizeof *buf->u.page_list, + buf->page_list = kzalloc(buf->nbufs * sizeof *buf->page_list, GFP_KERNEL); - if (!buf->u.page_list) + if (!buf->page_list) return -ENOMEM; for (i = 0; i < buf->nbufs; ++i) { - buf->u.page_list[i].buf = + buf->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, &t, GFP_KERNEL); - if (!buf->u.page_list[i].buf) + if (!buf->page_list[i].buf) goto err_free; - buf->u.page_list[i].map = t; + buf->page_list[i].map = t; - memset(buf->u.page_list[i].buf, 0, PAGE_SIZE); + memset(buf->page_list[i].buf, 0, PAGE_SIZE); } if (BITS_PER_LONG == 64) { @@ -159,10 +235,10 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, if (!pages) goto err_free; for (i = 0; i < buf->nbufs; ++i) - pages[i] = virt_to_page(buf->u.page_list[i].buf); - buf->u.direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL); + pages[i] = virt_to_page(buf->page_list[i].buf); + buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL); kfree(pages); - if (!buf->u.direct.buf) + if (!buf->direct.buf) goto err_free; } } @@ -181,18 +257,175 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf) int i; if (buf->nbufs == 1) - dma_free_coherent(&dev->pdev->dev, size, buf->u.direct.buf, - buf->u.direct.map); + dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf, + buf->direct.map); else { if (BITS_PER_LONG == 64) - vunmap(buf->u.direct.buf); + vunmap(buf->direct.buf); for (i = 0; i < buf->nbufs; ++i) - if (buf->u.page_list[i].buf) + if (buf->page_list[i].buf) dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, - buf->u.page_list[i].buf, - buf->u.page_list[i].map); - kfree(buf->u.page_list); + buf->page_list[i].buf, + buf->page_list[i].map); + kfree(buf->page_list); } } EXPORT_SYMBOL_GPL(mlx4_buf_free); + +static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device) +{ + struct mlx4_db_pgdir *pgdir; + + pgdir = kzalloc(sizeof *pgdir, GFP_KERNEL); + if (!pgdir) + return NULL; + + bitmap_fill(pgdir->order1, MLX4_DB_PER_PAGE / 2); + pgdir->bits[0] = pgdir->order0; + pgdir->bits[1] = pgdir->order1; + pgdir->db_page = dma_alloc_coherent(dma_device, PAGE_SIZE, + &pgdir->db_dma, GFP_KERNEL); + if (!pgdir->db_page) { + kfree(pgdir); + return NULL; + } + + return pgdir; +} + +static int mlx4_alloc_db_from_pgdir(struct mlx4_db_pgdir *pgdir, + struct mlx4_db *db, int order) +{ + int o; + int i; + + for (o = order; o <= 1; ++o) { + i = find_first_bit(pgdir->bits[o], MLX4_DB_PER_PAGE >> o); + if (i < MLX4_DB_PER_PAGE >> o) + goto found; + } + + return -ENOMEM; + +found: + clear_bit(i, pgdir->bits[o]); + + i <<= o; + + if (o > order) + set_bit(i ^ 1, pgdir->bits[order]); + + db->u.pgdir = pgdir; + db->index = i; + db->db = pgdir->db_page + db->index; + db->dma = pgdir->db_dma + db->index * 4; + db->order = order; + + return 0; +} + +int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_db_pgdir *pgdir; + int ret = 0; + + 
mutex_lock(&priv->pgdir_mutex); + + list_for_each_entry(pgdir, &priv->pgdir_list, list) + if (!mlx4_alloc_db_from_pgdir(pgdir, db, order)) + goto out; + + pgdir = mlx4_alloc_db_pgdir(&(dev->pdev->dev)); + if (!pgdir) { + ret = -ENOMEM; + goto out; + } + + list_add(&pgdir->list, &priv->pgdir_list); + + /* This should never fail -- we just allocated an empty page: */ + WARN_ON(mlx4_alloc_db_from_pgdir(pgdir, db, order)); + +out: + mutex_unlock(&priv->pgdir_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(mlx4_db_alloc); + +void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int o; + int i; + + mutex_lock(&priv->pgdir_mutex); + + o = db->order; + i = db->index; + + if (db->order == 0 && test_bit(i ^ 1, db->u.pgdir->order0)) { + clear_bit(i ^ 1, db->u.pgdir->order0); + ++o; + } + i >>= o; + set_bit(i, db->u.pgdir->bits[o]); + + if (bitmap_full(db->u.pgdir->order1, MLX4_DB_PER_PAGE / 2)) { + dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, + db->u.pgdir->db_page, db->u.pgdir->db_dma); + list_del(&db->u.pgdir->list); + kfree(db->u.pgdir); + } + + mutex_unlock(&priv->pgdir_mutex); +} +EXPORT_SYMBOL_GPL(mlx4_db_free); + +int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, + int size, int max_direct) +{ + int err; + + err = mlx4_db_alloc(dev, &wqres->db, 1); + if (err) + return err; + + *wqres->db.db = 0; + + err = mlx4_buf_alloc(dev, size, max_direct, &wqres->buf); + if (err) + goto err_db; + + err = mlx4_mtt_init(dev, wqres->buf.npages, wqres->buf.page_shift, + &wqres->mtt); + if (err) + goto err_buf; + + err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf); + if (err) + goto err_mtt; + + return 0; + +err_mtt: + mlx4_mtt_cleanup(dev, &wqres->mtt); +err_buf: + mlx4_buf_free(dev, size, &wqres->buf); +err_db: + mlx4_db_free(dev, &wqres->db); + + return err; +} +EXPORT_SYMBOL_GPL(mlx4_alloc_hwq_res); + +void mlx4_free_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, + int size) +{ + mlx4_mtt_cleanup(dev, &wqres->mtt); + mlx4_buf_free(dev, size, &wqres->buf); + mlx4_db_free(dev, &wqres->db); +} +EXPORT_SYMBOL_GPL(mlx4_free_hwq_res); diff --git a/drivers/net/mlx4/catas.c b/drivers/net/mlx4/catas.c index 844aac9..f094ee0 100644 --- a/drivers/net/mlx4/catas.c +++ b/drivers/net/mlx4/catas.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -44,7 +45,7 @@ static LIST_HEAD(catas_list); static struct workqueue_struct *catas_wq; static struct work_struct catas_work; -static int internal_err_reset = 0; +static int internal_err_reset = 1; module_param(internal_err_reset, int, 0644); MODULE_PARM_DESC(internal_err_reset, "Reset device on internal errors if non-zero (default 1)"); @@ -69,7 +70,7 @@ static void poll_catas(unsigned long dev_ptr) if (readl(priv->catas_err.map)) { dump_err_buf(dev); - mlx4_dispatch_event(dev, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR, 0, 0); + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0); if (internal_err_reset) { spin_lock(&catas_lock); diff --git a/drivers/net/mlx4/cmd.c b/drivers/net/mlx4/cmd.c index d86b66d..79387fc 100644 --- a/drivers/net/mlx4/cmd.c +++ b/drivers/net/mlx4/cmd.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Mellanox Technologies. All rights reserved. 
+ * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -67,6 +67,8 @@ enum { CMD_STAT_BAD_INDEX = 0x0a, /* FW image corrupted: */ CMD_STAT_BAD_NVMEM = 0x0b, + /* Error in ICM mapping (e.g. not enough auxiliary ICM pages to execute command): */ + CMD_STAT_ICM_ERROR = 0x0c, /* Attempt to modify a QP/EE which is not in the presumed state: */ CMD_STAT_BAD_QP_STATE = 0x10, /* Bad segment parameters (Address/Size): */ @@ -104,9 +106,11 @@ struct mlx4_cmd_context { int next; u64 out_param; u16 token; + u8 fw_status; }; -static int mlx4_status_to_errno(u8 status) { +static int mlx4_status_to_errno(u8 status) +{ static const int trans_table[] = { [CMD_STAT_INTERNAL_ERR] = -EIO, [CMD_STAT_BAD_OP] = -EPERM, @@ -118,6 +122,7 @@ static int mlx4_status_to_errno(u8 status) { [CMD_STAT_BAD_RES_STATE] = -EBADF, [CMD_STAT_BAD_INDEX] = -EBADF, [CMD_STAT_BAD_NVMEM] = -EFAULT, + [CMD_STAT_ICM_ERROR] = -ENFILE, [CMD_STAT_BAD_QP_STATE] = -EINVAL, [CMD_STAT_BAD_SEG_PARAM] = -EFAULT, [CMD_STAT_REG_BOUND] = -EBUSY, @@ -208,6 +213,7 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, void __iomem *hcr = priv->cmd.hcr; int err = 0; unsigned long end; + u32 stat; down(&priv->cmd.poll_sem); @@ -231,9 +237,10 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, __raw_readl(hcr + HCR_OUT_PARAM_OFFSET)) << 32 | (u64) be32_to_cpu((__force __be32) __raw_readl(hcr + HCR_OUT_PARAM_OFFSET + 4)); - - err = mlx4_status_to_errno(be32_to_cpu((__force __be32) - __raw_readl(hcr + HCR_STATUS_OFFSET)) >> 24); + stat = be32_to_cpu((__force __be32) __raw_readl(hcr + HCR_STATUS_OFFSET)) >> 24; + err = mlx4_status_to_errno(stat); + if (err) + mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", op, stat); out: up(&priv->cmd.poll_sem); @@ -250,6 +257,7 @@ void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param) if (token != context->token) return; + context->fw_status = status; context->result = mlx4_status_to_errno(status); context->out_param = out_param; @@ -285,8 +293,11 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, } err = context->result; - if (err) + if (err) { + mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", + op, context->fw_status); goto out; + } if (out_is_imm) *out_param = context->out_param; diff --git a/drivers/net/mlx4/cq.c b/drivers/net/mlx4/cq.c index 39004c5..31a5190 100644 --- a/drivers/net/mlx4/cq.c +++ b/drivers/net/mlx4/cq.c @@ -2,7 +2,7 @@ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. * Copyright (c) 2004 Voltaire, Inc. All rights reserved. 
* * This software is available to you under a choice of one of two @@ -43,6 +43,27 @@ #include "mlx4.h" #include "icm.h" +struct mlx4_cq_context { + __be32 flags; + u16 reserved1[3]; + __be16 page_offset; + __be32 logsize_usrpage; + __be16 cq_period; + __be16 cq_max_count; + u8 reserved2[3]; + u8 comp_eqn; + u8 log_page_size; + u8 reserved3[2]; + u8 mtt_base_addr_h; + __be32 mtt_base_addr_l; + __be32 last_notified_index; + __be32 solicit_producer_index; + __be32 consumer_index; + __be32 producer_index; + u32 reserved4[2]; + __be64 db_rec_addr; +}; + #define MLX4_CQ_STATUS_OK ( 0 << 28) #define MLX4_CQ_STATUS_OVERFLOW ( 9 << 28) #define MLX4_CQ_STATUS_WRITE_FAIL (10 << 28) @@ -114,8 +135,77 @@ static int mlx4_HW2SW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, MLX4_CMD_TIME_CLASS_A); } +int mlx4_cq_modify(struct mlx4_dev *dev, struct mlx4_cq *cq, + u16 count, u16 period) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_cq_context *cq_context; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + cq_context = mailbox->buf; + memset(cq_context, 0, sizeof *cq_context); + + cq_context->cq_max_count = cpu_to_be16(count); + cq_context->cq_period = cpu_to_be16(period); + + err = mlx4_MODIFY_CQ(dev, mailbox, cq->cqn, 1); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_cq_modify); + +int mlx4_cq_resize(struct mlx4_dev *dev, struct mlx4_cq *cq, + int entries, struct mlx4_mtt *mtt) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_cq_context *cq_context; + u64 mtt_addr; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + cq_context = mailbox->buf; + memset(cq_context, 0, sizeof *cq_context); + + cq_context->logsize_usrpage = cpu_to_be32(ilog2(entries) << 24); + cq_context->log_page_size = mtt->page_shift - 12; + mtt_addr = mlx4_mtt_addr(dev, mtt); + cq_context->mtt_base_addr_h = mtt_addr >> 32; + cq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff); + + err = mlx4_MODIFY_CQ(dev, mailbox, cq->cqn, 0); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_cq_resize); + +static int mlx4_find_least_loaded_vector(struct mlx4_priv *priv) +{ + int i; + int index = 0; + int min = priv->eq_table.eq[MLX4_EQ_COMP_CPU0].load; + + for (i = 1; i < priv->dev.caps.num_comp_vectors; i++) { + if (priv->eq_table.eq[MLX4_EQ_COMP_CPU0 + i].load < min) { + index = i; + min = priv->eq_table.eq[MLX4_EQ_COMP_CPU0 + i].load; + } + } + + return index; +} + int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, - struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq) + struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq, + unsigned vector, int collapsed) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cq_table *cq_table = &priv->cq_table; @@ -151,8 +241,19 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, cq_context = mailbox->buf; memset(cq_context, 0, sizeof *cq_context); + cq_context->flags = cpu_to_be32(!!collapsed << 18); cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index); - cq_context->comp_eqn = priv->eq_table.eq[MLX4_EQ_COMP].eqn; + + if (vector == MLX4_LEAST_ATTACHED_VECTOR) + vector = mlx4_find_least_loaded_vector(priv); + else if (vector >= dev->caps.num_comp_vectors) { + err = -EINVAL; + goto err_radix; + } + + cq->comp_eq_idx = MLX4_EQ_COMP_CPU0 + vector; + cq_context->comp_eqn = priv->eq_table.eq[MLX4_EQ_COMP_CPU0 + + vector].eqn; 
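+	/* comp_eq_idx recorded above is used again at teardown:
+	 * mlx4_cq_free() synchronizes against this EQ's IRQ and drops
+	 * the per-vector load count that is incremented below. */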
cq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT; mtt_addr = mlx4_mtt_addr(dev, mtt); @@ -165,6 +266,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, if (err) goto err_radix; + priv->eq_table.eq[cq->comp_eq_idx].load++; cq->cons_index = 0; cq->arm_sn = 1; cq->uar = uar; @@ -191,24 +293,6 @@ err_out: } EXPORT_SYMBOL_GPL(mlx4_cq_alloc); -int mlx4_cq_modify(struct mlx4_dev *dev, struct mlx4_cq *cq, - struct mlx4_cq_context *context, int modify) -{ - struct mlx4_cmd_mailbox *mailbox; - int err; - - mailbox = mlx4_alloc_cmd_mailbox(dev); - if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); - - memcpy(mailbox->buf, context, sizeof *context); - err = mlx4_MODIFY_CQ(dev, mailbox, cq->cqn, modify); - - mlx4_free_cmd_mailbox(dev, mailbox); - return err; -} -EXPORT_SYMBOL_GPL(mlx4_cq_modify); - void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -219,7 +303,8 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq) if (err) mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn); - synchronize_irq(priv->eq_table.eq[MLX4_EQ_COMP].irq); + synchronize_irq(priv->eq_table.eq[cq->comp_eq_idx].irq); + priv->eq_table.eq[cq->comp_eq_idx].load--; spin_lock_irq(&cq_table->lock); radix_tree_delete(&cq_table->tree, cq->cqn); @@ -243,7 +328,7 @@ int mlx4_init_cq_table(struct mlx4_dev *dev) INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC); err = mlx4_bitmap_init(&cq_table->bitmap, dev->caps.num_cqs, - dev->caps.num_cqs - 1, dev->caps.reserved_cqs); + dev->caps.num_cqs - 1, dev->caps.reserved_cqs, 0); if (err) return err; diff --git a/drivers/net/mlx4/en_cq.c b/drivers/net/mlx4/en_cq.c new file mode 100644 index 0000000..8e04633 --- /dev/null +++ b/drivers/net/mlx4/en_cq.c @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#include <linux/mlx4/cq.h> +#include <linux/mlx4/qp.h> +#include <linux/mlx4/cmd.h> +#include <linux/delay.h> + +#include "mlx4_en.h" + +static void mlx4_en_cq_event(struct mlx4_cq *cq, enum mlx4_event event) +{ + return; +} + + +int mlx4_en_create_cq(struct mlx4_en_priv *priv, + struct mlx4_en_cq *cq, + int entries, int ring, enum cq_type mode) +{ + struct mlx4_en_dev *mdev = priv->mdev; + int err; + + cq->size = entries; + if (mode == RX) { + cq->buf_size = cq->size * sizeof(struct mlx4_cqe); + cq->vector = (ring + priv->port) % + mdev->dev->caps.num_comp_vectors; + } else { + cq->buf_size = sizeof(struct mlx4_cqe); + cq->vector = MLX4_LEAST_ATTACHED_VECTOR; + } + cq->ring = ring; + cq->is_tx = mode; + spin_lock_init(&cq->lock); + + err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres, + cq->buf_size, 2 * PAGE_SIZE); + if (err) + return err; + + err = mlx4_en_map_buffer(&cq->wqres.buf); + if (err) + mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size); + else + cq->buf = (struct mlx4_cqe *) cq->wqres.buf.direct.buf; + + return err; +} + +int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) +{ + struct mlx4_en_dev *mdev = priv->mdev; + int err; + + cq->dev = mdev->pndev[priv->port]; + cq->mcq.set_ci_db = cq->wqres.db.db; + cq->mcq.arm_db = cq->wqres.db.db + 1; + *cq->mcq.set_ci_db = 0; + *cq->mcq.arm_db = 0; + memset(cq->buf, 0, cq->buf_size); + + err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, &mdev->priv_uar, + cq->wqres.db.dma, &cq->mcq, cq->vector, cq->is_tx); + if (err) + return err; + + cq->mcq.comp = cq->is_tx ? mlx4_en_tx_irq : mlx4_en_rx_irq; + cq->mcq.event = mlx4_en_cq_event; + + if (cq->is_tx) { + init_timer(&cq->timer); + cq->timer.function = mlx4_en_poll_tx_cq; + cq->timer.data = (unsigned long) cq; + } else { + char name[IFNAMSIZ]; + + snprintf(name, IFNAMSIZ, "mlx4_en-%d-%d", priv->port, cq->ring); + cq->poll_dev = alloc_netdev(0, name, ether_setup); + if (!cq->poll_dev) + return -ENOMEM; + + cq->poll_dev->priv = cq; + cq->poll_dev->weight = 64; + cq->poll_dev->poll = mlx4_en_poll_rx_cq; + set_bit(__LINK_STATE_START, &cq->poll_dev->state); + } + + return 0; +} + +void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) +{ + struct mlx4_en_dev *mdev = priv->mdev; + + mlx4_en_unmap_buffer(&cq->wqres.buf); + mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size); + cq->buf_size = 0; + cq->buf = NULL; +} + +void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) +{ + struct mlx4_en_dev *mdev = priv->mdev; + + if (cq->is_tx) + del_timer(&cq->timer); + else { + while (test_bit(__LINK_STATE_RX_SCHED, + &cq->poll_dev->state)) + msleep(1); + free_netdev(cq->poll_dev); + cq->poll_dev = NULL; + } + + mlx4_cq_free(mdev->dev, &cq->mcq); +} + +/* Set rx cq moderation parameters */ +int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) +{ + return mlx4_cq_modify(priv->mdev->dev, &cq->mcq, + cq->moder_cnt, cq->moder_time); +} + +int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) +{ + mlx4_cq_arm(&cq->mcq, MLX4_CQ_DB_REQ_NOT, priv->mdev->uar_map, + &priv->mdev->uar_lock); + + return 0; +} + + diff --git a/drivers/net/mlx4/en_frag.c b/drivers/net/mlx4/en_frag.c new file mode 100644 index 0000000..9d34e12 --- /dev/null +++ b/drivers/net/mlx4/en_frag.c @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/if_vlan.h> +#include <net/ip.h> +#include <linux/etherdevice.h> + +#include "mlx4_en.h" + + +static struct mlx4_en_ipfrag *find_session(struct mlx4_en_rx_ring *ring, + struct iphdr *iph) +{ + struct mlx4_en_ipfrag *session; + int i; + + for (i = 0; i < MLX4_EN_NUM_IPFRAG_SESSIONS; i++) { + session = &ring->ipfrag[i]; + if (session->fragments == NULL) + continue; + if (session->daddr == iph->daddr && + session->saddr == iph->saddr && + session->id == iph->id && + session->protocol == iph->protocol) { + return session; + } + } + return NULL; +} + +static struct mlx4_en_ipfrag *start_session(struct mlx4_en_rx_ring *ring, + struct iphdr *iph) +{ + struct mlx4_en_ipfrag *session; + int index = -1; + int i; + + for (i = 0; i < MLX4_EN_NUM_IPFRAG_SESSIONS; i++) { + if (ring->ipfrag[i].fragments == NULL) { + index = i; + break; + } + } + if (index < 0) + return NULL; + + session = &ring->ipfrag[index]; + + return session; +} + + +static void flush_session(struct mlx4_en_priv *priv, + struct mlx4_en_ipfrag *session, + u16 more) +{ + struct sk_buff *skb = session->fragments; + struct iphdr *iph = ip_hdr(skb); + struct net_device *dev = skb->dev; + + /* Update IP length and checksum */ + iph->tot_len = htons(session->total_len); + iph->frag_off = htons(more | (session->offset >> 3)); + iph->check = 0; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + + if (session->vlan) + vlan_hwaccel_receive_skb(skb, priv->vlgrp, + be16_to_cpu(session->sl_vid)); + else + netif_receive_skb(skb); + dev->last_rx = jiffies; + session->fragments = NULL; + session->last = NULL; +} + + +static inline void frag_append(struct mlx4_en_priv *priv, + struct mlx4_en_ipfrag *session, + struct sk_buff *skb, + unsigned int data_len) +{ + struct sk_buff *parent = session->fragments; + + /* Update skb bookkeeping */ + parent->len += data_len; + parent->data_len += data_len; + session->total_len += data_len; + + skb_pull(skb, skb->len - data_len); + parent->truesize += skb->truesize; + + if (session->last) + session->last->next = skb; + else + skb_shinfo(parent)->frag_list = skb; + + session->last = skb; +} + +int mlx4_en_rx_frags(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, + struct sk_buff *skb, struct 
mlx4_cqe *cqe)
+{
+	struct mlx4_en_ipfrag *session;
+	struct iphdr *iph;
+	u16 ip_len;
+	u16 ip_hlen;
+	int data_len;
+	u16 offset;
+
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+	iph = ip_hdr(skb);
+	ip_len = ntohs(iph->tot_len);
+	ip_hlen = iph->ihl * 4;
+	data_len = ip_len - ip_hlen;
+	offset = ntohs(iph->frag_off);
+	offset &= IP_OFFSET;
+	offset <<= 3;
+
+	session = find_session(ring, iph);
+	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) {
+		if (session)
+			flush_session(priv, session, IP_MF);
+		return -EINVAL;
+	}
+	if (session) {
+		if (unlikely(session->offset + session->total_len !=
+			     offset + ip_hlen)) {
+			flush_session(priv, session, IP_MF);
+			goto new_session;
+		}
+		/* Packets smaller than 60 bytes are padded to that size.
+		 * Fix up the skb's len field to the actual data size:
+		 * since the ethernet header was already removed, the IP
+		 * total length is exactly the data size (the skb is linear).
+		 */
+		skb->len = ip_len;
+
+		frag_append(priv, session, skb, data_len);
+	} else {
+new_session:
+		session = start_session(ring, iph);
+		if (unlikely(!session))
+			return -ENOSPC;
+
+		session->fragments = skb;
+		session->daddr = iph->daddr;
+		session->saddr = iph->saddr;
+		session->id = iph->id;
+		session->protocol = iph->protocol;
+		session->total_len = ip_len;
+		session->offset = offset;
+		session->vlan = (priv->vlgrp &&
+				 (be32_to_cpu(cqe->vlan_my_qpn) &
+				  MLX4_CQE_VLAN_PRESENT_MASK)) ? 1 : 0;
+		session->sl_vid = cqe->sl_vid;
+	}
+	if (!(ntohs(iph->frag_off) & IP_MF))
+		flush_session(priv, session, 0);
+	else if (session->fragments->len + priv->dev->mtu > 65536)
+		flush_session(priv, session, IP_MF);
+
+	return 0;
+}
+
+
+void mlx4_en_flush_frags(struct mlx4_en_priv *priv,
+			 struct mlx4_en_rx_ring *ring)
+{
+	struct mlx4_en_ipfrag *session;
+	int i;
+
+	for (i = 0; i < MLX4_EN_NUM_IPFRAG_SESSIONS; i++) {
+		session = &ring->ipfrag[i];
+		if (session->fragments)
+			flush_session(priv, session, IP_MF);
+	}
+}
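To see what the in-order test in mlx4_en_rx_frags() above enforces, it helps to run the bookkeeping on concrete numbers. The following standalone sketch assumes a chain of 1500-byte fragments with 20-byte IP headers; the numbers are illustrative and not taken from the driver:

#include <assert.h>

int main(void)
{
	/* state after the first fragment starts a session */
	unsigned int session_offset = 0;	/* data offset of first frag */
	unsigned int session_total_len = 1500;	/* ip_len of first frag */
	unsigned int ip_hlen = 20;

	/* the second fragment must carry data offset 1480 ... */
	unsigned int next_offset = 1480;
	assert(session_offset + session_total_len == next_offset + ip_hlen);

	/* frag_append() then grows total_len by the new data only */
	session_total_len += 1500 - ip_hlen;

	/* ... so the third fragment must start at data offset 2960 */
	assert(session_offset + session_total_len == 2960 + ip_hlen);
	return 0;
}

Fragments that fail this check flush the session up the stack instead of being coalesced, and a session is also flushed once one more MTU worth of data could push it past the 64 KB IP datagram limit.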
diff --git a/drivers/net/mlx4/en_main.c b/drivers/net/mlx4/en_main.c
new file mode 100644
index 0000000..b669fb2
--- /dev/null
+++ b/drivers/net/mlx4/en_main.c
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+
+#include <linux/mlx4/driver.h>
+#include <linux/mlx4/device.h>
+#include <linux/mlx4/cmd.h>
+
+#include "mlx4_en.h"
+
+MODULE_AUTHOR("Liran Liss, Yevgeny Petrilin");
+MODULE_DESCRIPTION("Mellanox ConnectX HCA Ethernet driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION " ("DRV_RELDATE")");
+
+static const char mlx4_en_version[] __devinitdata =
+	DRV_NAME ": Mellanox ConnectX HCA Ethernet driver v"
+	DRV_VERSION " (" DRV_RELDATE ")\n";
+
+static void mlx4_en_event(struct mlx4_dev *dev, void *endev_ptr,
+			  enum mlx4_dev_event event, int port)
+{
+	struct mlx4_en_dev *mdev = (struct mlx4_en_dev *) endev_ptr;
+	struct mlx4_en_priv *priv;
+
+	if (!mdev->pndev[port])
+		return;
+
+	priv = netdev_priv(mdev->pndev[port]);
+	switch (event) {
+	case MLX4_DEV_EVENT_PORT_UP:
+	case MLX4_DEV_EVENT_PORT_DOWN:
+		/* To prevent races, we poll the link state in a separate
+		   task rather than changing it here */
+		priv->link_state = event;
+		queue_work(mdev->workqueue, &priv->linkstate_task);
+		break;
+
+	case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
+		mlx4_err(mdev, "Internal error detected, restarting device\n");
+		break;
+
+	default:
+		mlx4_warn(mdev, "Unhandled event: %d\n", event);
+	}
+}
+
+static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr)
+{
+	struct mlx4_en_dev *mdev = endev_ptr;
+	int i;
+
+	mutex_lock(&mdev->state_lock);
+	mdev->device_up = false;
+	mutex_unlock(&mdev->state_lock);
+
+	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
+		if (mdev->pndev[i])
+			mlx4_en_destroy_netdev(mdev->pndev[i]);
+
+	flush_workqueue(mdev->workqueue);
+	destroy_workqueue(mdev->workqueue);
+	mlx4_mr_free(dev, &mdev->mr);
+	mlx4_uar_free(dev, &mdev->priv_uar);
+	mlx4_pd_free(dev, mdev->priv_pdn);
+	kfree(mdev);
+}
+
+static void *mlx4_en_add(struct mlx4_dev *dev)
+{
+	static int mlx4_en_version_printed;
+	struct mlx4_en_dev *mdev;
+	int i;
+	int err;
+
+	if (!mlx4_en_version_printed) {
+		printk(KERN_INFO "%s", mlx4_en_version);
+		mlx4_en_version_printed++;
+	}
+
+	mdev = kzalloc(sizeof *mdev, GFP_KERNEL);
+	if (!mdev) {
+		dev_err(&dev->pdev->dev, "Device struct alloc failed, "
+			"aborting.\n");
+		err = -ENOMEM;
+		goto err_free_res;
+	}
+
+	if (mlx4_pd_alloc(dev, &mdev->priv_pdn))
+		goto err_free_dev;
+
+	if (mlx4_uar_alloc(dev, &mdev->priv_uar))
+		goto err_pd;
+
+	mdev->uar_map = ioremap(mdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
+	if (!mdev->uar_map)
+		goto err_uar;
+	spin_lock_init(&mdev->uar_lock);
+
+	mdev->dev = dev;
+	mdev->dma_device = &(dev->pdev->dev);
+	mdev->pdev = dev->pdev;
+	mdev->device_up = false;
+
+	mdev->LSO_support = !!(dev->caps.flags & (1 << 15));
+	if (!mdev->LSO_support)
+		mlx4_warn(mdev, "LSO not supported, please upgrade to later "
+			  "FW version to enable LSO\n");
+
+	if (mlx4_mr_alloc(mdev->dev, mdev->priv_pdn, 0, ~0ull,
+			  MLX4_PERM_LOCAL_WRITE | MLX4_PERM_LOCAL_READ,
+			  0, 0, &mdev->mr)) {
+		mlx4_err(mdev, "Failed allocating memory region\n");
+		goto err_uar;
+	}
+	if (mlx4_mr_enable(mdev->dev, &mdev->mr)) {
+		mlx4_err(mdev, "Failed enabling memory region\n");
+		goto err_mr;
+	}
+
+	/* Build device profile according to supplied module parameters */
+	err = mlx4_en_get_profile(mdev);
+	if (err) {
+		mlx4_err(mdev, "Bad module parameters, aborting.\n");
+		goto err_mr;
+	}
+
+	/* Configure which ports to start according to module parameters */
+	mdev->port_cnt = 0;
+	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
mdev->port_cnt++; + + /* Number of RX rings is the minimum of the number of completion + * vectors + 1 (for the default ring) and MAX_RX_RINGS */ + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) { + mlx4_info(mdev, "Using %d tx rings for port:%d\n", + mdev->profile.prof[i].tx_ring_num, i); + mdev->profile.prof[i].rx_ring_num = + min_t(int, dev->caps.num_comp_vectors + 1, MAX_RX_RINGS); + mlx4_info(mdev, "Defaulting to %d rx rings for port:%d\n", + mdev->profile.prof[i].rx_ring_num, i); + } + + /* Create our own workqueue for reset/multicast tasks + * Note: we cannot use the shared workqueue because of deadlocks caused + * by the rtnl lock */ + mdev->workqueue = create_singlethread_workqueue("mlx4_en"); + if (!mdev->workqueue) { + err = -ENOMEM; + goto err_mr; + } + + /* At this stage all non-port specific tasks are complete: + * mark the card state as up */ + mutex_init(&mdev->state_lock); + mdev->device_up = true; + + /* Setup ports */ + + /* Create a netdev for each port */ + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) { + mlx4_info(mdev, "Activating port:%d\n", i); + if (mlx4_en_init_netdev(mdev, i, &mdev->profile.prof[i])) { + mdev->pndev[i] = NULL; + goto err_free_netdev; + } + } + return mdev; + + +err_free_netdev: + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) { + if (mdev->pndev[i]) + mlx4_en_destroy_netdev(mdev->pndev[i]); + } + + mutex_lock(&mdev->state_lock); + mdev->device_up = false; + mutex_unlock(&mdev->state_lock); + flush_workqueue(mdev->workqueue); + + /* Stop event queue before we drop down to release shared SW state */ + destroy_workqueue(mdev->workqueue); + +err_mr: + mlx4_mr_free(dev, &mdev->mr); +err_uar: + mlx4_uar_free(dev, &mdev->priv_uar); +err_pd: + mlx4_pd_free(dev, mdev->priv_pdn); +err_free_dev: + kfree(mdev); +err_free_res: + return NULL; +} + +enum mlx4_query_reply mlx4_en_query(void *endev_ptr, void *int_dev) +{ + struct mlx4_en_dev *mdev = endev_ptr; + struct net_device *netdev = int_dev; + int p; + + for (p = 1; p <= MLX4_MAX_PORTS; ++p) + if (mdev->pndev[p] == netdev) + return p; + + return MLX4_QUERY_NOT_MINE; +} + +static struct mlx4_interface mlx4_en_interface = { + .add = mlx4_en_add, + .remove = mlx4_en_remove, + .event = mlx4_en_event, + .query = mlx4_en_query +}; + +static int __init mlx4_en_init(void) +{ + return mlx4_register_interface(&mlx4_en_interface); +} + +static void __exit mlx4_en_cleanup(void) +{ + mlx4_unregister_interface(&mlx4_en_interface); +} + +module_init(mlx4_en_init); +module_exit(mlx4_en_cleanup); + diff --git a/drivers/net/mlx4/en_netdev.c b/drivers/net/mlx4/en_netdev.c new file mode 100644 index 0000000..e961481 --- /dev/null +++ b/drivers/net/mlx4/en_netdev.c @@ -0,0 +1,1092 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer.
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/etherdevice.h> +#include <linux/tcp.h> +#include <linux/if_vlan.h> +#include <linux/delay.h> + +#include <linux/mlx4/driver.h> +#include <linux/mlx4/device.h> +#include <linux/mlx4/cmd.h> +#include <linux/mlx4/cq.h> + +#include "mlx4_en.h" +#include "en_port.h" + + +static void mlx4_en_vlan_rx_register(struct net_device *dev, struct vlan_group *grp) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int err; + + mlx4_dbg(HW, priv, "Registering VLAN group:%p\n", grp); + priv->vlgrp = grp; + + mutex_lock(&mdev->state_lock); + if (mdev->device_up && priv->port_up) { + err = mlx4_SET_VLAN_FLTR(mdev->dev, priv->port, grp); + if (err) + mlx4_err(mdev, "Failed configuring VLAN filter\n"); + } + mutex_unlock(&mdev->state_lock); +} + +static void mlx4_en_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int err; + + if (!priv->vlgrp) + return; + + mlx4_dbg(HW, priv, "adding VLAN:%d (vlgrp entry:%p)\n", + vid, vlan_group_get_device(priv->vlgrp, vid)); + + /* Add VID to port VLAN filter */ + mutex_lock(&mdev->state_lock); + if (mdev->device_up && priv->port_up) { + err = mlx4_SET_VLAN_FLTR(mdev->dev, priv->port, priv->vlgrp); + if (err) + mlx4_err(mdev, "Failed configuring VLAN filter\n"); + } + mutex_unlock(&mdev->state_lock); +} + +static void mlx4_en_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int err; + + if (!priv->vlgrp) + return; + + mlx4_dbg(HW, priv, "Killing VID:%d (vlgrp:%p vlgrp " + "entry:%p)\n", vid, priv->vlgrp, + vlan_group_get_device(priv->vlgrp, vid)); + vlan_group_set_device(priv->vlgrp, vid, NULL); + + /* Remove VID from port VLAN filter */ + mutex_lock(&mdev->state_lock); + if (mdev->device_up && priv->port_up) { + err = mlx4_SET_VLAN_FLTR(mdev->dev, priv->port, priv->vlgrp); + if (err) + mlx4_err(mdev, "Failed configuring VLAN filter\n"); + } + mutex_unlock(&mdev->state_lock); +} + +static u64 mlx4_en_mac_to_u64(u8 *addr) +{ + u64 mac = 0; + int i; + + for (i = 0; i < ETH_ALEN; i++) { + mac <<= 8; + mac |= addr[i]; + } + return mac; +} + +static int mlx4_en_set_mac(struct net_device *dev, void *addr) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct sockaddr *saddr = addr; + + if (!is_valid_ether_addr(saddr->sa_data)) + return -EADDRNOTAVAIL; + + memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN); + priv->mac = mlx4_en_mac_to_u64(dev->dev_addr); + queue_work(mdev->workqueue, &priv->mac_task); + return 0; +} + +static void mlx4_en_do_set_mac(struct work_struct *work) +{ + struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, +
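/* Editor's note, not part of the patch: mlx4_en_mac_to_u64() above folds
 * the address bytes MSB-first, so 00:02:c9:12:34:56 becomes
 * 0x0002c9123456ULL; mlx4_en_init_netdev() below applies the inverse
 * shift when rebuilding dev->dev_addr from priv->mac. */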
mac_task); + struct mlx4_en_dev *mdev = priv->mdev; + int err = 0; + + mutex_lock(&mdev->state_lock); + if (priv->port_up) { + /* Remove old MAC and insert the new one */ + mlx4_unregister_mac(mdev->dev, priv->port, priv->mac_index); + err = mlx4_register_mac(mdev->dev, priv->port, + priv->mac, &priv->mac_index); + if (err) + mlx4_err(mdev, "Failed changing HW MAC address\n"); + } else + mlx4_dbg(HW, priv, "Port is down, exiting...\n"); + + mutex_unlock(&mdev->state_lock); +} + +static void mlx4_en_clear_list(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct dev_mc_list *plist = priv->mc_list; + struct dev_mc_list *next; + + while (plist) { + next = plist->next; + kfree(plist); + plist = next; + } + priv->mc_list = NULL; +} + +static void mlx4_en_cache_mclist(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct dev_mc_list *mclist; + struct dev_mc_list *tmp; + struct dev_mc_list *plist = NULL; + + for (mclist = dev->mc_list; mclist; mclist = mclist->next) { + tmp = kmalloc(sizeof(struct dev_mc_list), GFP_ATOMIC); + if (!tmp) { + mlx4_err(mdev, "failed to allocate multicast list\n"); + mlx4_en_clear_list(dev); + return; + } + memcpy(tmp, mclist, sizeof(struct dev_mc_list)); + tmp->next = NULL; + if (plist) + plist->next = tmp; + else + priv->mc_list = tmp; + plist = tmp; + } +} + + +static void mlx4_en_set_multicast(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + if (!priv->port_up) + return; + + queue_work(priv->mdev->workqueue, &priv->mcast_task); +} + +static void mlx4_en_do_set_multicast(struct work_struct *work) +{ + struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, + mcast_task); + struct mlx4_en_dev *mdev = priv->mdev; + struct net_device *dev = priv->dev; + struct dev_mc_list *mclist; + u64 mcast_addr = 0; + int err; + + mutex_lock(&mdev->state_lock); + if (!mdev->device_up) { + mlx4_dbg(HW, priv, "Card is not up, ignoring " + "multicast change.\n"); + goto out; + } + if (!priv->port_up) { + mlx4_dbg(HW, priv, "Port is down, ignoring " + "multicast change.\n"); + goto out; + } + + /* + * Promiscuous mode: disable all filters + */ + + if (dev->flags & IFF_PROMISC) { + if (!(priv->flags & MLX4_EN_FLAG_PROMISC)) { + if (netif_msg_rx_status(priv)) + mlx4_warn(mdev, "Port:%d entering promiscuous mode\n", + priv->port); + priv->flags |= MLX4_EN_FLAG_PROMISC; + + /* Enable promiscuous mode */ + err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, + priv->base_qpn, 1); + if (err) + mlx4_err(mdev, "Failed enabling " + "promiscuous mode\n"); + + /* Disable port multicast filter (unconditionally) */ + err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, + 0, MLX4_MCAST_DISABLE); + if (err) + mlx4_err(mdev, "Failed disabling " + "multicast filter\n"); + + /* Disable port VLAN filter */ + err = mlx4_SET_VLAN_FLTR(mdev->dev, priv->port, NULL); + if (err) + mlx4_err(mdev, "Failed disabling " + "VLAN filter\n"); + } + goto out; + } + + /* + * Not in promiscuous mode + */ + + if (priv->flags & MLX4_EN_FLAG_PROMISC) { + if (netif_msg_rx_status(priv)) + mlx4_warn(mdev, "Port:%d leaving promiscuous mode\n", + priv->port); + priv->flags &= ~MLX4_EN_FLAG_PROMISC; + + /* Disable promiscuous mode */ + err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, + priv->base_qpn, 0); + if (err) + mlx4_err(mdev, "Failed disabling promiscuous mode\n"); + + /* Enable port VLAN filter */ + err = mlx4_SET_VLAN_FLTR(mdev->dev, priv->port, priv->vlgrp); + if (err) +
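/* Editor's note, not part of the patch: the multicast path below
 * snapshots dev->mc_list via mlx4_en_cache_mclist() while holding
 * netif_tx_lock_bh(), which is why that helper must allocate with
 * GFP_ATOMIC; the firmware commands then walk the private copy with the
 * lock already dropped. */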
mlx4_err(mdev, "Failed enabling VLAN filter\n"); + } + + /* Enable/disable the multicast filter according to IFF_ALLMULTI */ + if (dev->flags & IFF_ALLMULTI) { + err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, + 0, MLX4_MCAST_DISABLE); + if (err) + mlx4_err(mdev, "Failed disabling multicast filter\n"); + } else { + err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, + 0, MLX4_MCAST_DISABLE); + if (err) + mlx4_err(mdev, "Failed disabling multicast filter\n"); + + /* Flush mcast filter and init it with broadcast address */ + mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, ETH_BCAST, + 1, MLX4_MCAST_CONFIG); + + /* Update multicast list - we cache all addresses so they won't + * change while HW is updated holding the command semaphor */ + netif_tx_lock_bh(dev); + mlx4_en_cache_mclist(dev); + netif_tx_unlock_bh(dev); + for (mclist = priv->mc_list; mclist; mclist = mclist->next) { + mcast_addr = mlx4_en_mac_to_u64(mclist->dmi_addr); + mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, + mcast_addr, 0, MLX4_MCAST_CONFIG); + } + err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, + 0, MLX4_MCAST_ENABLE); + if (err) + mlx4_err(mdev, "Failed enabling multicast filter\n"); + + mlx4_en_clear_list(dev); + } +out: + mutex_unlock(&mdev->state_lock); +} + +void mlx4_en_netpoll(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_cq *cq; + unsigned long flags; + int i; + + + for (i = 0; i < priv->rx_ring_num; i++) { + cq = &priv->rx_cq[i]; + spin_lock_irqsave(&cq->lock, flags); + while (test_bit(__LINK_STATE_RX_SCHED, &cq->poll_dev->state)) + msleep(1); + if (priv->rx_ring[i].use_frags) + mlx4_en_process_rx_cq(dev, cq, 0); + else + mlx4_en_process_rx_cq_skb(dev, cq, 0); + spin_unlock_irqrestore(&cq->lock, flags); + } +} + + +static void mlx4_en_tx_timeout(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + + if (netif_msg_timer(priv)) + mlx4_warn(mdev, "Tx timeout called on port:%d\n", priv->port); + + if (netif_carrier_ok(dev)) { + priv->port_stats.tx_timeout++; + mlx4_dbg(DRV, priv, "Scheduling watchdog\n"); + queue_work(mdev->workqueue, &priv->watchdog_task); + } +} + + +static struct net_device_stats *mlx4_en_get_stats(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + spin_lock_bh(&priv->stats_lock); + memcpy(&priv->ret_stats, &priv->stats, sizeof(priv->stats)); + spin_unlock_bh(&priv->stats_lock); + + return &priv->ret_stats; +} + +void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv) +{ + struct mlx4_en_cq *cq; + int i; + + /* If we haven't received a specific coalescing setting + * (module param), we set the moderation paramters as follows: + * - moder_cnt is set to the number of mtu sized packets to + * satisfy our coelsing target. + * - moder_time is set to a fixed value. 
+ */ + priv->rx_frames = MLX4_EN_RX_COAL_TARGET / priv->dev->mtu + 1; + priv->rx_usecs = MLX4_EN_RX_COAL_TIME; + mlx4_dbg(INTR, priv, "Default coalescing params for mtu:%d - " + "rx_frames:%d rx_usecs:%d\n", + priv->dev->mtu, priv->rx_frames, priv->rx_usecs); + + /* Setup cq moderation params */ + for (i = 0; i < priv->rx_ring_num; i++) { + cq = &priv->rx_cq[i]; + cq->moder_cnt = priv->rx_frames; + cq->moder_time = priv->rx_usecs; + } + + for (i = 0; i < priv->tx_ring_num; i++) { + cq = &priv->tx_cq[i]; + cq->moder_cnt = MLX4_EN_TX_COAL_PKTS; + cq->moder_time = MLX4_EN_TX_COAL_TIME; + } + + /* Reset auto-moderation params */ + priv->pkt_rate_low = MLX4_EN_RX_RATE_LOW; + priv->rx_usecs_low = MLX4_EN_RX_COAL_TIME_LOW; + priv->pkt_rate_high = MLX4_EN_RX_RATE_HIGH; + priv->rx_usecs_high = MLX4_EN_RX_COAL_TIME_HIGH; + priv->sample_interval = MLX4_EN_SAMPLE_INTERVAL; + priv->adaptive_rx_coal = 1; + priv->last_moder_time = MLX4_EN_AUTO_CONF; + priv->last_moder_jiffies = 0; + priv->last_moder_packets = 0; + priv->last_moder_tx_packets = 0; + priv->last_moder_bytes = 0; +} + +static void mlx4_en_auto_moderation(struct mlx4_en_priv *priv) +{ + unsigned long period = (unsigned long) (jiffies - priv->last_moder_jiffies); + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_cq *cq; + unsigned long packets; + unsigned long rate; + unsigned long avg_pkt_size; + unsigned long rx_packets; + unsigned long rx_bytes; + unsigned long tx_packets; + unsigned long tx_pkt_diff; + unsigned long rx_pkt_diff; + int moder_time; + int i, err; + + if (!priv->adaptive_rx_coal || period < priv->sample_interval * HZ) + return; + + spin_lock_bh(&priv->stats_lock); + rx_packets = priv->stats.rx_packets; + rx_bytes = priv->stats.rx_bytes; + tx_packets = priv->stats.tx_packets; + spin_unlock_bh(&priv->stats_lock); + + if (!priv->last_moder_jiffies || !period) + goto out; + + tx_pkt_diff = ((unsigned long) (tx_packets - + priv->last_moder_tx_packets)); + rx_pkt_diff = ((unsigned long) (rx_packets - + priv->last_moder_packets)); + packets = max(tx_pkt_diff, rx_pkt_diff); + rate = packets * HZ / period; + avg_pkt_size = packets ? ((unsigned long) (rx_bytes - + priv->last_moder_bytes)) / packets : 0; + + /* Apply auto-moderation only when the packet rate exceeds a rate at + * which it matters */ + if (rate > MLX4_EN_RX_RATE_THRESH) { + /* If tx and rx packet rates are not balanced, assume that + * traffic is mainly BW bound and apply maximum moderation.
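 * For illustration of the balanced case handled below (editor's example
 * with assumed parameter values): with rx_usecs_low = 16,
 * rx_usecs_high = 128, pkt_rate_low = 100000 and pkt_rate_high = 400000,
 * a measured rate of 250000 p/s interpolates to
 * 16 + (250000 - 100000) * (128 - 16) / (400000 - 100000) = 72 usec.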
+ * Otherwise, moderate according to packet rate */ + if (2 * tx_pkt_diff > 3 * rx_pkt_diff || + 2 * rx_pkt_diff > 3 * tx_pkt_diff) { + moder_time = priv->rx_usecs_high; + } else { + if (rate < priv->pkt_rate_low) + moder_time = priv->rx_usecs_low; + else if (rate > priv->pkt_rate_high) + moder_time = priv->rx_usecs_high; + else + moder_time = (rate - priv->pkt_rate_low) * + (priv->rx_usecs_high - priv->rx_usecs_low) / + (priv->pkt_rate_high - priv->pkt_rate_low) + + priv->rx_usecs_low; + } + } else { + /* When packet rate is low, use default moderation rather than + * 0 to prevent interrupt storms if traffic suddenly increases */ + moder_time = priv->rx_usecs; + } + + mlx4_dbg(INTR, priv, "tx rate:%lu rx_rate:%lu\n", + tx_pkt_diff * HZ / period, rx_pkt_diff * HZ / period); + + mlx4_dbg(INTR, priv, "Rx moder_time changed from:%d to %d period:%lu " + "[jiff] packets:%lu avg_pkt_size:%lu rate:%lu [p/s])\n", + priv->last_moder_time, moder_time, period, packets, + avg_pkt_size, rate); + + if (moder_time != priv->last_moder_time) { + priv->last_moder_time = moder_time; + for (i = 0; i < priv->rx_ring_num; i++) { + cq = &priv->rx_cq[i]; + cq->moder_time = moder_time; + err = mlx4_en_set_cq_moder(priv, cq); + if (err) { + mlx4_err(mdev, "Failed modifying moderation for cq:%d " + "on port:%d\n", i, priv->port); + break; + } + } + } + +out: + priv->last_moder_packets = rx_packets; + priv->last_moder_tx_packets = tx_packets; + priv->last_moder_bytes = rx_bytes; + priv->last_moder_jiffies = jiffies; +} + +static void mlx4_en_do_get_stats(struct work_struct *work) +{ + struct delayed_work *delay = container_of(work, struct delayed_work, work); + struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv, + stats_task); + struct mlx4_en_dev *mdev = priv->mdev; + int err; + + err = mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 0); + if (err) + mlx4_dbg(HW, priv, "Could not update stats for " + "port:%d\n", priv->port); + + mutex_lock(&mdev->state_lock); + if (mdev->device_up) { + if (priv->port_up) + mlx4_en_auto_moderation(priv); + + queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); + } + mutex_unlock(&mdev->state_lock); +} + +static void mlx4_en_linkstate(struct work_struct *work) +{ + struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, + linkstate_task); + struct mlx4_en_dev *mdev = priv->mdev; + int linkstate = priv->link_state; + + mutex_lock(&mdev->state_lock); + /* If observable port state changed set carrier state and + * report to system log */ + if (priv->last_link_state != linkstate) { + if (linkstate == MLX4_DEV_EVENT_PORT_DOWN) { + if (netif_msg_link(priv)) + mlx4_info(mdev, "Port %d - link down\n", priv->port); + netif_carrier_off(priv->dev); + } else { + if (netif_msg_link(priv)) + mlx4_info(mdev, "Port %d - link up\n", priv->port); + netif_carrier_on(priv->dev); + } + } + priv->last_link_state = linkstate; + mutex_unlock(&mdev->state_lock); +} + + +int mlx4_en_start_port(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_cq *cq; + struct mlx4_en_tx_ring *tx_ring; + struct mlx4_en_rx_ring *rx_ring; + int rx_index = 0; + int tx_index = 0; + int err = 0; + int i; + int j; + + if (priv->port_up) { + mlx4_dbg(DRV, priv, "start port called while port already up\n"); + return 0; + } + + /* Calculate Rx buf size */ + dev->mtu = min(dev->mtu, priv->max_mtu); + mlx4_en_calc_rx_buf(dev); + mlx4_dbg(DRV, priv, "Rx buf size:%d\n", priv->rx_skb_size); + /* Configure rx cq's and 
rings */ + for (i = 0; i < priv->rx_ring_num; i++) { + cq = &priv->rx_cq[i]; + rx_ring = &priv->rx_ring[i]; + + err = mlx4_en_activate_cq(priv, cq); + if (err) { + mlx4_err(mdev, "Failed activating Rx CQ\n"); + goto rx_err; + } + for (j = 0; j < cq->size; j++) + cq->buf[j].owner_sr_opcode = MLX4_CQE_OWNER_MASK; + err = mlx4_en_set_cq_moder(priv, cq); + if (err) { + mlx4_err(mdev, "Failed setting cq moderation parameters\n"); + mlx4_en_deactivate_cq(priv, cq); + goto cq_err; + } + mlx4_en_arm_cq(priv, cq); + + ++rx_index; + } + + err = mlx4_en_activate_rx_rings(priv); + if (err) { + mlx4_err(mdev, "Failed to activate RX rings\n"); + goto cq_err; + } + + err = mlx4_en_config_rss_steer(priv); + if (err) { + mlx4_err(mdev, "Failed configuring rss steering\n"); + goto rx_err; + } + + /* Configure tx cq's and rings */ + for (i = 0; i < priv->tx_ring_num; i++) { + /* Configure cq */ + cq = &priv->tx_cq[i]; + err = mlx4_en_activate_cq(priv, cq); + if (err) { + mlx4_err(mdev, "Failed allocating Tx CQ\n"); + goto tx_err; + } + err = mlx4_en_set_cq_moder(priv, cq); + if (err) { + mlx4_err(mdev, "Failed setting cq moderation parameters\n"); + mlx4_en_deactivate_cq(priv, cq); + goto tx_err; + } + mlx4_dbg(DRV, priv, "Resetting index of collapsed CQ:%d " + "to -1\n", i); + cq->buf->wqe_index = 0xffff; + + /* Configure ring */ + tx_ring = &priv->tx_ring[i]; + err = mlx4_en_activate_tx_ring(priv, tx_ring, cq->mcq.cqn, + priv->rx_ring[0].srq.srqn); + if (err) { + mlx4_err(mdev, "Failed allocating Tx ring\n"); + mlx4_en_deactivate_cq(priv, cq); + goto tx_err; + } + /* Set initial ownership of all Tx TXBBs to SW (1) */ + for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE) + *((u32 *) (tx_ring->buf + j)) = 0xffffffff; + ++tx_index; + } + + for (i = 0; i < MLX4_EN_TX_HASH_SIZE; i++) { + memset(&priv->tx_hash[i], 0, sizeof(struct mlx4_en_tx_hash_entry)); + /* + * Initially, all streams are assigned to the rings that + * should handle the small-packet streams (the lower ring + * indexes), then moved according to the stream characteristics.
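 * (Editor's illustration: if MLX4_EN_NUM_HASH_RINGS were 16 -- an
 * assumed value -- the mask below is 16 / 2 - 1 = 7, so every stream
 * initially hashes onto one of rings 0-7.)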
+ */ + priv->tx_hash[i].ring = i & (MLX4_EN_NUM_HASH_RINGS / 2 - 1); + } + + /* Configure port */ + err = mlx4_SET_PORT_general(mdev->dev, priv->port, + priv->rx_skb_size + ETH_FCS_LEN, + priv->prof->tx_pause, + priv->prof->tx_ppp, + priv->prof->rx_pause, + priv->prof->rx_ppp); + if (err) { + mlx4_err(mdev, "Failed setting port general configurations" + " for port %d, with error %d\n", priv->port, err); + goto tx_err; + } + /* Set default qp number */ + err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, priv->base_qpn, 0); + if (err) { + mlx4_err(mdev, "Failed setting default qp numbers\n"); + goto tx_err; + } + /* Set port mac number */ + err = mlx4_register_mac(mdev->dev, priv->port, + priv->mac, &priv->mac_index); + if (err) { + mlx4_err(mdev, "Failed setting port mac\n"); + goto tx_err; + } + + /* Init port */ + mlx4_dbg(HW, priv, "Initializing port\n"); + err = mlx4_INIT_PORT(mdev->dev, priv->port); + if (err) { + mlx4_err(mdev, "Failed Initializing port\n"); + goto mac_err; + } + + /* Schedule multicast task to populate multicast list */ + queue_work(mdev->workqueue, &priv->mcast_task); + + priv->port_up = true; + netif_start_queue(dev); + return 0; + +mac_err: + mlx4_unregister_mac(mdev->dev, priv->port, priv->mac_index); +tx_err: + while (tx_index--) { + mlx4_en_deactivate_tx_ring(priv, &priv->tx_ring[tx_index]); + mlx4_en_deactivate_cq(priv, &priv->tx_cq[tx_index]); + } + + mlx4_en_release_rss_steer(priv); +rx_err: + for (i = 0; i < priv->rx_ring_num; i++) + mlx4_en_deactivate_rx_ring(priv, &priv->rx_ring[i]); +cq_err: + while (rx_index--) + mlx4_en_deactivate_cq(priv, &priv->rx_cq[rx_index]); + + return err; /* need to close devices */ +} + + +void mlx4_en_stop_port(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int i; + + if (!priv->port_up) { + mlx4_dbg(DRV, priv, "stop port (%d) called while port already down\n", + priv->port); + return; + } + netif_stop_queue(dev); + + /* Synchronize with tx routine */ + netif_tx_lock_bh(dev); + priv->port_up = false; + netif_tx_unlock_bh(dev); + + /* close port*/ + mlx4_CLOSE_PORT(mdev->dev, priv->port); + + /* Unregister Mac address for the port */ + mlx4_unregister_mac(mdev->dev, priv->port, priv->mac_index); + + /* Free TX Rings */ + for (i = 0; i < priv->tx_ring_num; i++) { + mlx4_en_deactivate_tx_ring(priv, &priv->tx_ring[i]); + mlx4_en_deactivate_cq(priv, &priv->tx_cq[i]); + } + msleep(10); + + for (i = 0; i < priv->tx_ring_num; i++) + mlx4_en_free_tx_buf(dev, &priv->tx_ring[i]); + + /* Free RSS qps */ + mlx4_en_release_rss_steer(priv); + + /* Free RX Rings */ + for (i = 0; i < priv->rx_ring_num; i++) { + mlx4_en_deactivate_rx_ring(priv, &priv->rx_ring[i]); + mlx4_en_deactivate_cq(priv, &priv->rx_cq[i]); + } +} + +static void mlx4_en_restart(struct work_struct *work) +{ + struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, + watchdog_task); + struct mlx4_en_dev *mdev = priv->mdev; + struct net_device *dev = priv->dev; + + mlx4_dbg(DRV, priv, "Watchdog task called for port %d\n", priv->port); + mlx4_en_stop_port(dev); + if (mlx4_en_start_port(dev)) + mlx4_err(mdev, "Failed restarting port %d\n", priv->port); +} + + +int mlx4_en_open(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int i; + int err = 0; + + mutex_lock(&mdev->state_lock); + + if (!mdev->device_up) { + mlx4_err(mdev, "Cannot open - device down/disabled\n"); + err = -EBUSY; + goto out; + } + + /* Reset HW statistics and 
performance counters */ + if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1)) + mlx4_dbg(HW, priv, "Failed dumping statistics\n"); + + memset(&priv->stats, 0, sizeof(priv->stats)); + memset(&priv->pstats, 0, sizeof(priv->pstats)); + + for (i = 0; i < priv->tx_ring_num; i++) { + priv->tx_ring[i].bytes = 0; + priv->tx_ring[i].packets = 0; + } + for (i = 0; i < priv->rx_ring_num; i++) { + priv->rx_ring[i].bytes = 0; + priv->rx_ring[i].packets = 0; + } + + mlx4_en_set_default_moderation(priv); + err = mlx4_en_start_port(dev); + if (err) + mlx4_err(mdev, "Failed starting port:%d\n", priv->port); + +out: + mutex_unlock(&mdev->state_lock); + return err; +} + + +int mlx4_en_close(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + + if (netif_msg_ifdown(priv)) + mlx4_info(mdev, "Close called for port:%d\n", priv->port); + + mutex_lock(&mdev->state_lock); + + mlx4_en_stop_port(dev); + netif_carrier_off(dev); + + mutex_unlock(&mdev->state_lock); + return 0; +} + +void mlx4_en_free_resources(struct mlx4_en_priv *priv) +{ + int i; + + for (i = 0; i < priv->tx_ring_num; i++) { + if (priv->tx_ring[i].tx_info) + mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]); + if (priv->tx_cq[i].buf) + mlx4_en_destroy_cq(priv, &priv->tx_cq[i]); + } + + for (i = 0; i < priv->rx_ring_num; i++) { + if (priv->rx_ring[i].rx_info) + mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i]); + if (priv->rx_cq[i].buf) + mlx4_en_destroy_cq(priv, &priv->rx_cq[i]); + } +} + +int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_port_profile *prof = priv->prof; + int i; + + /* Create tx Rings */ + for (i = 0; i < priv->tx_ring_num; i++) { + if (mlx4_en_create_cq(priv, &priv->tx_cq[i], + prof->tx_ring_size, i, TX)) + goto err; + + if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i], + prof->tx_ring_size, TXBB_SIZE)) + goto err; + } + + /* Create rx Rings */ + for (i = 0; i < priv->rx_ring_num; i++) { + if (mlx4_en_create_cq(priv, &priv->rx_cq[i], + prof->rx_ring_size, i, RX)) + goto err; + + if (i > 0) + priv->rx_ring[i].use_frags = 1; + else + priv->rx_ring[i].use_frags = 0; + if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i], + prof->rx_ring_size)) + goto err; + } + + return 0; + +err: + mlx4_err(mdev, "Failed to allocate NIC resources\n"); + return -ENOMEM; +} + + +void mlx4_en_destroy_netdev(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + + mlx4_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port); + + /* Unregister device - this will close the port if it was up */ + if (priv->registered) + unregister_netdev(dev); + + if (priv->allocated) + mlx4_free_hwq_res(mdev->dev, &priv->res, MLX4_EN_PAGE_SIZE); + + cancel_delayed_work(&priv->stats_task); + cancel_delayed_work(&priv->refill_task); + /* flush any pending task for this netdev */ + flush_workqueue(mdev->workqueue); + + /* Detach the netdev so tasks would not attempt to access it */ + mutex_lock(&mdev->state_lock); + mdev->pndev[priv->port] = NULL; + mutex_unlock(&mdev->state_lock); + + mlx4_en_free_resources(priv); + free_netdev(dev); +} + +static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int err = 0; + + mlx4_dbg(DRV, priv, "Change MTU called - current:%d new:%d\n", + dev->mtu, new_mtu); + + if ((new_mtu < MLX4_EN_MIN_MTU) || (new_mtu > priv->max_mtu)) { + mlx4_err(mdev,
"Bad MTU size:%d.\n", new_mtu); + return -EPERM; + } + dev->mtu = new_mtu; + + if (netif_running(dev)) { + mutex_lock(&mdev->state_lock); + if (!mdev->device_up) { + /* NIC is probably restarting - let watchdog task reset + * the port */ + mlx4_dbg(DRV, priv, "Change MTU called with card down!?\n"); + } else { + mlx4_en_stop_port(dev); + mlx4_en_set_default_moderation(priv); + err = mlx4_en_start_port(dev); + if (err) { + mlx4_err(mdev, "Failed restarting port:%d\n", + priv->port); + queue_work(mdev->workqueue, &priv->watchdog_task); + } + } + mutex_unlock(&mdev->state_lock); + } + return 0; +} + +int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, + struct mlx4_en_port_profile *prof) +{ + struct net_device *dev; + struct mlx4_en_priv *priv; + int i; + int err; + + dev = alloc_etherdev(sizeof(struct mlx4_en_priv)); + if (dev == NULL) { + mlx4_err(mdev, "Net device allocation failed\n"); + return -ENOMEM; + } + + SET_NETDEV_DEV(dev, &mdev->dev->pdev->dev); + + /* + * Initialize driver private data + */ + + priv = netdev_priv(dev); + memset(priv, 0, sizeof(struct mlx4_en_priv)); + priv->dev = dev; + priv->mdev = mdev; + priv->prof = prof; + priv->port = port; + priv->port_up = false; + priv->rx_csum = 1; + priv->flags = prof->flags; + priv->tx_ring_num = prof->tx_ring_num; + priv->rx_ring_num = prof->rx_ring_num; + priv->mc_list = NULL; + priv->mac_index = -1; + priv->msg_enable = MLX4_EN_MSG_LEVEL; + spin_lock_init(&priv->stats_lock); + INIT_WORK(&priv->mcast_task, mlx4_en_do_set_multicast); + INIT_WORK(&priv->mac_task, mlx4_en_do_set_mac); + INIT_DELAYED_WORK(&priv->refill_task, mlx4_en_rx_refill); + INIT_WORK(&priv->watchdog_task, mlx4_en_restart); + INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate); + INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats); + + /* Query for default mac and max mtu */ + priv->max_mtu = mdev->dev->caps.eth_mtu_cap[priv->port]; + priv->mac = mdev->dev->caps.def_mac[priv->port]; + if (ILLEGAL_MAC(priv->mac)) { + mlx4_err(mdev, "Port: %d, invalid mac burned: 0x%llx, quiting\n", + priv->port, priv->mac); + err = -EINVAL; + goto out; + } + + err = mlx4_en_alloc_resources(priv); + if (err) + goto out; + + /* Populate Rx default RSS mappings */ + mlx4_en_set_default_rss_map(priv, &priv->rss_map, priv->rx_ring_num * + RSS_FACTOR, priv->rx_ring_num); + /* Allocate page for receive rings */ + err = mlx4_alloc_hwq_res(mdev->dev, &priv->res, + MLX4_EN_PAGE_SIZE, MLX4_EN_PAGE_SIZE); + if (err) { + mlx4_err(mdev, "Failed to allocate page for rx qps\n"); + goto out; + } + priv->allocated = 1; + + /* Populate Tx priority mappings */ + mlx4_en_set_prio_map(priv, priv->tx_prio_map, + prof->tx_ring_num - MLX4_EN_NUM_HASH_RINGS); + + /* + * Initialize netdev entry points + */ + + dev->open = &mlx4_en_open; + dev->stop = &mlx4_en_close; + dev->hard_start_xmit = &mlx4_en_xmit; + dev->get_stats = &mlx4_en_get_stats; + dev->set_multicast_list = &mlx4_en_set_multicast; + dev->set_mac_address = &mlx4_en_set_mac; + dev->change_mtu = &mlx4_en_change_mtu; + dev->tx_timeout = &mlx4_en_tx_timeout; + dev->watchdog_timeo = MLX4_EN_WATCHDOG_TIMEOUT; + dev->vlan_rx_register = mlx4_en_vlan_rx_register; + dev->vlan_rx_add_vid = mlx4_en_vlan_rx_add_vid; + dev->vlan_rx_kill_vid = mlx4_en_vlan_rx_kill_vid; +#ifdef CONFIG_NET_POLL_CONTROLLER + dev->poll_controller = mlx4_en_netpoll; +#endif + SET_ETHTOOL_OPS(dev, &mlx4_en_ethtool_ops); + + /* Set defualt MAC */ + dev->addr_len = ETH_ALEN; + for (i = 0; i < ETH_ALEN; i++) + dev->dev_addr[ETH_ALEN - 1 - i] = + (u8) (priv->mac >> (8 * 
i)); + + /* + * Set driver features + */ + dev->features |= NETIF_F_SG; + dev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + dev->features |= NETIF_F_HIGHDMA; + dev->features |= NETIF_F_HW_VLAN_TX | + NETIF_F_HW_VLAN_RX | + NETIF_F_HW_VLAN_FILTER; + if (mdev->profile.num_lro) + dev->features |= NETIF_F_LRO; + if (mdev->LSO_support) { + dev->features |= NETIF_F_TSO; + dev->features |= NETIF_F_TSO6; + } + + mdev->pndev[port] = dev; + + netif_carrier_off(dev); + err = register_netdev(dev); + if (err) { + mlx4_err(mdev, "Netdev registration failed\n"); + goto out; + } + priv->registered = 1; + queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); + return 0; + +out: + mlx4_en_destroy_netdev(dev); + return err; +} + diff --git a/drivers/net/mlx4/en_params.c b/drivers/net/mlx4/en_params.c new file mode 100644 index 0000000..8ac0c02 --- /dev/null +++ b/drivers/net/mlx4/en_params.c @@ -0,0 +1,496 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/kernel.h> +#include <linux/ethtool.h> +#include <linux/netdevice.h> + +#include "mlx4_en.h" +#include "en_port.h" + +#define MLX4_EN_PARM_INT(X, def_val, desc) \ + static unsigned int X = def_val; \ + module_param(X, uint, 0444); \ + MODULE_PARM_DESC(X, desc); + + +/* + * Device scope module parameters + */ + + +/* Use an XOR rather than a Toeplitz hash function for RSS */ +MLX4_EN_PARM_INT(rss_xor, 0, "Use XOR hash function for RSS"); + +/* RSS hash type mask - default to <saddr, daddr, sport, dport> */ +MLX4_EN_PARM_INT(rss_mask, 0x5, "RSS hash type bitmask"); + +/* Number of LRO sessions per Rx ring (rounded up to a power of two) */ +MLX4_EN_PARM_INT(num_lro, MLX4_EN_MAX_LRO_DESCRIPTORS, + "Number of LRO sessions per ring or disabled (0)"); + +/* Allow reassembly of fragmented IP packets */ +MLX4_EN_PARM_INT(ip_reasm, 1, "Allow reassembly of fragmented IP packets (!0)"); + +/* Priority pausing */ +MLX4_EN_PARM_INT(pfctx, 0, "Priority based Flow Control policy on TX[7:0]." + " Per priority bit mask"); +MLX4_EN_PARM_INT(pfcrx, 0, "Priority based Flow Control policy on RX[7:0]."
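/* Editor's note, not part of the patch: given the MLX4_EN_PARM_INT
 * definition above, this invocation expands to roughly
 *
 *	static unsigned int pfcrx = 0;
 *	module_param(pfcrx, uint, 0444);
 *	MODULE_PARM_DESC(pfcrx, "Priority based Flow Control policy on"
 *			 " RX[7:0]. Per priority bit mask");
 *
 * so e.g. "modprobe mlx4_en pfcrx=0x0f" requests per-priority pause on
 * priorities 0-3. */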
+ " Per priority bit mask"); + +int mlx4_en_get_profile(struct mlx4_en_dev *mdev) +{ + struct mlx4_en_profile *params = &mdev->profile; + int i; + + params->rss_xor = (rss_xor != 0); + params->rss_mask = rss_mask & 0x1f; + params->num_lro = min_t(int, num_lro , MLX4_EN_MAX_LRO_DESCRIPTORS); + params->ip_reasm = ip_reasm; + for (i = 1; i <= MLX4_MAX_PORTS; i++) { + params->prof[i].rx_pause = 1; + params->prof[i].rx_ppp = pfcrx; + params->prof[i].tx_pause = 1; + params->prof[i].tx_ppp = pfctx; + params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE; + params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE; + params->prof[i].tx_ring_num = MLX4_EN_NUM_HASH_RINGS + 1 + + (!!pfcrx) * MLX4_EN_NUM_PPP_RINGS; + } + + return 0; +} + + +/* + * Ethtool support + */ + +static void mlx4_en_update_lro_stats(struct mlx4_en_priv *priv) +{ + int i; + + priv->port_stats.lro_aggregated = 0; + priv->port_stats.lro_flushed = 0; + priv->port_stats.lro_no_desc = 0; + + for (i = 0; i < priv->rx_ring_num; i++) { + priv->port_stats.lro_aggregated += priv->rx_ring[i].lro.stats.aggregated; + priv->port_stats.lro_flushed += priv->rx_ring[i].lro.stats.flushed; + priv->port_stats.lro_no_desc += priv->rx_ring[i].lro.stats.no_desc; + } +} + +static void +mlx4_en_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + + sprintf(drvinfo->driver, DRV_NAME " (%s)", mdev->dev->board_id); + strncpy(drvinfo->version, DRV_VERSION " (" DRV_RELDATE ")", 32); + sprintf(drvinfo->fw_version, "%d.%d.%d", + (u16) (mdev->dev->caps.fw_ver >> 32), + (u16) ((mdev->dev->caps.fw_ver >> 16) & 0xffff), + (u16) (mdev->dev->caps.fw_ver & 0xffff)); + strncpy(drvinfo->bus_info, pci_name(mdev->dev->pdev), 32); + drvinfo->n_stats = 0; + drvinfo->regdump_len = 0; + drvinfo->eedump_len = 0; +} + +static u32 mlx4_en_get_tso(struct net_device *dev) +{ + return (dev->features & NETIF_F_TSO) != 0; +} + +static int mlx4_en_set_tso(struct net_device *dev, u32 data) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + if (data) { + if (!priv->mdev->LSO_support) + return -EPERM; + dev->features |= (NETIF_F_TSO | NETIF_F_TSO6); + } else + dev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6); + return 0; +} + +static u32 mlx4_en_get_rx_csum(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + return priv->rx_csum; +} + +static int mlx4_en_set_rx_csum(struct net_device *dev, u32 data) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + priv->rx_csum = (data != 0); + return 0; +} + +static const char main_strings[][ETH_GSTRING_LEN] = { + "rx_packets", "tx_packets", "rx_bytes", "tx_bytes", "rx_errors", + "tx_errors", "rx_dropped", "tx_dropped", "multicast", "collisions", + "rx_length_errors", "rx_over_errors", "rx_crc_errors", + "rx_frame_errors", "rx_fifo_errors", "rx_missed_errors", + "tx_aborted_errors", "tx_carrier_errors", "tx_fifo_errors", + "tx_heartbeat_errors", "tx_window_errors", + + /* port statistics */ + "lro_aggregated", "lro_flushed", "lro_no_desc", "tso_packets", + "queue_stopped", "wake_queue", "tx_timeout", "rx_alloc_failed", + "rx_csum_good", "rx_csum_none", "tx_chksum_offload", + + /* packet statistics */ + "broadcast", "rx_prio_0", "rx_prio_1", "rx_prio_2", "rx_prio_3", + "rx_prio_4", "rx_prio_5", "rx_prio_6", "rx_prio_7", "tx_prio_0", + "tx_prio_1", "tx_prio_2", "tx_prio_3", "tx_prio_4", "tx_prio_5", + "tx_prio_6", "tx_prio_7", +}; +#define NUM_MAIN_STATS 21 +#define NUM_ALL_STATS (NUM_MAIN_STATS + NUM_PORT_STATS + 
NUM_PKT_STATS + NUM_PERF_STATS) + +static u32 mlx4_en_get_msglevel(struct net_device *dev) +{ + return ((struct mlx4_en_priv *) netdev_priv(dev))->msg_enable; +} + +static void mlx4_en_set_msglevel(struct net_device *dev, u32 val) +{ + ((struct mlx4_en_priv *) netdev_priv(dev))->msg_enable = val; +} + +static void mlx4_en_get_wol(struct net_device *netdev, + struct ethtool_wolinfo *wol) +{ + wol->supported = 0; + wol->wolopts = 0; + + return; +} + +static int mlx4_en_get_sset_count(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + return NUM_ALL_STATS + (priv->tx_ring_num + priv->rx_ring_num) * 2; +} + +static void mlx4_en_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, uint64_t *data) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + int index = 0; + int i; + + spin_lock_bh(&priv->stats_lock); + + mlx4_en_update_lro_stats(priv); + + for (i = 0; i < NUM_MAIN_STATS; i++) + data[index++] = ((unsigned long *) &priv->stats)[i]; + for (i = 0; i < NUM_PORT_STATS; i++) + data[index++] = ((unsigned long *) &priv->port_stats)[i]; + for (i = 0; i < priv->tx_ring_num; i++) { + data[index++] = priv->tx_ring[i].packets; + data[index++] = priv->tx_ring[i].bytes; + } + for (i = 0; i < priv->rx_ring_num; i++) { + data[index++] = priv->rx_ring[i].packets; + data[index++] = priv->rx_ring[i].bytes; + } + for (i = 0; i < NUM_PKT_STATS; i++) + data[index++] = ((unsigned long *) &priv->pkstats)[i]; + spin_unlock_bh(&priv->stats_lock); + +} + +static void mlx4_en_get_strings(struct net_device *dev, + uint32_t stringset, uint8_t *data) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + int index = 0; + int i; + + if (stringset != ETH_SS_STATS) + return; + + /* Add main counters */ + for (i = 0; i < NUM_MAIN_STATS; i++) + strcpy(data + (index++) * ETH_GSTRING_LEN, main_strings[i]); + for (i = 0; i< NUM_PORT_STATS; i++) + strcpy(data + (index++) * ETH_GSTRING_LEN, + main_strings[i + NUM_MAIN_STATS]); + for (i = 0; i < priv->tx_ring_num; i++) { + sprintf(data + (index++) * ETH_GSTRING_LEN, + "tx%d_packets", i); + sprintf(data + (index++) * ETH_GSTRING_LEN, + "tx%d_bytes", i); + } + for (i = 0; i < priv->rx_ring_num; i++) { + sprintf(data + (index++) * ETH_GSTRING_LEN, + "rx%d_packets", i); + sprintf(data + (index++) * ETH_GSTRING_LEN, + "rx%d_bytes", i); + } + for (i = 0; i< NUM_PKT_STATS; i++) + strcpy(data + (index++) * ETH_GSTRING_LEN, + main_strings[i + NUM_MAIN_STATS + NUM_PORT_STATS]); +} + +static int mlx4_en_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + int trans_type = priv->mdev->dev->caps.trans_type[priv->port]; + + cmd->autoneg = AUTONEG_DISABLE; + cmd->supported = SUPPORTED_10000baseT_Full; + cmd->advertising = ADVERTISED_10000baseT_Full; + if (netif_carrier_ok(dev)) { + cmd->speed = SPEED_10000; + cmd->duplex = DUPLEX_FULL; + } else { + cmd->speed = -1; + cmd->duplex = -1; + } + + if (trans_type > 0 && trans_type <= 0xC) { + cmd->port = PORT_FIBRE; + cmd->transceiver = XCVR_EXTERNAL; + cmd->supported |= SUPPORTED_FIBRE; + cmd->advertising |= ADVERTISED_FIBRE; + } else if (trans_type == 0x80 || trans_type == 0) { + cmd->port = PORT_TP; + cmd->transceiver = XCVR_INTERNAL; + cmd->supported |= SUPPORTED_TP; + cmd->advertising |= ADVERTISED_TP; + } else { + cmd->port = -1; + cmd->transceiver = -1; + } + return 0; +} + +static int mlx4_en_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + if ((cmd->autoneg == AUTONEG_ENABLE) || + (cmd->speed != SPEED_10000) || 
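/* editor's note: i.e. anything other than the fixed 10GbE full-duplex
 * setting is rejected */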
(cmd->duplex != DUPLEX_FULL)) + return -EINVAL; + + /* Nothing to change */ + return 0; +} + +static int mlx4_en_get_coalesce(struct net_device *dev, + struct ethtool_coalesce *coal) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + coal->tx_coalesce_usecs = 0; + coal->tx_max_coalesced_frames = 0; + coal->rx_coalesce_usecs = priv->rx_usecs; + coal->rx_max_coalesced_frames = priv->rx_frames; + + coal->pkt_rate_low = priv->pkt_rate_low; + coal->rx_coalesce_usecs_low = priv->rx_usecs_low; + coal->pkt_rate_high = priv->pkt_rate_high; + coal->rx_coalesce_usecs_high = priv->rx_usecs_high; + coal->rate_sample_interval = priv->sample_interval; + coal->use_adaptive_rx_coalesce = priv->adaptive_rx_coal; + return 0; +} + +static int mlx4_en_set_coalesce(struct net_device *dev, + struct ethtool_coalesce *coal) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + int err, i; + + priv->rx_frames = (coal->rx_max_coalesced_frames == + MLX4_EN_AUTO_CONF) ? + MLX4_EN_RX_COAL_TARGET / + priv->dev->mtu + 1 : + coal->rx_max_coalesced_frames; + priv->rx_usecs = (coal->rx_coalesce_usecs == + MLX4_EN_AUTO_CONF) ? + MLX4_EN_RX_COAL_TIME : + coal->rx_coalesce_usecs; + + /* Set adaptive coalescing params */ + priv->pkt_rate_low = coal->pkt_rate_low; + priv->rx_usecs_low = coal->rx_coalesce_usecs_low; + priv->pkt_rate_high = coal->pkt_rate_high; + priv->rx_usecs_high = coal->rx_coalesce_usecs_high; + priv->sample_interval = coal->rate_sample_interval; + priv->adaptive_rx_coal = coal->use_adaptive_rx_coalesce; + priv->last_moder_time = MLX4_EN_AUTO_CONF; + if (priv->adaptive_rx_coal) + return 0; + + for (i = 0; i < priv->rx_ring_num; i++) { + priv->rx_cq[i].moder_cnt = priv->rx_frames; + priv->rx_cq[i].moder_time = priv->rx_usecs; + err = mlx4_en_set_cq_moder(priv, &priv->rx_cq[i]); + if (err) + return err; + } + return 0; +} + +static int mlx4_en_set_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int err; + + priv->prof->tx_pause = pause->tx_pause != 0; + priv->prof->rx_pause = pause->rx_pause != 0; + err = mlx4_SET_PORT_general(mdev->dev, priv->port, + priv->rx_skb_size + ETH_FCS_LEN, + priv->prof->tx_pause, + priv->prof->tx_ppp, + priv->prof->rx_pause, + priv->prof->rx_ppp); + if (err) + mlx4_err(mdev, "Failed setting pause params\n"); + + return err; +} + +static void mlx4_en_get_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + pause->tx_pause = priv->prof->tx_pause; + pause->rx_pause = priv->prof->rx_pause; +} + +static int mlx4_en_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *param) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + u32 rx_size, tx_size; + int port_up = 0; + int err = 0; + + if (param->rx_jumbo_pending || param->rx_mini_pending) + return -EINVAL; + + rx_size = roundup_pow_of_two(param->rx_pending); + rx_size = max_t(u32, rx_size, MLX4_EN_MIN_RX_SIZE); + rx_size = min_t(u32, rx_size, MLX4_EN_MAX_RX_SIZE); + tx_size = roundup_pow_of_two(param->tx_pending); + tx_size = max_t(u32, tx_size, MLX4_EN_MIN_TX_SIZE); + tx_size = min_t(u32, tx_size, MLX4_EN_MAX_TX_SIZE); + + if (rx_size == priv->prof->rx_ring_size && + tx_size == priv->prof->tx_ring_size) + return 0; + + mutex_lock(&mdev->state_lock); + if (priv->port_up) { + port_up = 1; + mlx4_en_stop_port(dev); + } + + mlx4_en_free_resources(priv); + + priv->prof->tx_ring_size = tx_size; +
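/* Editor's illustration, not part of the patch: the sizing above rounds
 * the request up to a power of two and clamps it, so e.g.
 * "ethtool -G ethX rx 1000" yields roundup_pow_of_two(1000) = 1024,
 * bounded to [MLX4_EN_MIN_RX_SIZE, MLX4_EN_MAX_RX_SIZE]. */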
priv->prof->rx_ring_size = rx_size; + + err = mlx4_en_alloc_resources(priv); + if (err) { + mlx4_err(mdev, "Failed reallocating port resources\n"); + goto out; + } + if (port_up) { + err = mlx4_en_start_port(dev); + if (err) + mlx4_err(mdev, "Failed starting port\n"); + } + +out: + mutex_unlock(&mdev->state_lock); + return err; +} + +void mlx4_en_get_ringparam(struct net_device *dev, struct ethtool_ringparam *param) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + + memset(param, 0, sizeof(*param)); + param->rx_max_pending = MLX4_EN_MAX_RX_SIZE; + param->tx_max_pending = MLX4_EN_MAX_TX_SIZE; + param->rx_pending = mdev->profile.prof[priv->port].rx_ring_size; + param->tx_pending = mdev->profile.prof[priv->port].tx_ring_size; +} + +const struct ethtool_ops mlx4_en_ethtool_ops = { + .get_drvinfo = mlx4_en_get_drvinfo, + .get_settings = mlx4_en_get_settings, + .set_settings = mlx4_en_set_settings, +#ifdef NETIF_F_TSO + .get_tso = mlx4_en_get_tso, + .set_tso = mlx4_en_set_tso, +#endif + .get_sg = ethtool_op_get_sg, + .set_sg = ethtool_op_set_sg, + .get_link = ethtool_op_get_link, + .get_rx_csum = mlx4_en_get_rx_csum, + .set_rx_csum = mlx4_en_set_rx_csum, + .get_tx_csum = ethtool_op_get_tx_csum, + .set_tx_csum = ethtool_op_set_tx_csum, + .get_strings = mlx4_en_get_strings, + .get_stats_count = mlx4_en_get_sset_count, + .get_ethtool_stats = mlx4_en_get_ethtool_stats, + .get_wol = mlx4_en_get_wol, + .get_msglevel = mlx4_en_get_msglevel, + .set_msglevel = mlx4_en_set_msglevel, + .get_coalesce = mlx4_en_get_coalesce, + .set_coalesce = mlx4_en_set_coalesce, + .get_pauseparam = mlx4_en_get_pauseparam, + .set_pauseparam = mlx4_en_set_pauseparam, + .get_ringparam = mlx4_en_get_ringparam, + .set_ringparam = mlx4_en_set_ringparam, +}; + + + + + diff --git a/drivers/net/mlx4/en_port.c b/drivers/net/mlx4/en_port.c new file mode 100644 index 0000000..a29abe8 --- /dev/null +++ b/drivers/net/mlx4/en_port.c @@ -0,0 +1,242 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + + +#include <linux/if_vlan.h> + +#include <linux/mlx4/device.h> +#include <linux/mlx4/cmd.h> + +#include "en_port.h" +#include "mlx4_en.h" + + +int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, + u64 mac, u64 clear, u8 mode) +{ + return mlx4_cmd(dev, (mac | (clear << 63)), port, mode, + MLX4_CMD_SET_MCAST_FLTR, MLX4_CMD_TIME_CLASS_B); +} + +int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, u8 port, struct vlan_group *grp) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_vlan_fltr_mbox *filter; + int i; + int j; + int index = 0; + u32 entry; + int err = 0; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + filter = mailbox->buf; + if (grp) { + memset(filter, 0, sizeof *filter); + for (i = VLAN_FLTR_SIZE - 1; i >= 0; i--) { + entry = 0; + for (j = 0; j < 32; j++) + if (vlan_group_get_device(grp, index++)) + entry |= 1 << j; + filter->entry[i] = cpu_to_be32(entry); + } + } else { + /* When no vlans are configured we block all vlans */ + memset(filter, 0, sizeof(*filter)); + } + err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_SET_VLAN_FLTR, + MLX4_CMD_TIME_CLASS_B); + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + + +int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu, + u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_port_general_context *context; + int err; + u32 in_mod; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + context = mailbox->buf; + memset(context, 0, sizeof *context); + + context->flags = SET_PORT_GEN_ALL_VALID; + context->mtu = cpu_to_be16(mtu); + context->pptx = (pptx * (!pfctx)) << 7; + context->pfctx = pfctx; + context->pprx = (pprx * (!pfcrx)) << 7; + context->pfcrx = pfcrx; + + in_mod = MLX4_SET_PORT_GENERAL << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, + u8 promisc) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_port_rqp_calc_context *context; + int err; + u32 in_mod; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + context = mailbox->buf; + memset(context, 0, sizeof *context); + + context->base_qpn = cpu_to_be32(base_qpn); + context->promisc = cpu_to_be32(promisc << SET_PORT_PROMISC_SHIFT | base_qpn); + context->mcast = cpu_to_be32(1 << SET_PORT_PROMISC_SHIFT | base_qpn); + context->intra_no_vlan = 0; + context->no_vlan = MLX4_NO_VLAN_IDX; + context->intra_vlan_miss = 0; + context->vlan_miss = MLX4_VLAN_MISS_IDX; + + in_mod = MLX4_SET_PORT_RQP_CALC << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + + +int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) +{ + struct mlx4_en_stat_out_mbox *mlx4_en_stats; + struct mlx4_en_priv *priv = netdev_priv(mdev->pndev[port]); + struct net_device_stats *stats = &priv->stats; + struct mlx4_cmd_mailbox *mailbox; + u64 in_mod = reset << 8 | port; + int err; + int i; + + mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + memset(mailbox->buf, 0, sizeof(*mlx4_en_stats)); + err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, in_mod, 0, + MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B); + if (err) + goto out; + + mlx4_en_stats = mailbox->buf; + + 
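/* Editor's note, not part of the patch: in mlx4_SET_VLAN_FLTR() above the
 * 4096 possible VIDs map onto the 128 big-endian words of the mailbox,
 * filled from the end of the table, so VID v lands in
 * entry[VLAN_FLTR_SIZE - 1 - v / 32], bit v % 32 (e.g. VID 100 sets bit 4
 * of entry[124]). Similarly, mlx4_SET_MCAST_FLTR() packs its "clear" flag
 * into bit 63 of the 48-bit MAC argument. */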
spin_lock_bh(&priv->stats_lock); + + stats->rx_packets = 0; + stats->rx_bytes = 0; + for (i = 0; i < priv->rx_ring_num; i++) { + stats->rx_packets += priv->rx_ring[i].packets; + stats->rx_bytes += priv->rx_ring[i].bytes; + } + stats->tx_packets = 0; + stats->tx_bytes = 0; + for (i = 0; i < priv->tx_ring_num; i++) { + stats->tx_packets += priv->tx_ring[i].packets; + stats->tx_bytes += priv->tx_ring[i].bytes; + } + + stats->rx_errors = be64_to_cpu(mlx4_en_stats->PCS) + + be32_to_cpu(mlx4_en_stats->RdropLength) + + be32_to_cpu(mlx4_en_stats->RJBBR) + + be32_to_cpu(mlx4_en_stats->RCRC) + + be32_to_cpu(mlx4_en_stats->RRUNT); + stats->tx_errors = be32_to_cpu(mlx4_en_stats->TDROP); + stats->multicast = be64_to_cpu(mlx4_en_stats->MCAST_prio_0) + + be64_to_cpu(mlx4_en_stats->MCAST_prio_1) + + be64_to_cpu(mlx4_en_stats->MCAST_prio_2) + + be64_to_cpu(mlx4_en_stats->MCAST_prio_3) + + be64_to_cpu(mlx4_en_stats->MCAST_prio_4) + + be64_to_cpu(mlx4_en_stats->MCAST_prio_5) + + be64_to_cpu(mlx4_en_stats->MCAST_prio_6) + + be64_to_cpu(mlx4_en_stats->MCAST_prio_7) + + be64_to_cpu(mlx4_en_stats->MCAST_novlan); + stats->collisions = 0; + stats->rx_length_errors = be32_to_cpu(mlx4_en_stats->RdropLength); + stats->rx_over_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw); + stats->rx_crc_errors = be32_to_cpu(mlx4_en_stats->RCRC); + stats->rx_frame_errors = 0; + stats->rx_fifo_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw); + stats->rx_missed_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw); + stats->tx_aborted_errors = 0; + stats->tx_carrier_errors = 0; + stats->tx_fifo_errors = 0; + stats->tx_heartbeat_errors = 0; + stats->tx_window_errors = 0; + + priv->pkstats.broadcast = + be64_to_cpu(mlx4_en_stats->RBCAST_prio_0) + + be64_to_cpu(mlx4_en_stats->RBCAST_prio_1) + + be64_to_cpu(mlx4_en_stats->RBCAST_prio_2) + + be64_to_cpu(mlx4_en_stats->RBCAST_prio_3) + + be64_to_cpu(mlx4_en_stats->RBCAST_prio_4) + + be64_to_cpu(mlx4_en_stats->RBCAST_prio_5) + + be64_to_cpu(mlx4_en_stats->RBCAST_prio_6) + + be64_to_cpu(mlx4_en_stats->RBCAST_prio_7) + + be64_to_cpu(mlx4_en_stats->RBCAST_novlan); + priv->pkstats.rx_prio[0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_0); + priv->pkstats.rx_prio[1] = be64_to_cpu(mlx4_en_stats->RTOT_prio_1); + priv->pkstats.rx_prio[2] = be64_to_cpu(mlx4_en_stats->RTOT_prio_2); + priv->pkstats.rx_prio[3] = be64_to_cpu(mlx4_en_stats->RTOT_prio_3); + priv->pkstats.rx_prio[4] = be64_to_cpu(mlx4_en_stats->RTOT_prio_4); + priv->pkstats.rx_prio[5] = be64_to_cpu(mlx4_en_stats->RTOT_prio_5); + priv->pkstats.rx_prio[6] = be64_to_cpu(mlx4_en_stats->RTOT_prio_6); + priv->pkstats.rx_prio[7] = be64_to_cpu(mlx4_en_stats->RTOT_prio_7); + priv->pkstats.tx_prio[0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_0); + priv->pkstats.tx_prio[1] = be64_to_cpu(mlx4_en_stats->TTOT_prio_1); + priv->pkstats.tx_prio[2] = be64_to_cpu(mlx4_en_stats->TTOT_prio_2); + priv->pkstats.tx_prio[3] = be64_to_cpu(mlx4_en_stats->TTOT_prio_3); + priv->pkstats.tx_prio[4] = be64_to_cpu(mlx4_en_stats->TTOT_prio_4); + priv->pkstats.tx_prio[5] = be64_to_cpu(mlx4_en_stats->TTOT_prio_5); + priv->pkstats.tx_prio[6] = be64_to_cpu(mlx4_en_stats->TTOT_prio_6); + priv->pkstats.tx_prio[7] = be64_to_cpu(mlx4_en_stats->TTOT_prio_7); + spin_unlock_bh(&priv->stats_lock); + +out: + mlx4_free_cmd_mailbox(mdev->dev, mailbox); + return err; +} + diff --git a/drivers/net/mlx4/en_port.h b/drivers/net/mlx4/en_port.h new file mode 100644 index 0000000..6e87843 --- /dev/null +++ b/drivers/net/mlx4/en_port.h @@ -0,0 +1,570 @@ +/* + * Copyright (c) 2007 Mellanox Technologies.
All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef _MLX4_EN_PORT_H_ +#define _MLX4_EN_PORT_H_ + + +#define SET_PORT_GEN_ALL_VALID 0x7 +#define SET_PORT_PROMISC_SHIFT 31 + +enum { + MLX4_CMD_SET_VLAN_FLTR = 0x47, + MLX4_CMD_SET_MCAST_FLTR = 0x48, + MLX4_CMD_DUMP_ETH_STATS = 0x49, +}; + +struct mlx4_set_port_general_context { + u8 reserved[3]; + u8 flags; + u16 reserved2; + __be16 mtu; + u8 pptx; + u8 pfctx; + u16 reserved3; + u8 pprx; + u8 pfcrx; + u16 reserved4; +}; + +struct mlx4_set_port_rqp_calc_context { + __be32 base_qpn; + __be32 flags; + u8 reserved[3]; + u8 mac_miss; + u8 intra_no_vlan; + u8 no_vlan; + u8 intra_vlan_miss; + u8 vlan_miss; + u8 reserved4[3]; + u8 no_vlan_prio; + __be32 promisc; + __be32 mcast; +}; + +#define VLAN_FLTR_SIZE 128 +struct mlx4_set_vlan_fltr_mbox { + __be32 entry[VLAN_FLTR_SIZE]; +}; + + +enum { + MLX4_MCAST_CONFIG = 0, + MLX4_MCAST_DISABLE = 1, + MLX4_MCAST_ENABLE = 2, +}; + + +struct mlx4_en_stat_out_mbox { + /* Received frames with a length of 64 octets */ + __be64 R64_prio_0; + __be64 R64_prio_1; + __be64 R64_prio_2; + __be64 R64_prio_3; + __be64 R64_prio_4; + __be64 R64_prio_5; + __be64 R64_prio_6; + __be64 R64_prio_7; + __be64 R64_novlan; + /* Received frames with a length of 127 octets */ + __be64 R127_prio_0; + __be64 R127_prio_1; + __be64 R127_prio_2; + __be64 R127_prio_3; + __be64 R127_prio_4; + __be64 R127_prio_5; + __be64 R127_prio_6; + __be64 R127_prio_7; + __be64 R127_novlan; + /* Received frames with a length of 255 octets */ + __be64 R255_prio_0; + __be64 R255_prio_1; + __be64 R255_prio_2; + __be64 R255_prio_3; + __be64 R255_prio_4; + __be64 R255_prio_5; + __be64 R255_prio_6; + __be64 R255_prio_7; + __be64 R255_novlan; + /* Received frames with a length of 511 octets */ + __be64 R511_prio_0; + __be64 R511_prio_1; + __be64 R511_prio_2; + __be64 R511_prio_3; + __be64 R511_prio_4; + __be64 R511_prio_5; + __be64 R511_prio_6; + __be64 R511_prio_7; + __be64 R511_novlan; + /* Received frames with a length of 1023 octets */ + __be64 R1023_prio_0; + __be64 R1023_prio_1; + __be64 R1023_prio_2; + __be64 R1023_prio_3; + __be64 R1023_prio_4; + __be64 R1023_prio_5; + __be64 R1023_prio_6; + __be64 R1023_prio_7; + __be64 R1023_novlan; + /* Received frames with 
a length of 1518 octets */ + __be64 R1518_prio_0; + __be64 R1518_prio_1; + __be64 R1518_prio_2; + __be64 R1518_prio_3; + __be64 R1518_prio_4; + __be64 R1518_prio_5; + __be64 R1518_prio_6; + __be64 R1518_prio_7; + __be64 R1518_novlan; + /* Received frames with a length of 1522 octets */ + __be64 R1522_prio_0; + __be64 R1522_prio_1; + __be64 R1522_prio_2; + __be64 R1522_prio_3; + __be64 R1522_prio_4; + __be64 R1522_prio_5; + __be64 R1522_prio_6; + __be64 R1522_prio_7; + __be64 R1522_novlan; + /* Received frames with a length of 1548 octets */ + __be64 R1548_prio_0; + __be64 R1548_prio_1; + __be64 R1548_prio_2; + __be64 R1548_prio_3; + __be64 R1548_prio_4; + __be64 R1548_prio_5; + __be64 R1548_prio_6; + __be64 R1548_prio_7; + __be64 R1548_novlan; + /* Received frames with a length of 1548 < octets < MTU */ + __be64 R2MTU_prio_0; + __be64 R2MTU_prio_1; + __be64 R2MTU_prio_2; + __be64 R2MTU_prio_3; + __be64 R2MTU_prio_4; + __be64 R2MTU_prio_5; + __be64 R2MTU_prio_6; + __be64 R2MTU_prio_7; + __be64 R2MTU_novlan; + /* Received frames with a length of MTU< octets and good CRC */ + __be64 RGIANT_prio_0; + __be64 RGIANT_prio_1; + __be64 RGIANT_prio_2; + __be64 RGIANT_prio_3; + __be64 RGIANT_prio_4; + __be64 RGIANT_prio_5; + __be64 RGIANT_prio_6; + __be64 RGIANT_prio_7; + __be64 RGIANT_novlan; + /* Received broadcast frames with good CRC */ + __be64 RBCAST_prio_0; + __be64 RBCAST_prio_1; + __be64 RBCAST_prio_2; + __be64 RBCAST_prio_3; + __be64 RBCAST_prio_4; + __be64 RBCAST_prio_5; + __be64 RBCAST_prio_6; + __be64 RBCAST_prio_7; + __be64 RBCAST_novlan; + /* Received multicast frames with good CRC */ + __be64 MCAST_prio_0; + __be64 MCAST_prio_1; + __be64 MCAST_prio_2; + __be64 MCAST_prio_3; + __be64 MCAST_prio_4; + __be64 MCAST_prio_5; + __be64 MCAST_prio_6; + __be64 MCAST_prio_7; + __be64 MCAST_novlan; + /* Received unicast not short or GIANT frames with good CRC */ + __be64 RTOTG_prio_0; + __be64 RTOTG_prio_1; + __be64 RTOTG_prio_2; + __be64 RTOTG_prio_3; + __be64 RTOTG_prio_4; + __be64 RTOTG_prio_5; + __be64 RTOTG_prio_6; + __be64 RTOTG_prio_7; + __be64 RTOTG_novlan; + + /* Count of total octets of received frames, includes framing characters */ + __be64 RTTLOCT_prio_0; + /* Count of total octets of received frames, not including framing + characters */ + __be64 RTTLOCT_NOFRM_prio_0; + /* Count of Total number of octets received + (only for frames without errors) */ + __be64 ROCT_prio_0; + + __be64 RTTLOCT_prio_1; + __be64 RTTLOCT_NOFRM_prio_1; + __be64 ROCT_prio_1; + + __be64 RTTLOCT_prio_2; + __be64 RTTLOCT_NOFRM_prio_2; + __be64 ROCT_prio_2; + + __be64 RTTLOCT_prio_3; + __be64 RTTLOCT_NOFRM_prio_3; + __be64 ROCT_prio_3; + + __be64 RTTLOCT_prio_4; + __be64 RTTLOCT_NOFRM_prio_4; + __be64 ROCT_prio_4; + + __be64 RTTLOCT_prio_5; + __be64 RTTLOCT_NOFRM_prio_5; + __be64 ROCT_prio_5; + + __be64 RTTLOCT_prio_6; + __be64 RTTLOCT_NOFRM_prio_6; + __be64 ROCT_prio_6; + + __be64 RTTLOCT_prio_7; + __be64 RTTLOCT_NOFRM_prio_7; + __be64 ROCT_prio_7; + + __be64 RTTLOCT_novlan; + __be64 RTTLOCT_NOFRM_novlan; + __be64 ROCT_novlan; + + /* Count of Total received frames including bad frames */ + __be64 RTOT_prio_0; + /* Count of Total number of received frames with 802.1Q encapsulation */ + __be64 R1Q_prio_0; + __be64 reserved1; + + __be64 RTOT_prio_1; + __be64 R1Q_prio_1; + __be64 reserved2; + + __be64 RTOT_prio_2; + __be64 R1Q_prio_2; + __be64 reserved3; + + __be64 RTOT_prio_3; + __be64 R1Q_prio_3; + __be64 reserved4; + + __be64 RTOT_prio_4; + __be64 R1Q_prio_4; + __be64 reserved5; + + __be64 RTOT_prio_5; + __be64 
R1Q_prio_5; + __be64 reserved6; + + __be64 RTOT_prio_6; + __be64 R1Q_prio_6; + __be64 reserved7; + + __be64 RTOT_prio_7; + __be64 R1Q_prio_7; + __be64 reserved8; + + __be64 RTOT_novlan; + __be64 R1Q_novlan; + __be64 reserved9; + + /* Total number of Successfully Received Control Frames */ + __be64 RCNTL; + __be64 reserved10; + __be64 reserved11; + __be64 reserved12; + /* Count of received frames with a length/type field value between 46 + (42 for VLANtagged frames) and 1500 (also 1500 for VLAN-tagged frames), + inclusive */ + __be64 RInRangeLengthErr; + /* Count of received frames with length/type field between 1501 and 1535 + decimal, inclusive */ + __be64 ROutRangeLengthErr; + /* Count of received frames that are longer than max allowed size for + 802.3 frames (1518/1522) */ + __be64 RFrmTooLong; + /* Count frames received with PCS error */ + __be64 PCS; + + /* Transmit frames with a length of 64 octets */ + __be64 T64_prio_0; + __be64 T64_prio_1; + __be64 T64_prio_2; + __be64 T64_prio_3; + __be64 T64_prio_4; + __be64 T64_prio_5; + __be64 T64_prio_6; + __be64 T64_prio_7; + __be64 T64_novlan; + __be64 T64_loopbk; + /* Transmit frames with a length of 65 to 127 octets. */ + __be64 T127_prio_0; + __be64 T127_prio_1; + __be64 T127_prio_2; + __be64 T127_prio_3; + __be64 T127_prio_4; + __be64 T127_prio_5; + __be64 T127_prio_6; + __be64 T127_prio_7; + __be64 T127_novlan; + __be64 T127_loopbk; + /* Transmit frames with a length of 128 to 255 octets */ + __be64 T255_prio_0; + __be64 T255_prio_1; + __be64 T255_prio_2; + __be64 T255_prio_3; + __be64 T255_prio_4; + __be64 T255_prio_5; + __be64 T255_prio_6; + __be64 T255_prio_7; + __be64 T255_novlan; + __be64 T255_loopbk; + /* Transmit frames with a length of 256 to 511 octets */ + __be64 T511_prio_0; + __be64 T511_prio_1; + __be64 T511_prio_2; + __be64 T511_prio_3; + __be64 T511_prio_4; + __be64 T511_prio_5; + __be64 T511_prio_6; + __be64 T511_prio_7; + __be64 T511_novlan; + __be64 T511_loopbk; + /* Transmit frames with a length of 512 to 1023 octets */ + __be64 T1023_prio_0; + __be64 T1023_prio_1; + __be64 T1023_prio_2; + __be64 T1023_prio_3; + __be64 T1023_prio_4; + __be64 T1023_prio_5; + __be64 T1023_prio_6; + __be64 T1023_prio_7; + __be64 T1023_novlan; + __be64 T1023_loopbk; + /* Transmit frames with a length of 1024 to 1518 octets */ + __be64 T1518_prio_0; + __be64 T1518_prio_1; + __be64 T1518_prio_2; + __be64 T1518_prio_3; + __be64 T1518_prio_4; + __be64 T1518_prio_5; + __be64 T1518_prio_6; + __be64 T1518_prio_7; + __be64 T1518_novlan; + __be64 T1518_loopbk; + /* Counts transmit frames with a length of 1519 to 1522 bytes */ + __be64 T1522_prio_0; + __be64 T1522_prio_1; + __be64 T1522_prio_2; + __be64 T1522_prio_3; + __be64 T1522_prio_4; + __be64 T1522_prio_5; + __be64 T1522_prio_6; + __be64 T1522_prio_7; + __be64 T1522_novlan; + __be64 T1522_loopbk; + /* Transmit frames with a length of 1523 to 1548 octets */ + __be64 T1548_prio_0; + __be64 T1548_prio_1; + __be64 T1548_prio_2; + __be64 T1548_prio_3; + __be64 T1548_prio_4; + __be64 T1548_prio_5; + __be64 T1548_prio_6; + __be64 T1548_prio_7; + __be64 T1548_novlan; + __be64 T1548_loopbk; + /* Counts transmit frames with a length of 1549 to MTU bytes */ + __be64 T2MTU_prio_0; + __be64 T2MTU_prio_1; + __be64 T2MTU_prio_2; + __be64 T2MTU_prio_3; + __be64 T2MTU_prio_4; + __be64 T2MTU_prio_5; + __be64 T2MTU_prio_6; + __be64 T2MTU_prio_7; + __be64 T2MTU_novlan; + __be64 T2MTU_loopbk; + /* Transmit frames with a length greater than MTU octets and a good CRC. 
*/ + __be64 TGIANT_prio_0; + __be64 TGIANT_prio_1; + __be64 TGIANT_prio_2; + __be64 TGIANT_prio_3; + __be64 TGIANT_prio_4; + __be64 TGIANT_prio_5; + __be64 TGIANT_prio_6; + __be64 TGIANT_prio_7; + __be64 TGIANT_novlan; + __be64 TGIANT_loopbk; + /* Transmit broadcast frames with a good CRC */ + __be64 TBCAST_prio_0; + __be64 TBCAST_prio_1; + __be64 TBCAST_prio_2; + __be64 TBCAST_prio_3; + __be64 TBCAST_prio_4; + __be64 TBCAST_prio_5; + __be64 TBCAST_prio_6; + __be64 TBCAST_prio_7; + __be64 TBCAST_novlan; + __be64 TBCAST_loopbk; + /* Transmit multicast frames with a good CRC */ + __be64 TMCAST_prio_0; + __be64 TMCAST_prio_1; + __be64 TMCAST_prio_2; + __be64 TMCAST_prio_3; + __be64 TMCAST_prio_4; + __be64 TMCAST_prio_5; + __be64 TMCAST_prio_6; + __be64 TMCAST_prio_7; + __be64 TMCAST_novlan; + __be64 TMCAST_loopbk; + /* Transmit good frames that are neither broadcast nor multicast */ + __be64 TTOTG_prio_0; + __be64 TTOTG_prio_1; + __be64 TTOTG_prio_2; + __be64 TTOTG_prio_3; + __be64 TTOTG_prio_4; + __be64 TTOTG_prio_5; + __be64 TTOTG_prio_6; + __be64 TTOTG_prio_7; + __be64 TTOTG_novlan; + __be64 TTOTG_loopbk; + + /* total octets of transmitted frames, including framing characters */ + __be64 TTTLOCT_prio_0; + /* total octets of transmitted frames, not including framing characters */ + __be64 TTTLOCT_NOFRM_prio_0; + /* ifOutOctets */ + __be64 TOCT_prio_0; + + __be64 TTTLOCT_prio_1; + __be64 TTTLOCT_NOFRM_prio_1; + __be64 TOCT_prio_1; + + __be64 TTTLOCT_prio_2; + __be64 TTTLOCT_NOFRM_prio_2; + __be64 TOCT_prio_2; + + __be64 TTTLOCT_prio_3; + __be64 TTTLOCT_NOFRM_prio_3; + __be64 TOCT_prio_3; + + __be64 TTTLOCT_prio_4; + __be64 TTTLOCT_NOFRM_prio_4; + __be64 TOCT_prio_4; + + __be64 TTTLOCT_prio_5; + __be64 TTTLOCT_NOFRM_prio_5; + __be64 TOCT_prio_5; + + __be64 TTTLOCT_prio_6; + __be64 TTTLOCT_NOFRM_prio_6; + __be64 TOCT_prio_6; + + __be64 TTTLOCT_prio_7; + __be64 TTTLOCT_NOFRM_prio_7; + __be64 TOCT_prio_7; + + __be64 TTTLOCT_novlan; + __be64 TTTLOCT_NOFRM_novlan; + __be64 TOCT_novlan; + + __be64 TTTLOCT_loopbk; + __be64 TTTLOCT_NOFRM_loopbk; + __be64 TOCT_loopbk; + + /* Total frames transmitted with a good CRC that are not aborted */ + __be64 TTOT_prio_0; + /* Total number of frames transmitted with 802.1Q encapsulation */ + __be64 T1Q_prio_0; + __be64 reserved13; + + __be64 TTOT_prio_1; + __be64 T1Q_prio_1; + __be64 reserved14; + + __be64 TTOT_prio_2; + __be64 T1Q_prio_2; + __be64 reserved15; + + __be64 TTOT_prio_3; + __be64 T1Q_prio_3; + __be64 reserved16; + + __be64 TTOT_prio_4; + __be64 T1Q_prio_4; + __be64 reserved17; + + __be64 TTOT_prio_5; + __be64 T1Q_prio_5; + __be64 reserved18; + + __be64 TTOT_prio_6; + __be64 T1Q_prio_6; + __be64 reserved19; + + __be64 TTOT_prio_7; + __be64 T1Q_prio_7; + __be64 reserved20; + + __be64 TTOT_novlan; + __be64 T1Q_novlan; + __be64 reserved21; + + __be64 TTOT_loopbk; + __be64 T1Q_loopbk; + __be64 reserved22; + + /* Received frames with a length greater than MTU octets and a bad CRC */ + __be32 RJBBR; + /* Received frames with a bad CRC that are not runts, jabbers, + or alignment errors */ + __be32 RCRC; + /* Received frames with SFD with a length of less than 64 octets and a + bad CRC */ + __be32 RRUNT; + /* Received frames with a length less than 64 octets and a good CRC */ + __be32 RSHORT; + /* Total Number of Received Packets Dropped */ + __be32 RDROP; + /* Drop due to overflow */ + __be32 RdropOvflw; + /* Drop due to overflow */ + __be32 RdropLength; + /* Total of good frames. 
Does not include frames received with + frame-too-long, FCS, or length errors */ + __be32 RTOTFRMS; + /* Total dropped Xmited packets */ + __be32 TDROP; +}; + + +#endif diff --git a/drivers/net/mlx4/en_resources.c b/drivers/net/mlx4/en_resources.c new file mode 100644 index 0000000..f14ed14 --- /dev/null +++ b/drivers/net/mlx4/en_resources.c @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/vmalloc.h> +#include <linux/mlx4/qp.h> + +#include "mlx4_en.h" + +void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, + int is_tx, int rss, int qpn, int cqn, int srqn, + struct mlx4_qp_context *context) +{ + struct mlx4_en_dev *mdev = priv->mdev; + + memset(context, 0, sizeof *context); + context->flags = cpu_to_be32(7 << 16 | rss << 13); + context->pd = cpu_to_be32(mdev->priv_pdn); + context->mtu_msgmax = 0xff; + context->rq_size_stride = 0; + if (is_tx) + context->sq_size_stride = ilog2(size) << 3 | (ilog2(stride) - 4); + else + context->sq_size_stride = 1; + context->usr_page = cpu_to_be32(mdev->priv_uar.index); + context->local_qpn = cpu_to_be32(qpn); + context->pri_path.ackto = 1 & 0x07; + context->pri_path.sched_queue = 0x83 | (priv->port - 1) << 6; + context->pri_path.counter_index = 0xff; + context->cqn_send = cpu_to_be32(cqn); + context->cqn_recv = cpu_to_be32(cqn); + context->db_rec_addr = cpu_to_be64(priv->res.db.dma << 2); + if (!rss) + context->srqn = cpu_to_be32(MLX4_EN_USE_SRQ | srqn); +} + + +int mlx4_en_map_buffer(struct mlx4_buf *buf) +{ + struct page **pages; + int i; + + if (BITS_PER_LONG == 64 || buf->nbufs == 1) + return 0; + + pages = kmalloc(sizeof *pages * buf->nbufs, GFP_KERNEL); + if (!pages) + return -ENOMEM; + + for (i = 0; i < buf->nbufs; ++i) + pages[i] = virt_to_page(buf->page_list[i].buf); + + buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL); + kfree(pages); + if (!buf->direct.buf) + return -ENOMEM; + + return 0; +} + +void mlx4_en_unmap_buffer(struct mlx4_buf *buf) +{ + if (BITS_PER_LONG == 64 || buf->nbufs == 1) + return; + + vunmap(buf->direct.buf); +} + +void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event) +{ + return; +} + diff --git 
a/drivers/net/mlx4/en_rx.c b/drivers/net/mlx4/en_rx.c new file mode 100644 index 0000000..93d8e02 --- /dev/null +++ b/drivers/net/mlx4/en_rx.c @@ -0,0 +1,1307 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/mlx4/cq.h> +#include <linux/mlx4/qp.h> +#include <linux/skbuff.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/vmalloc.h> +#include <linux/etherdevice.h> + +#include "mlx4_en.h" + +static void *get_wqe(struct mlx4_en_rx_ring *ring, int n) +{ + int offset = n << ring->srq.wqe_shift; + return ring->buf + offset; +} + +static void mlx4_en_srq_event(struct mlx4_srq *srq, enum mlx4_event type) +{ + return; +} + +static int mlx4_en_get_frag_header(struct skb_frag_struct *frags, void **mac_hdr, + void **ip_hdr, void **tcpudp_hdr, + u64 *hdr_flags, void *priv) +{ + *mac_hdr = page_address(frags->page) + frags->page_offset; + *ip_hdr = *mac_hdr + ETH_HLEN; + *tcpudp_hdr = (struct tcphdr *)(*ip_hdr + sizeof(struct iphdr)); + *hdr_flags = LRO_IPV4 | LRO_TCP; + + return 0; +} + +static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv, + struct mlx4_en_rx_desc *rx_desc, + struct skb_frag_struct *skb_frags, + struct mlx4_en_rx_alloc *ring_alloc, + int i) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; + struct mlx4_en_rx_alloc *page_alloc = &ring_alloc[i]; + struct page *page; + dma_addr_t dma; + + if (page_alloc->offset == frag_info->last_offset) { + /* Allocate new page */ + page = alloc_pages(GFP_ATOMIC | __GFP_COMP, MLX4_EN_ALLOC_ORDER); + if (!page) + return -ENOMEM; + + skb_frags[i].page = page_alloc->page; + skb_frags[i].page_offset = page_alloc->offset; + page_alloc->page = page; + page_alloc->offset = frag_info->frag_align; + } else { + page = page_alloc->page; + get_page(page); + + skb_frags[i].page = page; + skb_frags[i].page_offset = page_alloc->offset; + page_alloc->offset += frag_info->frag_stride; + } + dma = pci_map_single(mdev->pdev, page_address(skb_frags[i].page) + + skb_frags[i].page_offset, frag_info->frag_size, + PCI_DMA_FROMDEVICE); + rx_desc->data[i].addr = cpu_to_be64(dma); + return 0; +} + +static int mlx4_en_init_allocator(struct 
mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring) +{ + struct mlx4_en_rx_alloc *page_alloc; + int i; + + for (i = 0; i < priv->num_frags; i++) { + page_alloc = &ring->page_alloc[i]; + page_alloc->page = alloc_pages(GFP_ATOMIC | __GFP_COMP, + MLX4_EN_ALLOC_ORDER); + if (!page_alloc->page) + goto out; + + page_alloc->offset = priv->frag_info[i].frag_align; + mlx4_dbg(DRV, priv, "Initialized allocator:%d with page:%p\n", + i, page_alloc->page); + } + return 0; + +out: + while (i--) { + page_alloc = &ring->page_alloc[i]; + put_page(page_alloc->page); + page_alloc->page = NULL; + } + return -ENOMEM; +} + +static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring) +{ + struct mlx4_en_rx_alloc *page_alloc; + int i; + + for (i = 0; i < priv->num_frags; i++) { + page_alloc = &ring->page_alloc[i]; + mlx4_dbg(DRV, priv, "Freeing allocator:%d count:%d\n", + i, page_count(page_alloc->page)); + + put_page(page_alloc->page); + page_alloc->page = NULL; + } +} + +static void +mlx4_en_init_rx_desc_skb(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring, int index) +{ + struct mlx4_en_rx_desc *rx_desc = ring->buf + ring->stride * index; + + /* Pre-link descriptor */ + rx_desc->next.next_wqe_index = cpu_to_be16((index + 1) & ring->size_mask); + rx_desc->data->byte_count = cpu_to_be32(priv->rx_skb_size); + rx_desc->data->lkey = cpu_to_be32(priv->mdev->mr.key); +} + +static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring, int index) +{ + struct mlx4_en_rx_desc *rx_desc = ring->buf + ring->stride * index; + struct skb_frag_struct *skb_frags = ring->rx_info + + (index << priv->log_rx_info); + int possible_frags; + int i; + + /* Pre-link descriptor */ + rx_desc->next.next_wqe_index = cpu_to_be16((index + 1) & ring->size_mask); + + /* Set size and memtype fields */ + for (i = 0; i < priv->num_frags; i++) { + skb_frags[i].size = priv->frag_info[i].frag_size; + rx_desc->data[i].byte_count = + cpu_to_be32(priv->frag_info[i].frag_size); + rx_desc->data[i].lkey = cpu_to_be32(priv->mdev->mr.key); + } + + /* If the number of used fragments does not fill up the ring stride, + * remaining (unused) fragments must be padded with null address/size + * and a special memory key */ + possible_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE; + for (i = priv->num_frags; i < possible_frags; i++) { + rx_desc->data[i].byte_count = 0; + rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD); + rx_desc->data[i].addr = 0; + } +} + +static int +mlx4_en_alloc_rx_skb(struct mlx4_en_priv *priv, + struct mlx4_en_rx_desc *rx_desc, + struct sk_buff **pskb) +{ + dma_addr_t dma; + int size = priv->rx_skb_size + NET_IP_ALIGN; + struct sk_buff *new_skb = alloc_skb(size, GFP_ATOMIC); + + if (unlikely(new_skb == NULL)) + return -ENOMEM; + + new_skb->dev = priv->dev; + skb_reserve(new_skb, NET_IP_ALIGN); + dma = pci_map_single(priv->mdev->pdev, new_skb->data, size, DMA_FROM_DEVICE); + *pskb = new_skb; + rx_desc->data->addr = cpu_to_be64(dma); + return 0; +} + +static int +mlx4_en_prepare_rx_desc_skb(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring, int index) +{ + struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride); + struct sk_buff **pskb = (struct sk_buff **) ring->rx_info + index; + + return mlx4_en_alloc_rx_skb(priv, rx_desc, pskb); +} + +static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring, int index) +{ + struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride); + 
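+ /* Editor's note (grounded in mlx4_en_calc_rx_buf() below): rx_info holds + * one skb_frag_struct array per descriptor, and each entry is padded to a + * power-of-two size via log_rx_info, so the lookup that follows can use a + * shift instead of a multiply. */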
struct skb_frag_struct *skb_frags = ring->rx_info + + (index << priv->log_rx_info); + int i; + + for (i = 0; i < priv->num_frags; i++) + if (mlx4_en_alloc_frag(priv, rx_desc, skb_frags, ring->page_alloc, i)) + goto err; + + return 0; + +err: + while (i--) + put_page(skb_frags[i].page); + return -ENOMEM; +} + +static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring) +{ + *ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff); +} + +static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_rx_ring *ring; + int ring_ind; + int buf_ind; + int err; + + for (buf_ind = 0; buf_ind < priv->prof->rx_ring_size; buf_ind++) { + for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { + ring = &priv->rx_ring[ring_ind]; + + if (ring->use_frags) + err = mlx4_en_prepare_rx_desc(priv, ring, + ring->actual_size); + else + err = mlx4_en_prepare_rx_desc_skb(priv, ring, + ring->actual_size); + if (err) { + if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) { + mlx4_err(mdev, "Failed to allocate " + "enough rx buffers\n"); + return -ENOMEM; + } else { + if (netif_msg_rx_err(priv)) + mlx4_warn(mdev, + "Only %d buffers allocated\n", + ring->actual_size); + goto out; + } + } + ring->actual_size++; + ring->prod++; + } + } +out: + return 0; +} + +static int mlx4_en_fill_rx_buf(struct net_device *dev, + struct mlx4_en_rx_ring *ring) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + int num = 0; + int err; + + while ((u32) (ring->prod - ring->cons) < ring->actual_size) { + if (ring->use_frags) + err = mlx4_en_prepare_rx_desc(priv, ring, ring->prod & + ring->size_mask); + else + err = mlx4_en_prepare_rx_desc_skb(priv, ring, ring->prod & + ring->size_mask); + if (err) { + if (netif_msg_rx_err(priv)) + mlx4_warn(priv->mdev, + "Failed preparing rx descriptor\n"); + priv->port_stats.rx_alloc_failed++; + break; + } + ++num; + ++ring->prod; + } + if ((u32) (ring->prod - ring->cons) == ring->size) + ring->full = 1; + + return num; +} + +static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct skb_frag_struct *skb_frags; + struct sk_buff *skb; + struct mlx4_en_rx_desc *rx_desc; + dma_addr_t dma; + int index; + int nr; + + mlx4_dbg(DRV, priv, "Freeing Rx buf - cons:%d prod:%d\n", + ring->cons, ring->prod); + + /* Unmap and free Rx buffers */ + BUG_ON((u32) (ring->prod - ring->cons) > ring->size); + while (ring->cons != ring->prod) { + index = ring->cons & ring->size_mask; + rx_desc = ring->buf + (index << ring->log_stride); + mlx4_dbg(DRV, priv, "Processing descriptor:%d\n", index); + + if (ring->use_frags) { + skb_frags = ring->rx_info + (index << priv->log_rx_info); + for (nr = 0; nr < priv->num_frags; nr++) { + mlx4_dbg(DRV, priv, "Freeing fragment:%d\n", nr); + dma = be64_to_cpu(rx_desc->data[nr].addr); + + mlx4_dbg(DRV, priv, "Unmapping buffer at dma:0x%llx\n", (u64) dma); + pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size, + PCI_DMA_FROMDEVICE); + put_page(skb_frags[nr].page); + } + } else { + skb = *((struct sk_buff **) ring->rx_info + index); + dma = be64_to_cpu(rx_desc->data->addr); + pci_unmap_single(mdev->pdev, dma, + priv->rx_skb_size + NET_IP_ALIGN, + PCI_DMA_FROMDEVICE); + kfree_skb(skb); + } + ++ring->cons; + } +} + + +void mlx4_en_rx_refill(struct work_struct *work) +{ + struct delayed_work *delay = container_of(work, struct delayed_work, work); + struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv, + refill_task); + struct
mlx4_en_dev *mdev = priv->mdev; + struct net_device *dev = priv->dev; + struct mlx4_en_rx_ring *ring; + int need_refill = 0; + int i; + + mutex_lock(&mdev->state_lock); + if (!mdev->device_up || !priv->port_up) + goto out; + + /* We only get here if there are no receive buffers, so we can't race + * with Rx interrupts while filling buffers */ + for (i = 0; i < priv->rx_ring_num; i++) { + ring = &priv->rx_ring[i]; + if (ring->need_refill) { + if (mlx4_en_fill_rx_buf(dev, ring)) { + ring->need_refill = 0; + mlx4_en_update_rx_prod_db(ring); + } else + need_refill = 1; + } + } + if (need_refill) + queue_delayed_work(mdev->workqueue, &priv->refill_task, HZ); + +out: + mutex_unlock(&mdev->state_lock); +} + + +int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring, u32 size) +{ + struct mlx4_en_dev *mdev = priv->mdev; + int err; + int tmp; + + /* Sanity check SRQ size before proceeding */ + if (size >= mdev->dev->caps.max_srq_wqes) + return -EINVAL; + + ring->prod = 0; + ring->cons = 0; + ring->size = size; + ring->size_mask = size - 1; + ring->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) + + DS_SIZE * (ring->use_frags ? + MLX4_EN_MAX_RX_FRAGS : 1)); + ring->log_stride = ffs(ring->stride) - 1; + ring->buf_size = ring->size * ring->stride; + + if (ring->use_frags) + tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS * + sizeof(struct skb_frag_struct)); + else + tmp = size * sizeof(struct sk_buff *); + + ring->rx_info = vmalloc(tmp); + if (!ring->rx_info) { + mlx4_err(mdev, "Failed allocating rx_info ring\n"); + return -ENOMEM; + } + mlx4_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n", + ring->rx_info, tmp); + + err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, + ring->buf_size, 2 * PAGE_SIZE); + if (err) + goto err_ring; + + err = mlx4_en_map_buffer(&ring->wqres.buf); + if (err) { + mlx4_err(mdev, "Failed to map RX buffer\n"); + goto err_hwq; + } + ring->buf = ring->wqres.buf.direct.buf; + + /* Configure lro mngr */ + memset(&ring->lro, 0, sizeof(struct net_lro_mgr)); + ring->lro.dev = priv->dev; + ring->lro.features = LRO_F_NAPI; + ring->lro.frag_align_pad = NET_IP_ALIGN; + ring->lro.ip_summed = CHECKSUM_UNNECESSARY; + ring->lro.ip_summed_aggr = CHECKSUM_UNNECESSARY; + ring->lro.max_desc = mdev->profile.num_lro; + ring->lro.max_aggr = MAX_SKB_FRAGS; + ring->lro.lro_arr = kzalloc(mdev->profile.num_lro * + sizeof(struct net_lro_desc), + GFP_KERNEL); + if (!ring->lro.lro_arr) { + mlx4_err(mdev, "Failed to allocate lro array\n"); + goto err_map; + } + ring->lro.get_frag_header = mlx4_en_get_frag_header; + + return 0; + +err_map: + mlx4_en_unmap_buffer(&ring->wqres.buf); +err_hwq: + mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); +err_ring: + vfree(ring->rx_info); + ring->rx_info = NULL; + return err; +} + +int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_wqe_srq_next_seg *next; + struct mlx4_en_rx_ring *ring; + int i; + int ring_ind; + int err; + int stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) + + DS_SIZE * priv->num_frags); + int max_gs = (stride - sizeof(struct mlx4_wqe_srq_next_seg)) / DS_SIZE; + + for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { + ring = &priv->rx_ring[ring_ind]; + + ring->prod = 0; + ring->cons = 0; + ring->actual_size = 0; + ring->cqn = priv->rx_cq[ring_ind].mcq.cqn; + + if (ring->use_frags) + ring->stride = stride; + ring->log_stride = ffs(ring->stride) - 1; + ring->buf_size = ring->size * ring->stride; + + 
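+ /* Editor's note: the ring was created with a worst-case stride covering + * MLX4_EN_MAX_RX_FRAGS data segments; now that the actual fragment count + * is known, frag rings adopt the tighter stride computed above before + * their descriptors are initialized. */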
memset(ring->rx_info, 0, sizeof(*(ring->rx_info))); + memset(ring->buf, 0, ring->buf_size); + mlx4_en_update_rx_prod_db(ring); + + if (ring->use_frags) { + /* Initialize all descriptors */ + for (i = 0; i < ring->size; i++) + mlx4_en_init_rx_desc(priv, ring, i); + + /* Initialize page allocators */ + err = mlx4_en_init_allocator(priv, ring); + if (err) { + mlx4_err(mdev, "Failed initializing ring allocator\n"); + goto err_allocator; + } + } else { + for (i = 0; i < ring->size; i++) + mlx4_en_init_rx_desc_skb(priv, ring, i); + } + + /* Fill Rx buffers */ + ring->full = 0; + } + err = mlx4_en_fill_rx_buffers(priv); + if (err) + goto err_buffers; + + for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { + ring = &priv->rx_ring[ring_ind]; + + mlx4_en_update_rx_prod_db(ring); + + /* Configure SRQ representing the ring */ + ring->srq.max = ring->size; + ring->srq.max_gs = max_gs; + ring->srq.wqe_shift = ilog2(ring->stride); + + for (i = 0; i < ring->srq.max; ++i) { + next = get_wqe(ring, i); + next->next_wqe_index = + cpu_to_be16((i + 1) & (ring->srq.max - 1)); + } + + err = mlx4_srq_alloc(mdev->dev, mdev->priv_pdn, ring->cqn, 0, + &ring->wqres.mtt, ring->wqres.db.dma, &ring->srq); + if (err) { + mlx4_err(mdev, "Failed to allocate srq\n"); + goto err_srq; + } + ring->srq.event = mlx4_en_srq_event; + } + + return 0; + +err_srq: + while (ring_ind >= 0) { + ring = &priv->rx_ring[ring_ind]; + mlx4_srq_invalidate(mdev->dev, &ring->srq); + mlx4_srq_remove(mdev->dev, &ring->srq); + mlx4_srq_free(mdev->dev, &ring->srq); + ring_ind--; + } + +err_buffers: + for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) + mlx4_en_free_rx_buf(priv, &priv->rx_ring[ring_ind]); + + ring_ind = priv->rx_ring_num - 1; +err_allocator: + while (ring_ind >= 0) { + if (priv->rx_ring[ring_ind].use_frags) + mlx4_en_destroy_allocator(priv, &priv->rx_ring[ring_ind]); + ring_ind--; + } + return err; +} + +void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring) +{ + struct mlx4_en_dev *mdev = priv->mdev; + + kfree(ring->lro.lro_arr); + mlx4_en_unmap_buffer(&ring->wqres.buf); + mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); + vfree(ring->rx_info); + ring->rx_info = NULL; +} + +void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring) +{ + struct mlx4_en_dev *mdev = priv->mdev; + + mlx4_srq_invalidate(mdev->dev, &ring->srq); + mlx4_srq_remove(mdev->dev, &ring->srq); + mlx4_srq_free(mdev->dev, &ring->srq); + mlx4_en_free_rx_buf(priv, ring); + if (ring->use_frags) + mlx4_en_destroy_allocator(priv, ring); +} + + +/* Unmap a completed descriptor and free unused pages */ +int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_rx_desc *rx_desc, + struct skb_frag_struct *skb_frags, + struct skb_frag_struct *skb_frags_rx, + struct mlx4_en_rx_alloc *page_alloc, + int length) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_frag_info *frag_info; + int nr; + dma_addr_t dma; + + /* Collect used fragments while replacing them in the HW descriptors */ + for (nr = 0; nr < priv->num_frags; nr++) { + frag_info = &priv->frag_info[nr]; + if (length <= frag_info->frag_prefix_size) + break; + + /* Save page reference in skb */ + skb_frags_rx[nr].page = skb_frags[nr].page; + skb_frags_rx[nr].size = skb_frags[nr].size; + skb_frags_rx[nr].page_offset = skb_frags[nr].page_offset; + dma = be64_to_cpu(rx_desc->data[nr].addr); + + /* Allocate a replacement page */ + if (mlx4_en_alloc_frag(priv, rx_desc, skb_frags, page_alloc, nr)) + goto
fail; + + /* Unmap buffer */ + pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size, + PCI_DMA_FROMDEVICE); + } + /* Adjust size of last fragment to match actual length */ + skb_frags_rx[nr - 1].size = length - + priv->frag_info[nr - 1].frag_prefix_size; + return nr; + +fail: + /* Drop all accumulated fragments (which have already been replaced in + * the descriptor) of this packet; remaining fragments are reused... */ + while (nr > 0) { + nr--; + put_page(skb_frags_rx[nr].page); + } + return 0; +} + + +struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, + struct mlx4_en_rx_desc *rx_desc, + struct skb_frag_struct *skb_frags, + struct mlx4_en_rx_alloc *page_alloc, + unsigned int length) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct sk_buff *skb; + void *va; + int used_frags; + dma_addr_t dma; + + skb = dev_alloc_skb(SMALL_PACKET_SIZE + NET_IP_ALIGN); + if (!skb) { + mlx4_dbg(RX_ERR, priv, "Failed allocating skb\n"); + return NULL; + } + skb->dev = priv->dev; + skb_reserve(skb, NET_IP_ALIGN); + skb->len = length; + skb->truesize = length + sizeof(struct sk_buff); + + /* Get pointer to first fragment so we could copy the headers into the + * (linear part of the) skb */ + va = page_address(skb_frags[0].page) + skb_frags[0].page_offset; + + if (length <= SMALL_PACKET_SIZE) { + /* We are copying all relevant data to the skb - temporarily + * synch buffers for the copy */ + dma = be64_to_cpu(rx_desc->data[0].addr); + dma_sync_single_range_for_cpu(&mdev->pdev->dev, dma, 0, + length, DMA_FROM_DEVICE); + skb_copy_to_linear_data(skb, va, length); + dma_sync_single_range_for_device(&mdev->pdev->dev, dma, 0, + length, DMA_FROM_DEVICE); + skb->tail += length; + } else { + + /* Move relevant fragments to skb */ + used_frags = mlx4_en_complete_rx_desc(priv, rx_desc, skb_frags, + skb_shinfo(skb)->frags, + page_alloc, length); + skb_shinfo(skb)->nr_frags = used_frags; + + /* Copy headers into the skb linear buffer */ + memcpy(skb->data, va, HEADER_COPY_SIZE); + skb->tail += HEADER_COPY_SIZE; + + /* Skip headers in first fragment */ + skb_shinfo(skb)->frags[0].page_offset += HEADER_COPY_SIZE; + + /* Adjust size of first fragment */ + skb_shinfo(skb)->frags[0].size -= HEADER_COPY_SIZE; + skb->data_len = length - HEADER_COPY_SIZE; + } + return skb; +} + +static void mlx4_en_copy_desc(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring, + int from, int to, int num) +{ + struct skb_frag_struct *skb_frags_from; + struct skb_frag_struct *skb_frags_to; + struct mlx4_en_rx_desc *rx_desc_from; + struct mlx4_en_rx_desc *rx_desc_to; + int from_index, to_index; + int nr, i; + + for (i = 0; i < num; i++) { + from_index = (from + i) & ring->size_mask; + to_index = (to + i) & ring->size_mask; + skb_frags_from = ring->rx_info + (from_index << priv->log_rx_info); + skb_frags_to = ring->rx_info + (to_index << priv->log_rx_info); + rx_desc_from = ring->buf + (from_index << ring->log_stride); + rx_desc_to = ring->buf + (to_index << ring->log_stride); + + for (nr = 0; nr < priv->num_frags; nr++) { + skb_frags_to[nr].page = skb_frags_from[nr].page; + skb_frags_to[nr].page_offset = skb_frags_from[nr].page_offset; + rx_desc_to->data[nr].addr = rx_desc_from->data[nr].addr; + } + } +} + +static inline int invalid_cqe(struct mlx4_en_priv *priv, + struct mlx4_cqe *cqe) +{ + /* Drop packet on bad receive or bad checksum */ + if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == + MLX4_CQE_OPCODE_ERROR)) { + mlx4_err(priv->mdev, "CQE completed in error - vendor " + "syndrome:%d syndrome:%d\n", + ((struct
mlx4_err_cqe *) cqe)->vendor_err_syndrome, + ((struct mlx4_err_cqe *) cqe)->syndrome); + return 1; + } + if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) { + mlx4_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n"); + return 1; + } + + return 0; +} + +static struct sk_buff * +mlx4_en_get_rx_skb(struct mlx4_en_priv *priv, + struct mlx4_en_rx_desc *rx_desc, + struct sk_buff **pskb, + unsigned int length) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct sk_buff *skb; + dma_addr_t dma; + + if (length <= SMALL_PACKET_SIZE) { + skb = dev_alloc_skb(length + NET_IP_ALIGN); + if (unlikely(!skb)) + return NULL; + + skb_reserve(skb, NET_IP_ALIGN); + /* We are copying all relevant data to the skb - temporarily + * synch buffers for the copy */ + dma = be64_to_cpu(rx_desc->data->addr); + dma_sync_single_range_for_cpu(&mdev->pdev->dev, dma, 0, + length, DMA_FROM_DEVICE); + skb_copy_to_linear_data(skb, (*pskb)->data, length); + dma_sync_single_range_for_device(&mdev->pdev->dev, dma, 0, + length, DMA_FROM_DEVICE); + + } else { + skb = *pskb; + if (unlikely(mlx4_en_alloc_rx_skb(priv, rx_desc, pskb))) + return NULL; + + pci_unmap_single(mdev->pdev, be64_to_cpu(rx_desc->data->addr), + be32_to_cpu(rx_desc->data->byte_count), + PCI_DMA_FROMDEVICE); + } + + skb->tail += length; + skb->len = length; + skb->truesize = length + sizeof(struct sk_buff); + return skb; +} + +int mlx4_en_process_rx_cq_skb(struct net_device *dev, + struct mlx4_en_cq *cq, int budget) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_cqe *cqe; + struct mlx4_en_rx_ring *ring = &priv->rx_ring[cq->ring]; + struct mlx4_en_rx_desc *rx_desc; + struct sk_buff **pskb; + struct sk_buff *skb; + int index; + unsigned int length; + int polled = 0; + + if (!priv->port_up) + return 0; + + /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx + * descriptor offset can be deduced from the CQE index instead of + * reading 'cqe->index' */ + index = cq->mcq.cons_index & ring->size_mask; + cqe = &cq->buf[index]; + + /* Process all completed CQEs */ + while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, + cq->mcq.cons_index & cq->size)) { + + pskb = (struct sk_buff **) ring->rx_info + index; + rx_desc = ring->buf + (index << ring->log_stride); + + /* + * make sure we read the CQE after we read the ownership bit + */ + rmb(); + + if (invalid_cqe(priv, cqe)) + goto next; + + /* + * Packet is OK - process it.
+ */ + length = be32_to_cpu(cqe->byte_cnt); + ring->bytes += length; + ring->packets++; + + skb = mlx4_en_get_rx_skb(priv, rx_desc, pskb, length); + if (unlikely(!skb)) + goto next; + skb->protocol = eth_type_trans(skb, dev); + + if (likely(priv->rx_csum && cqe->checksum == 0xffff)) { + priv->port_stats.rx_chksum_good++; + skb->ip_summed = CHECKSUM_UNNECESSARY; + } else { + priv->port_stats.rx_chksum_none++; + skb->ip_summed = CHECKSUM_NONE; + if (mdev->profile.ip_reasm && + cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4) && + !mlx4_en_rx_frags(priv, ring, skb, cqe)) + goto next; + } + + /* Push it up the stack */ + if (priv->vlgrp && (be32_to_cpu(cqe->vlan_my_qpn) & + MLX4_CQE_VLAN_PRESENT_MASK)) { + vlan_hwaccel_receive_skb(skb, priv->vlgrp, + be16_to_cpu(cqe->sl_vid)); + } else + netif_receive_skb(skb); + + dev->last_rx = jiffies; + +next: + ++cq->mcq.cons_index; + index = (cq->mcq.cons_index) & ring->size_mask; + cqe = &cq->buf[index]; + if (++polled == budget) + goto out; + } + + /* If CQ is empty, flush all pending IP reassembly sessions */ + mlx4_en_flush_frags(priv, ring); + +out: + AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled); + mlx4_cq_set_ci(&cq->mcq); + wmb(); /* ensure HW sees CQ consumer before we post new buffers */ + ring->cons = cq->mcq.cons_index; + ring->prod += polled; /* Polled descriptors were reallocated in place */ + if (unlikely(!ring->full)) + mlx4_en_fill_rx_buf(dev, ring); + mlx4_en_update_rx_prod_db(ring); + return polled; +} + +int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_cqe *cqe; + struct mlx4_en_rx_ring *ring = &priv->rx_ring[cq->ring]; + struct skb_frag_struct *skb_frags; + struct skb_frag_struct lro_frags[MLX4_EN_MAX_RX_FRAGS]; + struct mlx4_en_rx_desc *rx_desc; + struct sk_buff *skb; + int index; + int nr; + unsigned int length; + int polled = 0; + int ip_summed; + + if (!priv->port_up) + return 0; + + /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx + * descriptor offset can be deduced from the CQE index instead of + * reading 'cqe->index' */ + index = cq->mcq.cons_index & ring->size_mask; + cqe = &cq->buf[index]; + + /* Process all completed CQEs */ + while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, + cq->mcq.cons_index & cq->size)) { + + skb_frags = ring->rx_info + (index << priv->log_rx_info); + rx_desc = ring->buf + (index << ring->log_stride); + + /* + * make sure we read the CQE after we read the ownership bit + */ + rmb(); + + if (invalid_cqe(priv, cqe)) + goto next; + + /* + * Packet is OK - process it.
+ */ + length = be32_to_cpu(cqe->byte_cnt); + ring->bytes += length; + ring->packets++; + + if (likely(priv->rx_csum)) { + if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && + (cqe->checksum == 0xffff)) { + priv->port_stats.rx_chksum_good++; + /* This packet is eligible for LRO if it is: + * - DIX Ethernet (type interpretation) + * - TCP/IP (v4) + * - without IP options + * - not an IP fragment */ + if (mlx4_en_can_lro(cqe->status) && + dev->features & NETIF_F_LRO) { + + nr = mlx4_en_complete_rx_desc( + priv, rx_desc, + skb_frags, lro_frags, + ring->page_alloc, length); + if (!nr) + goto next; + + if (priv->vlgrp && (cqe->vlan_my_qpn & + MLX4_CQE_VLAN_PRESENT_MASK)) { + lro_vlan_hwaccel_receive_frags( + &ring->lro, lro_frags, + length, length, + priv->vlgrp, + be16_to_cpu(cqe->sl_vid), + NULL, 0); + } else + lro_receive_frags(&ring->lro, + lro_frags, + length, + length, + NULL, 0); + + goto next; + } + + /* LRO not possible, complete processing here */ + ip_summed = CHECKSUM_UNNECESSARY; + INC_PERF_COUNTER(priv->pstats.lro_misses); + } else { + ip_summed = CHECKSUM_NONE; + priv->port_stats.rx_chksum_none++; + } + } else { + ip_summed = CHECKSUM_NONE; + priv->port_stats.rx_chksum_none++; + } + + skb = mlx4_en_rx_skb(priv, rx_desc, skb_frags, + ring->page_alloc, length); + if (!skb) { + priv->stats.rx_dropped++; + goto next; + } + + skb->ip_summed = ip_summed; + skb->protocol = eth_type_trans(skb, dev); + + /* Push it up the stack */ + if (priv->vlgrp && (be32_to_cpu(cqe->vlan_my_qpn) & + MLX4_CQE_VLAN_PRESENT_MASK)) { + vlan_hwaccel_receive_skb(skb, priv->vlgrp, + be16_to_cpu(cqe->sl_vid)); + } else + netif_receive_skb(skb); + + dev->last_rx = jiffies; + +next: + ++cq->mcq.cons_index; + index = (cq->mcq.cons_index) & ring->size_mask; + cqe = &cq->buf[index]; + if (++polled == budget) { + /* We are here because we reached the NAPI budget - + * flush only pending LRO sessions */ + lro_flush_all(&ring->lro); + goto out; + } + } + + /* If CQ is empty, flush all LRO sessions unconditionally */ + lro_flush_all(&ring->lro); + +out: + AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled); + mlx4_cq_set_ci(&cq->mcq); + wmb(); /* ensure HW sees CQ consumer before we post new buffers */ + ring->cons = cq->mcq.cons_index; + ring->prod += polled; /* Polled descriptors were reallocated in place */ + if (unlikely(!ring->full)) { + mlx4_en_copy_desc(priv, ring, ring->cons - polled, + ring->prod - polled, polled); + mlx4_en_fill_rx_buf(dev, ring); + } + mlx4_en_update_rx_prod_db(ring); + return polled; +} + + +void mlx4_en_rx_irq(struct mlx4_cq *mcq) +{ + struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq); + struct mlx4_en_priv *priv = netdev_priv(cq->dev); + + if (priv->port_up) + netif_rx_schedule(cq->poll_dev); + else + mlx4_en_arm_cq(priv, cq); +} + +/* Rx CQ polling - called by NAPI */ +int mlx4_en_poll_rx_cq(struct net_device *poll_dev, int *budget) +{ + struct mlx4_en_cq *cq = poll_dev->priv; + struct net_device *dev = cq->dev; + struct mlx4_en_priv *priv = netdev_priv(dev); + int done; + int work = min(*budget, poll_dev->quota); + + if (priv->rx_ring[cq->ring].use_frags) + done = mlx4_en_process_rx_cq(dev, cq, work); + else + done = mlx4_en_process_rx_cq_skb(dev, cq, work); + + dev->quota -= done; + *budget -= done; + + /* If we used up all the quota - we're probably not done yet...
*/ + if (done == work) { + INC_PERF_COUNTER(priv->pstats.napi_quota); + return 1; + } + + /* Done for now */ + netif_rx_complete(poll_dev); + mlx4_en_arm_cq(priv, cq); + return 0; +} + + +/* Calculate the last offset position that accommodates a full fragment + * (assuming fragment size = stride-align) */ +static int mlx4_en_last_alloc_offset(struct mlx4_en_priv *priv, u16 stride, u16 align) +{ + u16 res = MLX4_EN_ALLOC_SIZE % stride; + u16 offset = MLX4_EN_ALLOC_SIZE - stride - res + align; + + mlx4_dbg(DRV, priv, "Calculated last offset for stride:%d align:%d " + "res:%d offset:%d\n", stride, align, res, offset); + return offset; +} + + +static int frag_sizes[] = { + FRAG_SZ0, + FRAG_SZ1, + FRAG_SZ2, + FRAG_SZ3 +}; + +void mlx4_en_calc_rx_buf(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + int eff_mtu = dev->mtu + ETH_HLEN + VLAN_HLEN + ETH_LLC_SNAP_SIZE; + int buf_size = 0; + int i = 0; + + while (buf_size < eff_mtu) { + priv->frag_info[i].frag_size = + (eff_mtu > buf_size + frag_sizes[i]) ? + frag_sizes[i] : eff_mtu - buf_size; + priv->frag_info[i].frag_prefix_size = buf_size; + if (!i) { + priv->frag_info[i].frag_align = NET_IP_ALIGN; + priv->frag_info[i].frag_stride = + ALIGN(frag_sizes[i] + NET_IP_ALIGN, SMP_CACHE_BYTES); + } else { + priv->frag_info[i].frag_align = 0; + priv->frag_info[i].frag_stride = + ALIGN(frag_sizes[i], SMP_CACHE_BYTES); + } + priv->frag_info[i].last_offset = mlx4_en_last_alloc_offset( + priv, priv->frag_info[i].frag_stride, + priv->frag_info[i].frag_align); + buf_size += priv->frag_info[i].frag_size; + i++; + } + + priv->num_frags = i; + priv->rx_skb_size = eff_mtu; + priv->log_rx_info = ROUNDUP_LOG2(i * sizeof(struct skb_frag_struct)); + + mlx4_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d " + "num_frags:%d):\n", eff_mtu, priv->num_frags); + for (i = 0; i < priv->num_frags; i++) { + mlx4_dbg(DRV, priv, " frag:%d - size:%d prefix:%d align:%d " + "stride:%d last_offset:%d\n", i, + priv->frag_info[i].frag_size, + priv->frag_info[i].frag_prefix_size, + priv->frag_info[i].frag_align, + priv->frag_info[i].frag_stride, + priv->frag_info[i].last_offset); + } +} + +/* RSS related functions */ + +/* Calculate rss size and map each entry in rss table to rx ring */ +void mlx4_en_set_default_rss_map(struct mlx4_en_priv *priv, + struct mlx4_en_rss_map *rss_map, + int num_entries, int num_rings) +{ + int i; + + rss_map->size = roundup_pow_of_two(num_entries); + mlx4_dbg(DRV, priv, "Setting default RSS map of %d entries\n", + rss_map->size); + + for (i = 0; i < rss_map->size; i++) { + rss_map->map[i] = i % num_rings; + mlx4_dbg(DRV, priv, "Entry %d ---> ring %d\n", i, rss_map->map[i]); + } +} + +static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, + int qpn, int srqn, int cqn, + enum mlx4_qp_state *state, + struct mlx4_qp *qp) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_qp_context *context; + int err = 0; + + context = kmalloc(sizeof *context, GFP_KERNEL); + if (!context) { + mlx4_err(mdev, "Failed to allocate qp context\n"); + return -ENOMEM; + } + + err = mlx4_qp_alloc(mdev->dev, qpn, qp); + if (err) { + mlx4_err(mdev, "Failed to allocate qp #%d\n", qpn); + goto out; + } + qp->event = mlx4_en_sqp_event; + + memset(context, 0, sizeof *context); + mlx4_en_fill_qp_context(priv, 0, 0, 0, 0, qpn, cqn, srqn, context); + + err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, context, qp, state); + if (err) { + mlx4_qp_remove(mdev->dev, qp); + mlx4_qp_free(mdev->dev, qp); + } +out: + kfree(context); + return
err; +} + +/* Allocate rx qp's and configure them according to rss map */ +int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_rss_map *rss_map = &priv->rss_map; + struct mlx4_qp_context context; + struct mlx4_en_rss_context *rss_context; + void *ptr; + int rss_xor = mdev->profile.rss_xor; + u8 rss_mask = mdev->profile.rss_mask; + int i, srqn, qpn, cqn; + int err = 0; + int good_qps = 0; + + mlx4_dbg(DRV, priv, "Configuring rss steering for port %u\n", priv->port); + err = mlx4_qp_reserve_range(mdev->dev, rss_map->size, + rss_map->size, &rss_map->base_qpn); + if (err) { + mlx4_err(mdev, "Failed reserving %d qps for port %u\n", + rss_map->size, priv->port); + return err; + } + + for (i = 0; i < rss_map->size; i++) { + cqn = priv->rx_ring[rss_map->map[i]].cqn; + srqn = priv->rx_ring[rss_map->map[i]].srq.srqn; + qpn = rss_map->base_qpn + i; + err = mlx4_en_config_rss_qp(priv, qpn, srqn, cqn, + &rss_map->state[i], + &rss_map->qps[i]); + if (err) + goto rss_err; + + ++good_qps; + } + + /* Configure RSS indirection qp */ + err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &priv->base_qpn); + if (err) { + mlx4_err(mdev, "Failed to reserve range for RSS " + "indirection qp\n"); + goto rss_err; + } + err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp); + if (err) { + mlx4_err(mdev, "Failed to allocate RSS indirection QP\n"); + goto reserve_err; + } + rss_map->indir_qp.event = mlx4_en_sqp_event; + mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn, + priv->rx_ring[0].cqn, 0, &context); + + ptr = ((void *) &context) + 0x3c; + rss_context = (struct mlx4_en_rss_context *) ptr; + rss_context->base_qpn = cpu_to_be32(ilog2(rss_map->size - 1) << 24 | + (rss_map->base_qpn + 1)); + rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn); + rss_context->hash_fn = rss_xor & 0x3; + rss_context->flags = rss_mask << 2; + + err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context, + &rss_map->indir_qp, &rss_map->indir_state); + if (err) + goto indir_err; + + return 0; + +indir_err: + mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state, + MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp); + mlx4_qp_remove(mdev->dev, &rss_map->indir_qp); + mlx4_qp_free(mdev->dev, &rss_map->indir_qp); +reserve_err: + mlx4_qp_release_range(mdev->dev, priv->base_qpn, 1); +rss_err: + for (i = 0; i < good_qps; i++) { + mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i], + MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]); + mlx4_qp_remove(mdev->dev, &rss_map->qps[i]); + mlx4_qp_free(mdev->dev, &rss_map->qps[i]); + } + mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, rss_map->size); + return err; +} + +void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_rss_map *rss_map = &priv->rss_map; + int i; + + mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state, + MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp); + mlx4_qp_remove(mdev->dev, &rss_map->indir_qp); + mlx4_qp_free(mdev->dev, &rss_map->indir_qp); + mlx4_qp_release_range(mdev->dev, priv->base_qpn, 1); + + for (i = 0; i < rss_map->size; i++) { + mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i], + MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]); + mlx4_qp_remove(mdev->dev, &rss_map->qps[i]); + mlx4_qp_free(mdev->dev, &rss_map->qps[i]); + } + mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, rss_map->size); +} + + + + + diff --git a/drivers/net/mlx4/en_tx.c b/drivers/net/mlx4/en_tx.c new file mode 100644 index 0000000..9203626 --- 
/dev/null +++ b/drivers/net/mlx4/en_tx.c @@ -0,0 +1,868 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <asm/page.h> +#include <linux/mlx4/cq.h> +#include <linux/mlx4/qp.h> +#include <linux/in.h> +#include <linux/skbuff.h> +#include <linux/if_vlan.h> +#include <linux/vmalloc.h> +#include <linux/tcp.h> +#include <linux/ip.h> + +#include "mlx4_en.h" + +enum { + MAX_INLINE = 104, /* 128 - 16 - 4 - 4 */ +}; + +static int inline_thold __read_mostly = MAX_INLINE; + +module_param_named(inline_thold, inline_thold, int, 0444); +MODULE_PARM_DESC(inline_thold, "threshold for using inline data"); + +int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, u32 size, + u16 stride) +{ + struct mlx4_en_dev *mdev = priv->mdev; + int tmp; + int err; + + ring->size = size; + ring->size_mask = size - 1; + ring->stride = stride; + + inline_thold = min(inline_thold, MAX_INLINE); + + spin_lock_init(&ring->comp_lock); + + tmp = size * sizeof(struct mlx4_en_tx_info); + ring->tx_info = vmalloc(tmp); + if (!ring->tx_info) { + mlx4_err(mdev, "Failed allocating tx_info ring\n"); + return -ENOMEM; + } + mlx4_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n", + ring->tx_info, tmp); + + ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL); + if (!ring->bounce_buf) { + mlx4_err(mdev, "Failed allocating bounce buffer\n"); + err = -ENOMEM; + goto err_tx; + } + ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE); + + err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, + 2 * PAGE_SIZE); + if (err) { + mlx4_err(mdev, "Failed allocating hwq resources\n"); + goto err_bounce; + } + + err = mlx4_en_map_buffer(&ring->wqres.buf); + if (err) { + mlx4_err(mdev, "Failed to map TX buffer\n"); + goto err_hwq_res; + } + + ring->buf = ring->wqres.buf.direct.buf; + + mlx4_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d " + "buf_size:%d dma:%llx\n", ring, ring->buf, ring->size, + ring->buf_size, ring->wqres.buf.direct.map); + + err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &ring->qpn); + if (err) { + mlx4_err(mdev, "Failed reserving qp for tx ring.\n"); + goto err_map; + } + + err = mlx4_qp_alloc(mdev->dev,
ring->qpn, &ring->qp); + if (err) { + mlx4_err(mdev, "Failed allocating qp %d\n", ring->qpn); + goto err_reserve; + } + ring->qp.event = mlx4_en_sqp_event; + + return 0; + +err_reserve: + mlx4_qp_release_range(mdev->dev, ring->qpn, 1); +err_map: + mlx4_en_unmap_buffer(&ring->wqres.buf); +err_hwq_res: + mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); +err_bounce: + kfree(ring->bounce_buf); + ring->bounce_buf = NULL; +err_tx: + vfree(ring->tx_info); + ring->tx_info = NULL; + return err; +} + +void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring) +{ + struct mlx4_en_dev *mdev = priv->mdev; + mlx4_dbg(DRV, priv, "Destroying tx ring, qpn: %d\n", ring->qpn); + + mlx4_qp_remove(mdev->dev, &ring->qp); + mlx4_qp_free(mdev->dev, &ring->qp); + mlx4_qp_release_range(mdev->dev, ring->qpn, 1); + mlx4_en_unmap_buffer(&ring->wqres.buf); + mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); + kfree(ring->bounce_buf); + ring->bounce_buf = NULL; + vfree(ring->tx_info); + ring->tx_info = NULL; +} + +int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int cq, int srqn) +{ + struct mlx4_en_dev *mdev = priv->mdev; + int err; + + ring->cqn = cq; + ring->prod = 0; + ring->cons = 0xffffffff; + ring->last_nr_txbb = 1; + ring->poll_cnt = 0; + ring->blocked = 0; + memset(ring->tx_info, 0, ring->size * sizeof(struct mlx4_en_tx_info)); + memset(ring->buf, 0, ring->buf_size); + + ring->qp_state = MLX4_QP_STATE_RST; + ring->doorbell_qpn = swab32(ring->qp.qpn << 8); + + mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn, + ring->cqn, srqn, &ring->context); + + err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context, + &ring->qp, &ring->qp_state); + + return err; +} + +void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring) +{ + struct mlx4_en_dev *mdev = priv->mdev; + + mlx4_qp_modify(mdev->dev, NULL, ring->qp_state, + MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp); +} + + +static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u8 owner) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; + struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE; + struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset; + struct sk_buff *skb = tx_info->skb; + struct skb_frag_struct *frag; + void *end = ring->buf + ring->buf_size; + int frags = skb_shinfo(skb)->nr_frags; + int i; + __be32 *ptr = (__be32 *)tx_desc; + __be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT)); + + /* Optimize the common case when there are no wraparounds */ + if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) { + if (!tx_info->inl) { + if (tx_info->linear) { + pci_unmap_single(mdev->pdev, + (dma_addr_t) be64_to_cpu(data->addr), + be32_to_cpu(data->byte_count), + PCI_DMA_TODEVICE); + ++data; + } + + for (i = 0; i < frags; i++) { + frag = &skb_shinfo(skb)->frags[i]; + pci_unmap_page(mdev->pdev, + (dma_addr_t) be64_to_cpu(data[i].addr), + frag->size, PCI_DMA_TODEVICE); + } + } + /* Stamp the freed descriptor */ + for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) { + *ptr = stamp; + ptr += STAMP_DWORDS; + } + + } else { + if (!tx_info->inl) { + if ((void *) data >= end) { + data = (struct mlx4_wqe_data_seg *) + (ring->buf + ((void *) data - end)); + } + + if (tx_info->linear) { + pci_unmap_single(mdev->pdev, + (dma_addr_t) be64_to_cpu(data->addr), + 
be32_to_cpu(data->byte_count),
+					PCI_DMA_TODEVICE);
+				++data;
+			}
+
+			for (i = 0; i < frags; i++) {
+				/* Check for wraparound before unmapping */
+				if ((void *) data >= end)
+					data = (struct mlx4_wqe_data_seg *) ring->buf;
+				frag = &skb_shinfo(skb)->frags[i];
+				pci_unmap_page(mdev->pdev,
+					(dma_addr_t) be64_to_cpu(data->addr),
+					frag->size, PCI_DMA_TODEVICE);
+			}
+		}
+		/* Stamp the freed descriptor */
+		for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) {
+			*ptr = stamp;
+			ptr += STAMP_DWORDS;
+			if ((void *) ptr >= end) {
+				ptr = ring->buf;
+				stamp ^= cpu_to_be32(0x80000000);
+			}
+		}
+
+	}
+	dev_kfree_skb_any(skb);
+	return tx_info->nr_txbb;
+}
+
+
+int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	int cnt = 0;
+
+	/* Skip last polled descriptor */
+	ring->cons += ring->last_nr_txbb;
+	mlx4_dbg(DRV, priv, "Freeing Tx buf - cons:0x%x prod:0x%x\n",
+		 ring->cons, ring->prod);
+
+	if ((u32) (ring->prod - ring->cons) > ring->size) {
+		if (netif_msg_tx_err(priv))
+			mlx4_warn(priv->mdev, "Tx consumer passed producer!\n");
+		return 0;
+	}
+
+	while (ring->cons != ring->prod) {
+		ring->last_nr_txbb = mlx4_en_free_tx_desc(priv, ring,
+					ring->cons & ring->size_mask,
+					!!(ring->cons & ring->size));
+		ring->cons += ring->last_nr_txbb;
+		cnt++;
+	}
+
+	if (cnt)
+		mlx4_dbg(DRV, priv, "Freed %d uncompleted tx descriptors\n", cnt);
+
+	return cnt;
+}
+
+void mlx4_en_set_prio_map(struct mlx4_en_priv *priv, u16 *prio_map, u32 ring_num)
+{
+	int block = 8 / ring_num;
+	int extra = 8 - (block * ring_num);
+	int num = 0;
+	u16 ring = MLX4_EN_NUM_HASH_RINGS + 1;
+	int prio;
+
+	if (ring_num == 1) {
+		for (prio = 0; prio < 8; prio++)
+			prio_map[prio] = 0;
+		return;
+	}
+
+	for (prio = 0; prio < 8; prio++) {
+		if (extra && (num == block + 1)) {
+			ring++;
+			num = 0;
+			extra--;
+		} else if (!extra && (num == block)) {
+			ring++;
+			num = 0;
+		}
+		prio_map[prio] = ring;
+		mlx4_dbg(DRV, priv, " prio:%d --> ring:%d\n", prio, ring);
+		num++;
+	}
+}
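+
+/*
+ * Worked example for the mapping above (illustrative only, the values
+ * are hypothetical): with ring_num = 3 we get block = 2 and extra = 2,
+ * so the two "extra" rings each absorb block + 1 priorities.  Tracing
+ * the loop: priorities 0-2 map to the first priority ring, 3-5 to the
+ * second and 6-7 to the third, numbered upwards from
+ * MLX4_EN_NUM_HASH_RINGS + 1.
+ */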
+
+void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_cq *mcq = &cq->mcq;
+	struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring];
+	struct mlx4_cqe *cqe = cq->buf;
+	u16 index;
+	u16 new_index;
+	u32 txbbs_skipped = 0;
+	u32 cq_last_sav;
+
+	/* index always points to the first TXBB of the last polled descriptor */
+	index = ring->cons & ring->size_mask;
+	new_index = be16_to_cpu(cqe->wqe_index) & ring->size_mask;
+	if (index == new_index)
+		return;
+
+	if (!priv->port_up)
+		return;
+
+	/*
+	 * We use a two-stage loop:
+	 * - the first samples the HW-updated CQE
+	 * - the second frees TXBBs until the last sample
+	 * This lets us amortize CQE cache misses, while still polling the CQ
+	 * until it is quiescent.
+	 */
+	cq_last_sav = mcq->cons_index;
+	do {
+		do {
+			/* Skip over last polled CQE */
+			index = (index + ring->last_nr_txbb) & ring->size_mask;
+			txbbs_skipped += ring->last_nr_txbb;
+
+			/* Poll next CQE */
+			ring->last_nr_txbb = mlx4_en_free_tx_desc(
+						priv, ring, index,
+						!!((ring->cons + txbbs_skipped) &
+						   ring->size));
+			++mcq->cons_index;
+
+		} while (index != new_index);
+
+		new_index = be16_to_cpu(cqe->wqe_index) & ring->size_mask;
+	} while (index != new_index);
+	AVG_PERF_COUNTER(priv->pstats.tx_coal_avg,
+			 (u32) (mcq->cons_index - cq_last_sav));
+
+	/*
+	 * To prevent CQ overflow we first update CQ consumer and only then
+	 * the ring consumer.
+	 */
+	mlx4_cq_set_ci(mcq);
+	wmb();
+	ring->cons += txbbs_skipped;
+
+	/* Wakeup Tx queue if this ring stopped it */
+	if (unlikely(ring->blocked)) {
+		if ((u32) (ring->prod - ring->cons) <=
+		     ring->size - HEADROOM - MAX_DESC_TXBBS) {
+
+			/* TODO: support multiqueue netdevs. Currently, we block
+			 * when *any* ring is full. Note that:
+			 * - 2 Tx rings can unblock at the same time and call
+			 *   netif_wake_queue(), which is OK since this
+			 *   operation is idempotent.
+			 * - We might wake the queue just after another ring
+			 *   stopped it. This is no big deal because the next
+			 *   transmission on that ring would stop the queue.
+			 */
+			ring->blocked = 0;
+			netif_wake_queue(dev);
+			priv->port_stats.wake_queue++;
+		}
+	}
+}
+
+void mlx4_en_tx_irq(struct mlx4_cq *mcq)
+{
+	struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
+	struct mlx4_en_priv *priv = netdev_priv(cq->dev);
+	struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring];
+
+	if (!spin_trylock(&ring->comp_lock))
+		return;
+	mlx4_en_process_tx_cq(cq->dev, cq);
+	mod_timer(&cq->timer, jiffies + 1);
+	spin_unlock(&ring->comp_lock);
+}
+
+
+void mlx4_en_poll_tx_cq(unsigned long data)
+{
+	struct mlx4_en_cq *cq = (struct mlx4_en_cq *) data;
+	struct mlx4_en_priv *priv = netdev_priv(cq->dev);
+	struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring];
+	u32 inflight;
+
+	INC_PERF_COUNTER(priv->pstats.tx_poll);
+
+	if (!spin_trylock(&ring->comp_lock)) {
+		mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT);
+		return;
+	}
+	mlx4_en_process_tx_cq(cq->dev, cq);
+	inflight = (u32) (ring->prod - ring->cons - ring->last_nr_txbb);
+
+	/* If there are still packets in flight and the timer has not already
+	 * been scheduled by the Tx routine then schedule it here to guarantee
+	 * completion processing of these packets */
+	if (inflight && priv->port_up)
+		mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT);
+
+	spin_unlock(&ring->comp_lock);
+}
+
+static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv,
+						      struct mlx4_en_tx_ring *ring,
+						      u32 index,
+						      unsigned int desc_size)
+{
+	u32 copy = (ring->size - index) * TXBB_SIZE;
+	int i;
+
+	for (i = desc_size - copy - 4; i >= 0; i -= 4) {
+		if ((i & (TXBB_SIZE - 1)) == 0)
+			wmb();
+
+		*((u32 *) (ring->buf + i)) =
+			*((u32 *) (ring->bounce_buf + copy + i));
+	}
+
+	for (i = copy - 4; i >= 4; i -= 4) {
+		if ((i & (TXBB_SIZE - 1)) == 0)
+			wmb();
+
+		*((u32 *) (ring->buf + index * TXBB_SIZE + i)) =
+			*((u32 *) (ring->bounce_buf + i));
+	}
+
+	/* Return real descriptor location */
+	return ring->buf + index * TXBB_SIZE;
+}
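+
+/*
+ * Worked example for the copy-back above (illustrative only, the
+ * numbers are hypothetical): with ring->size = 256, index = 255 and
+ * desc_size = 128 (two TXBBs), copy = 64.  The first loop writes
+ * bounce_buf[64..127] to buf[0..63] (the TXBB that wrapped past the
+ * end of the ring), the second writes bounce_buf[4..63] to the slot
+ * at index 255, deliberately skipping dword 0 so that the caller's
+ * final write of ctrl.owner_opcode is what hands the descriptor to
+ * hardware.  Both loops run backwards with a wmb() at each TXBB
+ * boundary.
+ */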
+
+static inline void mlx4_en_xmit_poll(struct mlx4_en_priv *priv, int tx_ind)
+{
+	struct mlx4_en_cq *cq = &priv->tx_cq[tx_ind];
+	struct mlx4_en_tx_ring *ring = &priv->tx_ring[tx_ind];
+
+	/* If we don't have a pending timer, set one up to catch our recent
+	   post in case the interface becomes idle */
+	if (!timer_pending(&cq->timer))
+		mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT);
+
+	/* Poll the CQ every MLX4_EN_TX_POLL_MODER packets */
+	if ((++ring->poll_cnt & (MLX4_EN_TX_POLL_MODER - 1)) == 0)
+		if (spin_trylock(&ring->comp_lock)) {
+			mlx4_en_process_tx_cq(priv->dev, cq);
+			spin_unlock(&ring->comp_lock);
+		}
+}
+
+static void *get_frag_ptr(struct sk_buff *skb)
+{
+	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
+	struct page *page = frag->page;
+	void *ptr;
+
+	ptr = page_address(page);
+	if (unlikely(!ptr))
+		return NULL;
+
+	return ptr + frag->page_offset;
+}
+
+static int is_inline(struct sk_buff *skb, void **pfrag)
+{
+	void *ptr;
+
+	if (inline_thold && !skb_is_gso(skb) && skb->len <= inline_thold) {
+		if (skb_shinfo(skb)->nr_frags == 1) {
+			ptr = get_frag_ptr(skb);
+			if (unlikely(!ptr))
+				return 0;
+
+			if (pfrag)
+				*pfrag = ptr;
+
+			return 1;
+		} else if (unlikely(skb_shinfo(skb)->nr_frags))
+			return 0;
+		else
+			return 1;
+	}
+
+	return 0;
+}
+
+static int inline_size(struct sk_buff *skb)
+{
+	if (skb->len + CTRL_SIZE + sizeof(struct mlx4_wqe_inline_seg)
+	    <= MLX4_INLINE_ALIGN)
+		return ALIGN(skb->len + CTRL_SIZE +
+			     sizeof(struct mlx4_wqe_inline_seg), 16);
+	else
+		return ALIGN(skb->len + CTRL_SIZE + 2 *
+			     sizeof(struct mlx4_wqe_inline_seg), 16);
+}
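+
+/*
+ * Sizing example (illustrative only): with MLX4_INLINE_ALIGN at 64 --
+ * which the MAX_INLINE arithmetic above implies -- a 40-byte skb fits
+ * a single inline segment: 40 + CTRL_SIZE(16) + 4 = 60 <= 64, so
+ * inline_size() returns ALIGN(60, 16) = 64, i.e. one TXBB.  A 104-byte
+ * skb needs a second segment header: ALIGN(104 + 16 + 8, 16) = 128,
+ * i.e. two TXBBs, which is exactly why MAX_INLINE is 104.
+ */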
+
+static int get_real_size(struct sk_buff *skb, struct net_device *dev,
+			 int *lso_header_size)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	int real_size;
+
+	if (skb_is_gso(skb)) {
+		*lso_header_size = skb_transport_offset(skb) + tcp_hdrlen(skb);
+		real_size = CTRL_SIZE + skb_shinfo(skb)->nr_frags * DS_SIZE +
+			ALIGN(*lso_header_size + 4, DS_SIZE);
+		if (unlikely(*lso_header_size != skb_headlen(skb))) {
+			/* We add a segment for the skb linear buffer only if
+			 * it contains data */
+			if (*lso_header_size < skb_headlen(skb))
+				real_size += DS_SIZE;
+			else {
+				if (netif_msg_tx_err(priv))
+					mlx4_warn(mdev, "Non-linear headers\n");
+				dev_kfree_skb_any(skb);
+				return 0;
+			}
+		}
+		if (unlikely(*lso_header_size > MAX_LSO_HDR_SIZE)) {
+			if (netif_msg_tx_err(priv))
+				mlx4_warn(mdev, "LSO header size too big\n");
+			dev_kfree_skb_any(skb);
+			return 0;
+		}
+	} else {
+		*lso_header_size = 0;
+		if (!is_inline(skb, NULL))
+			real_size = CTRL_SIZE + (skb_shinfo(skb)->nr_frags + 1) * DS_SIZE;
+		else
+			real_size = inline_size(skb);
+	}
+
+	return real_size;
+}
+
+static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, struct sk_buff *skb,
+			     int real_size, u16 *vlan_tag, int tx_ind, void *fragptr)
+{
+	struct mlx4_wqe_inline_seg *inl = &tx_desc->inl;
+	int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof *inl;
+
+	if (skb->len <= spc) {
+		inl->byte_count = cpu_to_be32(1 << 31 | skb->len);
+		skb_copy_from_linear_data(skb, inl + 1, skb_headlen(skb));
+		if (skb_shinfo(skb)->nr_frags)
+			memcpy(((void *)(inl + 1)) + skb_headlen(skb), fragptr,
+			       skb_shinfo(skb)->frags[0].size);
+
+	} else {
+		inl->byte_count = cpu_to_be32(1 << 31 | spc);
+		if (skb_headlen(skb) <= spc) {
+			skb_copy_from_linear_data(skb, inl + 1, skb_headlen(skb));
+			if (skb_headlen(skb) < spc) {
+				memcpy(((void *)(inl + 1)) + skb_headlen(skb),
+				       fragptr, spc - skb_headlen(skb));
+				fragptr += spc - skb_headlen(skb);
+			}
+			inl = (void *) (inl + 1) + spc;
+			memcpy(((void *)(inl + 1)), fragptr, skb->len - spc);
+		} else {
+			skb_copy_from_linear_data(skb, inl + 1, spc);
+			inl = (void *) (inl + 1) + spc;
+			skb_copy_from_linear_data_offset(skb, spc, inl + 1,
+					skb_headlen(skb) - spc);
+			if (skb_shinfo(skb)->nr_frags)
+				memcpy(((void *)(inl + 1)) + skb_headlen(skb) - spc,
+					fragptr, skb_shinfo(skb)->frags[0].size);
+		}
+
+		wmb();
+		inl->byte_count = cpu_to_be32(1 << 31 | (skb->len - spc));
+	}
+	tx_desc->ctrl.vlan_tag = cpu_to_be16(*vlan_tag);
+	tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN * !!(*vlan_tag);
+	tx_desc->ctrl.fence_size = (real_size / 16) & 0x3f;
+}
+
+static int mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	u16 vlan_tag = 0;
+	int tx_ind = 0;
+	struct tcphdr *th = tcp_hdr(skb);
+	struct iphdr *iph = ip_hdr(skb);
+	struct mlx4_en_tx_hash_entry *entry;
+	u32 hash_index;
+
+	/* Obtain VLAN information if present */
+	if (priv->vlgrp && vlan_tx_tag_present(skb)) {
+		vlan_tag = vlan_tx_tag_get(skb);
+		/* Set the Tx ring to use according to vlan priority */
+		tx_ind = priv->tx_prio_map[vlan_tag >> 13];
+		if (tx_ind)
+			return tx_ind;
+	}
+
+	/* Hashing is only done for TCP/IP or UDP/IP packets */
+	if (be16_to_cpu(skb->protocol) != ETH_P_IP)
+		return MLX4_EN_NUM_HASH_RINGS;
+
+	hash_index = be32_to_cpu(iph->daddr) & MLX4_EN_TX_HASH_MASK;
+	switch (iph->protocol) {
+	case IPPROTO_UDP:
+		break;
+	case IPPROTO_TCP:
+		hash_index = (hash_index ^ be16_to_cpu(th->dest ^ th->source)) &
+			MLX4_EN_TX_HASH_MASK;
+		break;
+	default:
+		return MLX4_EN_NUM_HASH_RINGS;
+	}
+
+	entry = &priv->tx_hash[hash_index];
+	if (skb->len > MLX4_EN_SMALL_PKT_SIZE)
+		entry->big_pkts++;
+	else
+		entry->small_pkts++;
+
+	if (unlikely(!(++entry->cnt))) {
+		tx_ind = hash_index & (MLX4_EN_NUM_HASH_RINGS / 2 - 1);
+		if (2 * entry->big_pkts > entry->small_pkts)
+			tx_ind += MLX4_EN_NUM_HASH_RINGS / 2;
+		entry->small_pkts = entry->big_pkts = 0;
+		entry->ring = tx_ind;
+	}
+	return entry->ring;
+}
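+
+/*
+ * Example of the re-selection above (illustrative only; this assumes
+ * entry->cnt is an 8-bit counter, so a flow is re-homed once every
+ * 256 packets): a hash bucket dominated by full-sized frames
+ * (2 * big_pkts > small_pkts) is steered to the upper half of the
+ * hash rings, one carrying mostly-small packets stays in the lower
+ * half, and the counters restart for the next 256-packet window.
+ */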
+
+int mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_en_tx_ring *ring;
+	struct mlx4_en_cq *cq;
+	struct mlx4_en_tx_desc *tx_desc;
+	struct mlx4_wqe_data_seg *data;
+	struct skb_frag_struct *frag;
+	struct mlx4_en_tx_info *tx_info;
+	int tx_ind = 0;
+	int nr_txbb;
+	int desc_size;
+	int real_size;
+	dma_addr_t dma;
+	u32 index;
+	__be32 op_own;
+	u16 vlan_tag = 0;
+	int i;
+	int lso_header_size;
+	void *fragptr;
+
+	if (unlikely(!skb->len)) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+	real_size = get_real_size(skb, dev, &lso_header_size);
+	if (unlikely(!real_size))
+		return NETDEV_TX_OK;
+
+	/* Align descriptor to TXBB size */
+	desc_size = ALIGN(real_size, TXBB_SIZE);
+	nr_txbb = desc_size / TXBB_SIZE;
+	if (unlikely(nr_txbb > MAX_DESC_TXBBS)) {
+		if (netif_msg_tx_err(priv))
+			mlx4_warn(mdev, "Oversized header or SG list\n");
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	tx_ind = mlx4_en_select_queue(dev, skb);
+	ring = &priv->tx_ring[tx_ind];
+	if (priv->vlgrp && vlan_tx_tag_present(skb))
+		vlan_tag = vlan_tx_tag_get(skb);
+
+	/* Check available TXBBs and 2K spare for prefetch */
+	if (unlikely(((int)(ring->prod - ring->cons)) >
+		     ring->size - HEADROOM - MAX_DESC_TXBBS)) {
+		/* every full Tx ring stops queue.
+		 * TODO: implement multi-queue support (per-queue stop) */
+		netif_stop_queue(dev);
+		ring->blocked = 1;
+		priv->port_stats.queue_stopped++;
+
+		/* Use interrupts to find out when queue opened */
+		cq = &priv->tx_cq[tx_ind];
+		mlx4_en_arm_cq(priv, cq);
+		return NETDEV_TX_BUSY;
+	}
+
+	/* Now that we know which Tx ring to use, check that the port is up */
+	if (unlikely(!priv->port_up)) {
+		if (netif_msg_tx_err(priv))
+			mlx4_warn(mdev, "xmit: port down!\n");
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	/* Track current inflight packets for performance analysis */
+	AVG_PERF_COUNTER(priv->pstats.inflight_avg,
+			 (u32) (ring->prod - ring->cons - 1));
+
+	/* Packet is good - grab an index and transmit it */
+	index = ring->prod & ring->size_mask;
+
+	/* See if we have enough space for whole descriptor TXBB for setting
+	 * SW ownership on next descriptor; if not, use a bounce buffer. */
+	if (likely(index + nr_txbb <= ring->size))
+		tx_desc = ring->buf + index * TXBB_SIZE;
+	else
+		tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf;
+
+	/* Save skb in tx_info ring */
+	tx_info = &ring->tx_info[index];
+	tx_info->skb = skb;
+	tx_info->nr_txbb = nr_txbb;
+
+	/* Prepare ctrl segment apart from opcode+ownership, which depends on
+	 * whether LSO is used */
+	tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag);
+	tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN * !!vlan_tag;
+	tx_desc->ctrl.fence_size = (real_size / 16) & 0x3f;
+	tx_desc->ctrl.srcrb_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE |
+						MLX4_WQE_CTRL_SOLICITED);
+	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+		tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
+							 MLX4_WQE_CTRL_TCP_UDP_CSUM);
+		priv->port_stats.tx_chksum_offload++;
+	}
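+
+	/*
+	 * Byte accounting example for the LSO branch below (illustrative
+	 * only, the numbers are hypothetical): for lso_header_size = 54
+	 * (Ethernet + IP + TCP) and gso_size = 1448, an skb of 2950 bytes
+	 * carries 2896 bytes of payload, so i = 2 segments go out on the
+	 * wire and ring->bytes grows by 2950 + (2 - 1) * 54 = 3004 --
+	 * the header is re-sent with every segment after the first.
+	 */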
+
+	/* Handle LSO (TSO) packets */
+	if (lso_header_size) {
+		/* Mark opcode as LSO */
+		op_own = cpu_to_be32(MLX4_OPCODE_LSO | (1 << 6)) |
+			((ring->prod & ring->size) ?
+				cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
+
+		/* Fill in the LSO prefix */
+		tx_desc->lso.mss_hdr_size = cpu_to_be32(
+			skb_shinfo(skb)->gso_size << 16 | lso_header_size);
+
+		/* Copy headers;
+		 * note that we already verified that it is linear */
+		memcpy(tx_desc->lso.header, skb->data, lso_header_size);
+		data = ((void *) &tx_desc->lso +
+			ALIGN(lso_header_size + 4, DS_SIZE));
+
+		priv->port_stats.tso_packets++;
+		i = ((skb->len - lso_header_size) / skb_shinfo(skb)->gso_size) +
+			!!((skb->len - lso_header_size) % skb_shinfo(skb)->gso_size);
+		ring->bytes += skb->len + (i - 1) * lso_header_size;
+		ring->packets += i;
+	} else {
+		/* Normal (non-LSO) packet */
+		op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
+			((ring->prod & ring->size) ?
+				cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
+		data = &tx_desc->data;
+		ring->bytes += max_t(int, skb->len, ETH_ZLEN);
+		ring->packets++;
+
+	}
+	AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, skb->len);
+
+
+	/* valid only for non-inline segments */
+	tx_info->data_offset = (void *) data - (void *) tx_desc;
+
+	tx_info->linear = (lso_header_size < skb_headlen(skb) &&
+			   !is_inline(skb, NULL)) ? 1 : 0;
+	data += skb_shinfo(skb)->nr_frags + tx_info->linear - 1;
+
+	if (!is_inline(skb, &fragptr)) {
+		/* Map fragments */
+		for (i = skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) {
+			frag = &skb_shinfo(skb)->frags[i];
+			dma = pci_map_page(mdev->dev->pdev, frag->page, frag->page_offset,
+					   frag->size, PCI_DMA_TODEVICE);
+			data->addr = cpu_to_be64(dma);
+			data->lkey = cpu_to_be32(mdev->mr.key);
+			wmb();
+			data->byte_count = cpu_to_be32(frag->size);
+			--data;
+		}
+
+		/* Map linear part */
+		if (tx_info->linear) {
+			dma = pci_map_single(mdev->dev->pdev, skb->data + lso_header_size,
+					     skb_headlen(skb) - lso_header_size, PCI_DMA_TODEVICE);
+			data->addr = cpu_to_be64(dma);
+			data->lkey = cpu_to_be32(mdev->mr.key);
+			wmb();
+			data->byte_count = cpu_to_be32(skb_headlen(skb) - lso_header_size);
+		}
+		tx_info->inl = 0;
+	} else {
+		build_inline_wqe(tx_desc, skb, real_size, &vlan_tag, tx_ind, fragptr);
+		tx_info->inl = 1;
+	}
+
+	ring->prod += nr_txbb;
+
+	/* If we used a bounce buffer then copy descriptor back into place */
+	if (tx_desc == (struct mlx4_en_tx_desc *) ring->bounce_buf)
+		tx_desc = mlx4_en_bounce_to_desc(priv, ring, index, desc_size);
+
+	/* Run destructor before passing skb to HW */
+	if (likely(!skb_shared(skb)))
+		skb_orphan(skb);
+
+	/* Ensure new descriptor hits memory
+	 * before setting ownership of this descriptor to HW */
+	wmb();
+	tx_desc->ctrl.owner_opcode = op_own;
+
+	/* Ring doorbell! */
+	wmb();
+	writel(ring->doorbell_qpn, mdev->uar_map + MLX4_SEND_DOORBELL);
+	dev->trans_start = jiffies;
+
+	/* Poll CQ here */
+	mlx4_en_xmit_poll(priv, tx_ind);
+
+	return NETDEV_TX_OK;
+}
+
diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
index 9c36c20..5306659 100644
--- a/drivers/net/mlx4/eq.c
+++ b/drivers/net/mlx4/eq.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
* * This software is available to you under a choice of one of two @@ -33,6 +33,7 @@ #include <linux/init.h> #include <linux/interrupt.h> +#include <linux/mm.h> #include <linux/dma-mapping.h> #include <linux/mlx4/cmd.h> @@ -162,6 +163,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) int cqn; int eqes_found = 0; int set_ci = 0; + int port; while ((eqe = next_eqe_sw(eq))) { /* @@ -202,8 +204,16 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) break; case MLX4_EVENT_TYPE_PORT_CHANGE: - mlx4_dispatch_event(dev, eqe->type, eqe->subtype, - be32_to_cpu(eqe->event.port_change.port) >> 28); + port = be32_to_cpu(eqe->event.port_change.port) >> 28; + if (eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_DOWN) { + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_DOWN, + port); + mlx4_priv(dev)->sense.do_sense_port[port] = 1; + } else { + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_UP, + port); + mlx4_priv(dev)->sense.do_sense_port[port] = 0; + } break; case MLX4_EVENT_TYPE_CQ_ERROR: @@ -262,7 +272,7 @@ static irqreturn_t mlx4_interrupt(int irq, void *dev_ptr) writel(priv->eq_table.clr_mask, priv->eq_table.clr_int); - for (i = 0; i < MLX4_NUM_EQ; ++i) + for (i = 0; i < MLX4_EQ_COMP_CPU0 + dev->caps.num_comp_vectors; ++i) work |= mlx4_eq_int(dev, &priv->eq_table.eq[i]); return IRQ_RETVAL(work); @@ -479,7 +489,7 @@ static void mlx4_free_irqs(struct mlx4_dev *dev) if (eq_table->have_irq) free_irq(dev->pdev->irq, dev); - for (i = 0; i < MLX4_NUM_EQ; ++i) + for (i = 0; i < MLX4_EQ_COMP_CPU0 + dev->caps.num_comp_vectors; ++i) if (eq_table->eq[i].have_irq) free_irq(eq_table->eq[i].irq, eq_table->eq + i); } @@ -550,11 +560,12 @@ void mlx4_unmap_eq_icm(struct mlx4_dev *dev) int mlx4_init_eq_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); + int req_eqs; int err; int i; err = mlx4_bitmap_init(&priv->eq_table.bitmap, dev->caps.num_eqs, - dev->caps.num_eqs - 1, dev->caps.reserved_eqs); + dev->caps.num_eqs - 1, dev->caps.reserved_eqs, 0); if (err) return err; @@ -570,11 +581,21 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) priv->eq_table.clr_int = priv->clr_base + (priv->eq_table.inta_pin < 32 ? 4 : 0); - err = mlx4_create_eq(dev, dev->caps.num_cqs + MLX4_NUM_SPARE_EQE, - (dev->flags & MLX4_FLAG_MSI_X) ? MLX4_EQ_COMP : 0, - &priv->eq_table.eq[MLX4_EQ_COMP]); - if (err) - goto err_out_unmap; + dev->caps.num_comp_vectors = 0; + req_eqs = (dev->flags & MLX4_FLAG_MSI_X) ? num_online_cpus() : 1; + while (req_eqs) { + err = mlx4_create_eq( + dev, dev->caps.num_cqs + MLX4_NUM_SPARE_EQE, + (dev->flags & MLX4_FLAG_MSI_X) ? + (MLX4_EQ_COMP_CPU0 + dev->caps.num_comp_vectors) : 0, + &priv->eq_table.eq[MLX4_EQ_COMP_CPU0 + + dev->caps.num_comp_vectors]); + if (err) + goto err_out_comp; + + dev->caps.num_comp_vectors++; + req_eqs--; + } err = mlx4_create_eq(dev, MLX4_NUM_ASYNC_EQE + MLX4_NUM_SPARE_EQE, (dev->flags & MLX4_FLAG_MSI_X) ? 
MLX4_EQ_ASYNC : 0, @@ -583,12 +604,16 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) goto err_out_comp; if (dev->flags & MLX4_FLAG_MSI_X) { - static const char *eq_name[] = { - [MLX4_EQ_COMP] = DRV_NAME " (comp)", - [MLX4_EQ_ASYNC] = DRV_NAME " (async)" - }; + static char eq_name[MLX4_NUM_EQ][20]; + + for (i = 0; i < MLX4_EQ_COMP_CPU0 + + dev->caps.num_comp_vectors; ++i) { + if (i == 0) + snprintf(eq_name[0], 20, DRV_NAME "(async)"); + else + snprintf(eq_name[i], 20, "eth-mlx4-%d", + i - 1); - for (i = 0; i < MLX4_NUM_EQ; ++i) { err = request_irq(priv->eq_table.eq[i].irq, mlx4_msi_x_interrupt, 0, eq_name[i], priv->eq_table.eq + i); @@ -613,7 +638,7 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n", priv->eq_table.eq[MLX4_EQ_ASYNC].eqn, err); - for (i = 0; i < MLX4_NUM_EQ; ++i) + for (i = 0; i < MLX4_EQ_COMP_CPU0 + dev->caps.num_comp_vectors; ++i) eq_set_ci(&priv->eq_table.eq[i], 1); return 0; @@ -622,9 +647,9 @@ err_out_async: mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_ASYNC]); err_out_comp: - mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_COMP]); + for (i = 0; i < dev->caps.num_comp_vectors; ++i) + mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_COMP_CPU0 + i]); -err_out_unmap: mlx4_unmap_clr_int(dev); mlx4_free_irqs(dev); @@ -643,7 +668,7 @@ void mlx4_cleanup_eq_table(struct mlx4_dev *dev) mlx4_free_irqs(dev); - for (i = 0; i < MLX4_NUM_EQ; ++i) + for (i = 0; i < MLX4_EQ_COMP_CPU0 + dev->caps.num_comp_vectors; ++i) mlx4_free_eq(dev, &priv->eq_table.eq[i]); mlx4_unmap_clr_int(dev); diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c index d8c7515..fe2ff64 100644 --- a/drivers/net/mlx4/fw.c +++ b/drivers/net/mlx4/fw.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. 
* * This software is available to you under a choice of one of two @@ -46,9 +46,9 @@ enum { extern void __buggy_use_of_MLX4_GET(void); extern void __buggy_use_of_MLX4_PUT(void); -static int mlx4_core_enable_qos = 0; -module_param_named(enable_qos, mlx4_core_enable_qos, int, 0444); -MODULE_PARM_DESC(enable_qos, "Enable Quality of Service support in the HCA if > 0, (default 0)"); +static int enable_qos; +module_param(enable_qos, bool, 0444); +MODULE_PARM_DESC(enable_qos, "Enable Quality of Service support in the HCA (default: off)"); #define MLX4_GET(dest, source, offset) \ do { \ @@ -105,11 +105,42 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u32 flags) mlx4_dbg(dev, " %s\n", fname[i]); } +int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 *inbox; + int err = 0; + +#define MOD_STAT_CFG_IN_SIZE 0x100 + +#define MOD_STAT_CFG_PG_SZ_M_OFFSET 0x002 +#define MOD_STAT_CFG_PG_SZ_OFFSET 0x003 + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; + + memset(inbox, 0, MOD_STAT_CFG_IN_SIZE); + + MLX4_PUT(inbox, cfg->log_pg_sz, MOD_STAT_CFG_PG_SZ_OFFSET); + MLX4_PUT(inbox, cfg->log_pg_sz_m, MOD_STAT_CFG_PG_SZ_M_OFFSET); + + err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_MOD_STAT_CFG, + MLX4_CMD_TIME_CLASS_A); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) { struct mlx4_cmd_mailbox *mailbox; u32 *outbox; u8 field; + u16 field16; + u32 field32; + u64 field64; u16 size; u16 stat_rate; int err; @@ -174,7 +205,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_C_MPT_ENTRY_SZ_OFFSET 0x8e #define QUERY_DEV_CAP_MTT_ENTRY_SZ_OFFSET 0x90 #define QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET 0x92 -#define QUERY_DEV_CAP_BMME_FLAGS_OFFSET 0x97 +#define QUERY_DEV_CAP_BMME_FLAGS_OFFSET 0x94 #define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98 #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0 @@ -318,7 +349,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET); dev_cap->max_vl[i] = field >> 4; MLX4_GET(field, outbox, QUERY_DEV_CAP_MTU_WIDTH_OFFSET); - dev_cap->max_mtu[i] = field >> 4; + dev_cap->ib_mtu[i] = field >> 4; dev_cap->max_port_width[i] = field & 0xf; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_GID_OFFSET); dev_cap->max_gids[i] = 1 << (field & 0xf); @@ -326,10 +357,15 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->max_pkeys[i] = 1 << (field & 0xf); } } else { +#define QUERY_PORT_SUPPORTED_TYPE_OFFSET 0x00 #define QUERY_PORT_MTU_OFFSET 0x01 #define QUERY_PORT_WIDTH_OFFSET 0x06 #define QUERY_PORT_MAX_GID_PKEY_OFFSET 0x07 +#define QUERY_PORT_MAX_MACVLAN_OFFSET 0x0a #define QUERY_PORT_MAX_VL_OFFSET 0x0b +#define QUERY_PORT_TRANS_VENDOR_OFFSET 0x18 +#define QUERY_PORT_WAVELENGTH_OFFSET 0x1c +#define QUERY_PORT_TRANS_CODE_OFFSET 0x20 for (i = 1; i <= dev_cap->num_ports; ++i) { err = mlx4_cmd_box(dev, 0, mailbox->dma, i, 0, MLX4_CMD_QUERY_PORT, @@ -337,8 +373,11 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) if (err) goto out; + MLX4_GET(field, outbox, + QUERY_PORT_SUPPORTED_TYPE_OFFSET); + dev_cap->supported_port_types[i] = field & 3; MLX4_GET(field, outbox, QUERY_PORT_MTU_OFFSET); - dev_cap->max_mtu[i] = field & 0xf; + dev_cap->ib_mtu[i] = field & 0xf; MLX4_GET(field, outbox, QUERY_PORT_WIDTH_OFFSET); 
dev_cap->max_port_width[i] = field & 0xf; MLX4_GET(field, outbox, QUERY_PORT_MAX_GID_PKEY_OFFSET); @@ -346,15 +385,23 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->max_pkeys[i] = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_PORT_MAX_VL_OFFSET); dev_cap->max_vl[i] = field & 0xf; + MLX4_GET(field, outbox, QUERY_PORT_MAX_MACVLAN_OFFSET); + dev_cap->log_max_macs[i] = field & 0xf; + dev_cap->log_max_vlans[i] = field >> 4; + dev_cap->eth_mtu[i] = be16_to_cpu(((u16 *) outbox)[1]); + dev_cap->def_mac[i] = be64_to_cpu(((u64 *) outbox)[2]); + MLX4_GET(field32, outbox, QUERY_PORT_TRANS_VENDOR_OFFSET); + dev_cap->trans_type[i] = field32 >> 24; + dev_cap->vendor_oui[i] = field32 & 0xffffff; + MLX4_GET(field16, outbox, QUERY_PORT_WAVELENGTH_OFFSET); + dev_cap->wavelength[i] = field16; + MLX4_GET(field64, outbox, QUERY_PORT_TRANS_CODE_OFFSET); + dev_cap->trans_code[i] = field64; } } - if (dev_cap->bmme_flags & 1) - mlx4_dbg(dev, "Base MM extensions: yes " - "(flags %d, rsvd L_Key %08x)\n", - dev_cap->bmme_flags, dev_cap->reserved_lkey); - else - mlx4_dbg(dev, "Base MM extensions: no\n"); + mlx4_dbg(dev, "Base MM extensions: flags %08x, rsvd L_Key %08x\n", + dev_cap->bmme_flags, dev_cap->reserved_lkey); /* * Each UAR has 4 EQ doorbells; so if a UAR is reserved, then @@ -383,7 +430,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) mlx4_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n", dev_cap->max_cq_sz, dev_cap->max_qp_sz, dev_cap->max_srq_sz); mlx4_dbg(dev, "Local CA ACK delay: %d, max MTU: %d, port width cap: %d\n", - dev_cap->local_ca_ack_delay, 128 << dev_cap->max_mtu[1], + dev_cap->local_ca_ack_delay, 128 << dev_cap->ib_mtu[1], dev_cap->max_port_width[1]); mlx4_dbg(dev, "Max SQ desc size: %d, max SQ S/G: %d\n", dev_cap->max_sq_desc_sz, dev_cap->max_sq_sg); @@ -714,8 +761,12 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) /* Check port for UD address vector: */ *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1); + /* Enable IPoIB checksumming if we can: */ + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) + *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 3); + /* Enable QoS support if module parameter set */ - if (mlx4_core_enable_qos) + if (enable_qos) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 2); /* QPC/EEC/CQC/EQC/RDMARC attributes */ @@ -752,9 +803,6 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) MLX4_PUT(inbox, (u8) (PAGE_SHIFT - 12), INIT_HCA_UAR_PAGE_SZ_OFFSET); MLX4_PUT(inbox, param->log_uar_sz, INIT_HCA_LOG_UAR_SZ_OFFSET); - if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) - *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 3); - err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 10000); if (err) @@ -799,7 +847,7 @@ int mlx4_INIT_PORT(struct mlx4_dev *dev, int port) flags |= (dev->caps.port_width_cap[port] & 0xf) << INIT_PORT_PORT_WIDTH_SHIFT; MLX4_PUT(inbox, flags, INIT_PORT_FLAGS_OFFSET); - field = 128 << dev->caps.mtu_cap[port]; + field = 128 << dev->caps.ib_mtu_cap[port]; MLX4_PUT(inbox, field, INIT_PORT_MTU_OFFSET); field = dev->caps.gid_table_len[port]; MLX4_PUT(inbox, field, INIT_PORT_MAX_GID_OFFSET); @@ -854,12 +902,10 @@ int mlx4_NOP(struct mlx4_dev *dev) } int mlx4_query_diag_counters(struct mlx4_dev *dev, int array_length, - int in_modifier, unsigned int in_offset[], - u32 counter_out[]) + u8 op_modifier, u32 in_offset[], u32 counter_out[]) { struct mlx4_cmd_mailbox *mailbox; u32 *outbox; 
- u32 op_modifer = (u32)in_modifier; int ret; int i; @@ -868,20 +914,19 @@ int mlx4_query_diag_counters(struct mlx4_dev *dev, int array_length, return PTR_ERR(mailbox); outbox = mailbox->buf; - ret = mlx4_cmd_box(dev, 0, mailbox->dma, 0, op_modifer, + ret = mlx4_cmd_box(dev, 0, mailbox->dma, 0, op_modifier, MLX4_CMD_DIAG_RPRT, MLX4_CMD_TIME_CLASS_A); if (ret) goto out; - for(i=0; i<array_length; i++) { + for (i=0; i < array_length; i++) { if (in_offset[i] > MLX4_MAILBOX_SIZE) { - ret = -1; + ret = -EINVAL; goto out; } - MLX4_GET(counter_out[i], outbox, in_offset[i]); + MLX4_GET(counter_out[i], outbox, in_offset[i]); } - ret = 0; out: mlx4_free_cmd_mailbox(dev, mailbox); diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h index 306cb9b..cabcb87 100644 --- a/drivers/net/mlx4/fw.h +++ b/drivers/net/mlx4/fw.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. * Copyright (c) 2006, 2007 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two @@ -38,6 +38,11 @@ #include "mlx4.h" #include "icm.h" +struct mlx4_mod_stat_cfg { + u8 log_pg_sz; + u8 log_pg_sz_m; +}; + struct mlx4_dev_cap { int max_srq_sz; int max_qp_sz; @@ -61,11 +66,17 @@ struct mlx4_dev_cap { int local_ca_ack_delay; int num_ports; u32 max_msg_sz; - int max_mtu[MLX4_MAX_PORTS + 1]; + int ib_mtu[MLX4_MAX_PORTS + 1]; int max_port_width[MLX4_MAX_PORTS + 1]; int max_vl[MLX4_MAX_PORTS + 1]; int max_gids[MLX4_MAX_PORTS + 1]; int max_pkeys[MLX4_MAX_PORTS + 1]; + u64 def_mac[MLX4_MAX_PORTS + 1]; + int eth_mtu[MLX4_MAX_PORTS + 1]; + int trans_type[MLX4_MAX_PORTS + 1]; + int vendor_oui[MLX4_MAX_PORTS + 1]; + int wavelength[MLX4_MAX_PORTS + 1]; + u64 trans_code[MLX4_MAX_PORTS + 1]; u16 stat_rate_support; u32 flags; int reserved_uars; @@ -93,10 +104,13 @@ struct mlx4_dev_cap { int cmpt_entry_sz; int mtt_entry_sz; int resize_srq; - u8 bmme_flags; + u32 bmme_flags; u32 reserved_lkey; u64 max_icm_sz; int max_gso_sz; + u8 supported_port_types[MLX4_MAX_PORTS + 1]; + u8 log_max_macs[MLX4_MAX_PORTS + 1]; + u8 log_max_vlans[MLX4_MAX_PORTS + 1]; }; struct mlx4_adapter { @@ -162,5 +176,6 @@ int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages); int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm); int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev); int mlx4_NOP(struct mlx4_dev *dev); +int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg); #endif /* MLX4_FW_H */ diff --git a/drivers/net/mlx4/icm.c b/drivers/net/mlx4/icm.c index 2a5bef6..baf4bf6 100644 --- a/drivers/net/mlx4/icm.c +++ b/drivers/net/mlx4/icm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/net/mlx4/icm.h b/drivers/net/mlx4/icm.h index 6c44edf..ab56a2f 100644 --- a/drivers/net/mlx4/icm.h +++ b/drivers/net/mlx4/icm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. 
* * This software is available to you under a choice of one of two diff --git a/drivers/net/mlx4/intf.c b/drivers/net/mlx4/intf.c index be5d9e9..cb72e28 100644 --- a/drivers/net/mlx4/intf.c +++ b/drivers/net/mlx4/intf.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -30,8 +31,6 @@ * SOFTWARE. */ -#include <linux/mlx4/driver.h> - #include "mlx4.h" struct mlx4_device_context { @@ -113,8 +112,37 @@ void mlx4_unregister_interface(struct mlx4_interface *intf) } EXPORT_SYMBOL_GPL(mlx4_unregister_interface); -void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_event type, - int subtype, int port) +struct mlx4_dev *mlx4_query_interface(void *int_dev, int *port) +{ + struct mlx4_priv *priv; + struct mlx4_device_context *dev_ctx; + enum mlx4_query_reply r; + unsigned long flags; + + mutex_lock(&intf_mutex); + + list_for_each_entry(priv, &dev_list, dev_list) { + spin_lock_irqsave(&priv->ctx_lock, flags); + list_for_each_entry(dev_ctx, &priv->ctx_list, list) { + if (!dev_ctx->intf->query) + continue; + r = dev_ctx->intf->query(dev_ctx->context, int_dev); + if (r != MLX4_QUERY_NOT_MINE) { + *port = r; + spin_unlock_irqrestore(&priv->ctx_lock, flags); + mutex_unlock(&intf_mutex); + return &priv->dev; + } + } + spin_unlock_irqrestore(&priv->ctx_lock, flags); + } + + mutex_unlock(&intf_mutex); + return NULL; +} +EXPORT_SYMBOL_GPL(mlx4_query_interface); + +void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, int port) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_device_context *dev_ctx; @@ -124,8 +152,7 @@ void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_event type, list_for_each_entry(dev_ctx, &priv->ctx_list, list) if (dev_ctx->intf->event) - dev_ctx->intf->event(dev, dev_ctx->context, type, - subtype, port); + dev_ctx->intf->event(dev, dev_ctx->context, type, port); spin_unlock_irqrestore(&priv->ctx_lock, flags); } @@ -144,6 +171,8 @@ int mlx4_register_device(struct mlx4_dev *dev) mutex_unlock(&intf_mutex); mlx4_start_catas_poll(dev); + mlx4_start_sense(dev); + return 0; } @@ -152,6 +181,8 @@ void mlx4_unregister_device(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_interface *intf; + mlx4_stop_sense(dev); + mlx4_stop_catas_poll(dev); mutex_lock(&intf_mutex); diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index bda8839..9358528 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. 
* * This software is available to you under a choice of one of two @@ -75,17 +75,17 @@ MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero"); #endif /* CONFIG_PCI_MSI */ -static const char mlx4_version[] __devinitdata = +static char mlx4_version[] __devinitdata = DRV_NAME ": Mellanox ConnectX core driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; static struct mlx4_profile default_profile = { - .num_qp = 1 << 17, + .num_qp = 1 << 18, .num_srq = 1 << 16, .rdmarc_per_qp = 1 << 4, .num_cq = 1 << 16, .num_mcg = 1 << 13, - .num_mpt = 1 << 17, + .num_mpt = 1 << 19, .num_mtt = 1 << 20, }; @@ -114,6 +114,10 @@ module_param_named(log_num_mtt, mod_param_profile.num_mtt, int, 0444); MODULE_PARM_DESC(log_num_mtt, "log maximum number of memory translation table segments per HCA"); +static int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG); +module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444); +MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-5)"); + static void process_mod_param_profile(void) { default_profile.num_qp = (mod_param_profile.num_qp ? @@ -139,6 +143,75 @@ static void process_mod_param_profile(void) default_profile.num_mtt); } +static int log_num_mac = 2; +module_param_named(log_num_mac, log_num_mac, int, 0444); +MODULE_PARM_DESC(log_num_mac, "Log 2 Max number of MACs per ETH port (1-7)"); + +static int log_num_vlan; +module_param_named(log_num_vlan, log_num_vlan, int, 0444); +MODULE_PARM_DESC(log_num_vlan, "Log 2 Max number of VLANs per ETH port (0-7)"); + +static int use_prio; +module_param_named(use_prio, use_prio, bool, 0444); +MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports " + "(0/1, default 0)"); + +struct mlx4_port_config +{ + struct list_head list; + enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1]; + struct pci_dev *pdev; +}; +static LIST_HEAD(config_list); + +static void mlx4_config_cleanup(void) +{ + struct mlx4_port_config *config, *tmp; + + list_for_each_entry_safe(config, tmp, &config_list, list) { + list_del(&config->list); + kfree(config); + } +} + +int mlx4_check_port_params(struct mlx4_dev *dev, + enum mlx4_port_type *port_type) +{ + int i; + + for (i = 0; i < dev->caps.num_ports - 1; i++) { + if (port_type[i] != port_type[i+1]) { + if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) { + mlx4_err(dev, "Only same port types supported " + "on this HCA, aborting.\n"); + return -EINVAL; + } + if ((port_type[i] == MLX4_PORT_TYPE_ETH) || + (port_type[i+1] == MLX4_PORT_TYPE_IB)) + return -EINVAL; + } + } + + for (i = 0; i < dev->caps.num_ports; i++) { + if (!(port_type[i] & dev->caps.supported_type[i+1])) { + mlx4_err(dev, "Requested port type for port %d is not " + "supported on this HCA\n", i + 1); + return -EINVAL; + } + } + return 0; +} + +static void mlx4_set_port_mask(struct mlx4_dev *dev) +{ + int i; + + dev->caps.port_mask = 0; + for (i = 1; i <= dev->caps.num_ports; ++i) + if (dev->caps.port_type[i] == MLX4_PORT_TYPE_IB) + dev->caps.port_mask |= 1 << (i - 1); +} + static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) { int err; @@ -174,10 +247,17 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.num_ports = dev_cap->num_ports; for (i = 1; i <= dev->caps.num_ports; ++i) { dev->caps.vl_cap[i] = dev_cap->max_vl[i]; - dev->caps.mtu_cap[i] = dev_cap->max_mtu[i]; + dev->caps.ib_mtu_cap[i] = dev_cap->ib_mtu[i]; dev->caps.gid_table_len[i] = dev_cap->max_gids[i]; dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i]; dev->caps.port_width_cap[i] = 
dev_cap->max_port_width[i]; + dev->caps.eth_mtu_cap[i] = dev_cap->eth_mtu[i]; + dev->caps.def_mac[i] = dev_cap->def_mac[i]; + dev->caps.supported_type[i] = dev_cap->supported_port_types[i]; + dev->caps.trans_type[i] = dev_cap->trans_type[i]; + dev->caps.vendor_oui[i] = dev_cap->vendor_oui[i]; + dev->caps.wavelength[i] = dev_cap->wavelength[i]; + dev->caps.trans_code[i] = dev_cap->trans_code[i]; } dev->caps.num_uars = dev_cap->uar_size / PAGE_SIZE; @@ -188,7 +268,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.max_rq_sg = dev_cap->max_rq_sg; dev->caps.max_wqes = dev_cap->max_qp_sz; dev->caps.max_qp_init_rdma = dev_cap->max_requester_per_qp; - dev->caps.reserved_qps = dev_cap->reserved_qps; dev->caps.max_srq_wqes = dev_cap->max_srq_sz; dev->caps.max_srq_sge = dev_cap->max_rq_sg - 1; dev->caps.reserved_srqs = dev_cap->reserved_srqs; @@ -203,22 +282,247 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.max_cqes = dev_cap->max_cq_sz - 1; dev->caps.reserved_cqs = dev_cap->reserved_cqs; dev->caps.reserved_eqs = dev_cap->reserved_eqs; + dev->caps.mtts_per_seg = 1 << log_mtts_per_seg; dev->caps.reserved_mtts = DIV_ROUND_UP(dev_cap->reserved_mtts, - MLX4_MTT_ENTRY_PER_SEG); + dev->caps.mtts_per_seg); dev->caps.reserved_mrws = dev_cap->reserved_mrws; dev->caps.reserved_uars = dev_cap->reserved_uars; dev->caps.reserved_pds = dev_cap->reserved_pds; - dev->caps.mtt_entry_sz = MLX4_MTT_ENTRY_PER_SEG * dev_cap->mtt_entry_sz; + dev->caps.mtt_entry_sz = dev->caps.mtts_per_seg * dev_cap->mtt_entry_sz; dev->caps.max_msg_sz = dev_cap->max_msg_sz; dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1); dev->caps.flags = dev_cap->flags; + dev->caps.bmme_flags = dev_cap->bmme_flags; + dev->caps.reserved_lkey = dev_cap->reserved_lkey; dev->caps.stat_rate_support = dev_cap->stat_rate_support; dev->caps.max_gso_sz = dev_cap->max_gso_sz; + dev->caps.log_num_macs = log_num_mac; + dev->caps.log_num_vlans = log_num_vlan; + dev->caps.log_num_prios = use_prio ? 3 : 0; + + for (i = 1; i <= dev->caps.num_ports; ++i) { + if (dev->caps.supported_type[i] != MLX4_PORT_TYPE_ETH) + dev->caps.port_type[i] = MLX4_PORT_TYPE_IB; + else + dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH; + dev->caps.possible_type[i] = dev->caps.port_type[i]; + mlx4_priv(dev)->sense.sense_allowed[i] = + dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO ? 
1 : 0;
+
+		if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) {
+			dev->caps.log_num_macs = dev_cap->log_max_macs[i];
+			mlx4_warn(dev, "Requested number of MACs is too high "
+				  "for port %d, reducing to %d.\n",
+				  i, 1 << dev->caps.log_num_macs);
+		}
+		if (dev->caps.log_num_vlans > dev_cap->log_max_vlans[i]) {
+			dev->caps.log_num_vlans = dev_cap->log_max_vlans[i];
+			mlx4_warn(dev, "Requested number of VLANs is too high "
+				  "for port %d, reducing to %d.\n",
+				  i, 1 << dev->caps.log_num_vlans);
+		}
+	}
+
+	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
+	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
+		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
+		(1 << dev->caps.log_num_macs) *
+		(1 << dev->caps.log_num_vlans) *
+		(1 << dev->caps.log_num_prios) *
+		dev->caps.num_ports;
+
+	dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
+		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
+		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR];
+
+	return 0;
+}
+
+static int mlx4_save_config(struct mlx4_dev *dev)
+{
+	struct mlx4_port_config *config;
+	int i;
+
+	list_for_each_entry(config, &config_list, list) {
+		if (config->pdev == dev->pdev) {
+			for (i = 1; i <= dev->caps.num_ports; i++)
+				config->port_type[i] = dev->caps.possible_type[i];
+			return 0;
+		}
+	}
+
+	config = kmalloc(sizeof(struct mlx4_port_config), GFP_KERNEL);
+	if (!config)
+		return -ENOMEM;
+
+	config->pdev = dev->pdev;
+	for (i = 1; i <= dev->caps.num_ports; i++)
+		config->port_type[i] = dev->caps.possible_type[i];
+
+	list_add_tail(&config->list, &config_list);
+	return 0;
+}
+
+/* Changes the port configuration of the device.
+ * Every caller of this function must hold the port mutex */
+int mlx4_change_port_types(struct mlx4_dev *dev,
+			   enum mlx4_port_type *port_types)
+{
+	int err = 0;
+	int change = 0;
+	int port;
+
+	for (port = 0; port < dev->caps.num_ports; port++) {
+		/* Change the port type only if the new type is different
+		 * from the current, and not set to Auto */
+		if (port_types[port] != dev->caps.port_type[port + 1]) {
+			change = 1;
+			dev->caps.port_type[port + 1] = port_types[port];
+		}
+	}
+	if (change) {
+		mlx4_unregister_device(dev);
+		for (port = 1; port <= dev->caps.num_ports; port++) {
+			mlx4_CLOSE_PORT(dev, port);
+			err = mlx4_SET_PORT(dev, port);
+			if (err) {
+				mlx4_err(dev, "Failed to set port %d, "
+					 "aborting\n", port);
+				goto out;
+			}
+		}
+		mlx4_set_port_mask(dev);
+		mlx4_save_config(dev);
+		err = mlx4_register_device(dev);
+	}
+
+out:
+	return err;
+}
+
+static ssize_t show_port_type(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
+						   port_attr);
+	struct mlx4_dev *mdev = info->dev;
+	char type[8];
+
+	sprintf(type, "%s",
+		(mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ?
+		"ib" : "eth");
+	if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO)
+		sprintf(buf, "auto (%s)\n", type);
+	else
+		sprintf(buf, "%s\n", type);
+
+	return strlen(buf);
+}
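+
+/*
+ * Usage sketch (illustrative; the PCI address below is hypothetical).
+ * The attribute is created later as "mlx4_port<N>" under the device's
+ * sysfs directory, so reading and writing it looks like:
+ *
+ *   cat /sys/bus/pci/devices/0000:0b:00.0/mlx4_port1   ->  "auto (eth)"
+ *   echo ib > /sys/bus/pci/devices/0000:0b:00.0/mlx4_port1
+ *
+ * A write of "ib", "eth" or "auto" lands in set_port_type() below.
+ */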
+ "ib" : "eth"); + if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO) + sprintf(buf, "auto (%s)\n", type); + else + sprintf(buf, "%s\n", type); + + return strlen(buf); +} + +static ssize_t set_port_type(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, + port_attr); + struct mlx4_dev *mdev = info->dev; + struct mlx4_priv *priv = mlx4_priv(mdev); + enum mlx4_port_type types[MLX4_MAX_PORTS]; + enum mlx4_port_type new_types[MLX4_MAX_PORTS]; + int i; + int err = 0; + + if (!strcmp(buf, "ib\n")) + info->tmp_type = MLX4_PORT_TYPE_IB; + else if (!strcmp(buf, "eth\n")) + info->tmp_type = MLX4_PORT_TYPE_ETH; + else if (!strcmp(buf, "auto\n")) + info->tmp_type = MLX4_PORT_TYPE_AUTO; + else { + mlx4_err(mdev, "%s is not supported port type\n", buf); + return -EINVAL; + } + + mutex_lock(&priv->port_mutex); + /* Possible type is always the one that was delivered */ + mdev->caps.possible_type[info->port] = info->tmp_type; + + /* Collect the required port types from all ports, + * If not specified, use the currently configured */ + for (i = 0; i < mdev->caps.num_ports; i++) { + types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type : + mdev->caps.possible_type[i+1]; + if (types[i] == MLX4_PORT_TYPE_AUTO) + types[i] = mdev->caps.port_type[i+1]; + } + + if (priv->trig) { + /* Wait for the other ports to be modified, or perform + * the change now */ + if (++priv->changed_ports < mdev->caps.num_ports) + goto out; + else + priv->trig = priv->changed_ports = 0; + } + + if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) { + for (i = 1; i <= mdev->caps.num_ports; i++) { + if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) { + mdev->caps.possible_type[i] = mdev->caps.port_type[i]; + err = -EINVAL; + } + } + } + if (err) { + mlx4_err(mdev, "Auto sensing is not supported on this HCA. " + "Set only 'eth' or 'ib' for both ports " + "(should be the same)\n"); + goto out; + } + + mlx4_do_sense_ports(mdev, new_types, types); + + err = mlx4_check_port_params(mdev, new_types); + if (err) + goto out; + + /* We are about to apply the changes after the configuration + * was verified, no need to remember the temporary types + * any more */ + for (i = 0; i < mdev->caps.num_ports; i++) { + priv->port[i + 1].tmp_type = 0; + } + + err = mlx4_change_port_types(mdev, new_types); + +out: + mutex_unlock(&priv->port_mutex); + return err ? err : count; +} + +/* + * This function is invoked if user wants to modify all port types + * at once. We will wait for all the ports to be assigned new values, + * and only then will perform the change. 
+ */ +static ssize_t trigger_port(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mlx4_priv *priv = container_of(attr, struct mlx4_priv, trigger_attr); + if (!priv) + return -ENODEV; + + mutex_lock(&priv->port_mutex); + priv->trig = 1; + mutex_unlock(&priv->port_mutex); + return count; +} +DEVICE_ATTR(port_trigger, S_IWUGO, NULL, trigger_port); + +static int mlx4_load_fw(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int err; @@ -252,8 +556,8 @@ err_free: return err; } -static int __devinit mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base, - int cmpt_entry_sz) +static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base, + int cmpt_entry_sz) { struct mlx4_priv *priv = mlx4_priv(dev); int err; @@ -263,7 +567,8 @@ static int __devinit mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base, ((u64) (MLX4_CMPT_TYPE_QP * cmpt_entry_sz) << MLX4_CMPT_SHIFT), cmpt_entry_sz, dev->caps.num_qps, - dev->caps.reserved_qps, 0, 0); + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], + 0, 0); if (err) goto err; @@ -388,7 +693,8 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, init_hca->qpc_base, dev_cap->qpc_entry_sz, dev->caps.num_qps, - dev->caps.reserved_qps, 0, 0); + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], + 0, 0); if (err) { mlx4_err(dev, "Failed to map QP context memory, aborting.\n"); goto err_unmap_dmpt; @@ -398,7 +704,8 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, init_hca->auxc_base, dev_cap->aux_entry_sz, dev->caps.num_qps, - dev->caps.reserved_qps, 0, 0); + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], + 0, 0); if (err) { mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n"); goto err_unmap_qp; @@ -408,7 +715,8 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, init_hca->altc_base, dev_cap->altc_entry_sz, dev->caps.num_qps, - dev->caps.reserved_qps, 0, 0); + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], + 0, 0); if (err) { mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n"); goto err_unmap_auxc; @@ -418,7 +726,8 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, init_hca->rdmarc_base, dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift, dev->caps.num_qps, - dev->caps.reserved_qps, 0, 0); + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], + 0, 0); if (err) { mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n"); goto err_unmap_altc; @@ -539,10 +848,13 @@ static int mlx4_init_hca(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_adapter adapter; struct mlx4_dev_cap dev_cap; + struct mlx4_mod_stat_cfg mlx4_cfg; struct mlx4_profile profile; struct mlx4_init_hca_param init_hca; + struct mlx4_port_config *config; u64 icm_size; int err; + int i; err = mlx4_QUERY_FW(dev); if (err) { @@ -556,6 +868,12 @@ static int mlx4_init_hca(struct mlx4_dev *dev) return err; } + mlx4_cfg.log_pg_sz_m = 1; + mlx4_cfg.log_pg_sz = 0; + err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg); + if (err) + mlx4_warn(dev, "Failed to override log_pg_sz parameter\n"); + err = mlx4_dev_cap(dev, &dev_cap); if (err) { mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); @@ -565,6 +883,18 @@ static int mlx4_init_hca(struct mlx4_dev *dev) process_mod_param_profile(); profile = default_profile; + list_for_each_entry(config, &config_list, list) { + if (config->pdev == dev->pdev) { + for (i = 1; i <= dev->caps.num_ports; i++) { + dev->caps.possible_type[i] = 
config->port_type[i]; + if (config->port_type[i] != MLX4_PORT_TYPE_AUTO) + dev->caps.port_type[i] = config->port_type[i]; + } + } + } + + mlx4_set_port_mask(dev); + icm_size = mlx4_make_profile(dev, &profile, &dev_cap, &init_hca); if ((long long) icm_size < 0) { err = icm_size; @@ -611,6 +941,8 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int err; + int port; + __be32 ib_port_default_caps; err = mlx4_init_uar_table(dev); if (err) { @@ -709,8 +1041,27 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) goto err_qp_table_free; } + for (port = 1; port <= dev->caps.num_ports; port++) { + ib_port_default_caps = 0; + err = mlx4_get_port_ib_caps(dev, port, &ib_port_default_caps); + if (err) + mlx4_warn(dev, "failed to get port %d default " + "ib capabilities (%d). Continuing with " + "caps = 0\n", port, err); + dev->caps.ib_port_def_cap[port] = ib_port_default_caps; + err = mlx4_SET_PORT(dev, port); + if (err) { + mlx4_err(dev, "Failed to set port %d, aborting\n", + port); + goto err_mcg_table_free; + } + } + return 0; +err_mcg_table_free: + mlx4_cleanup_mcg_table(dev); + err_qp_table_free: mlx4_cleanup_qp_table(dev); @@ -743,26 +1094,28 @@ err_uar_table_free: return err; } -static void __devinit mlx4_enable_msi_x(struct mlx4_dev *dev) +static void mlx4_enable_msi_x(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct msix_entry entries[MLX4_NUM_EQ]; + int needed_vectors = MLX4_EQ_COMP_CPU0 + num_online_cpus(); int err; int i; if (msi_x) { - for (i = 0; i < MLX4_NUM_EQ; ++i) + for (i = 0; i < needed_vectors; ++i) entries[i].entry = i; - err = pci_enable_msix(dev->pdev, entries, ARRAY_SIZE(entries)); + err = pci_enable_msix(dev->pdev, entries, needed_vectors); if (err) { if (err > 0) - mlx4_info(dev, "Only %d MSI-X vectors available, " - "not using MSI-X\n", err); + mlx4_info(dev, "Only %d MSI-X vectors " + "available, need %d. 
Not using MSI-X\n", + err, needed_vectors); goto no_msi; } - for (i = 0; i < MLX4_NUM_EQ; ++i) + for (i = 0; i < needed_vectors; ++i) priv->eq_table.eq[i].irq = entries[i].vector; dev->flags |= MLX4_FLAG_MSI_X; @@ -770,15 +1123,57 @@ static void __devinit mlx4_enable_msi_x(struct mlx4_dev *dev) } no_msi: - for (i = 0; i < MLX4_NUM_EQ; ++i) + for (i = 0; i < needed_vectors; ++i) priv->eq_table.eq[i].irq = dev->pdev->irq; } +static int mlx4_init_port_info(struct mlx4_dev *dev, int port) +{ + struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; + struct attribute attr = {.name = info->dev_name, + .mode = S_IWUGO | S_IRUGO}; + int err = 0; + + info->dev = dev; + info->port = port; + mlx4_init_mac_table(dev, &info->mac_table); + mlx4_init_vlan_table(dev, &info->vlan_table); + + sprintf(info->dev_name, "mlx4_port%d", port); + memcpy(&info->port_attr.attr, &attr, sizeof(attr)); + info->port_attr.show = show_port_type; + info->port_attr.store = set_port_type; + + err = device_create_file(&dev->pdev->dev, &info->port_attr); + if (err) { + mlx4_err(dev, "Failed to create file for port %d\n", port); + info->port = -1; + } + + return err; +} + +static void mlx4_cleanup_port_info(struct mlx4_port_info *info) +{ + if (info->port < 0) + return; + + device_remove_file(&info->dev->pdev->dev, &info->port_attr); +} + +static int mlx4_init_trigger(struct mlx4_priv *priv) +{ + memcpy(&priv->trigger_attr, &dev_attr_port_trigger, + sizeof(struct device_attribute)); + return device_create_file(&priv->dev.pdev->dev, &priv->trigger_attr); +} + static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { struct mlx4_priv *priv; struct mlx4_dev *dev; int err; + int port; printk(KERN_INFO PFX "Initializing %s\n", pci_name(pdev)); @@ -853,6 +1248,11 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); + mutex_init(&priv->port_mutex); + + INIT_LIST_HEAD(&priv->pgdir_list); + mutex_init(&priv->pgdir_mutex); + /* * Now reset the HCA before we touch the PCI capabilities or * attempt a firmware command, since a boot ROM may have left @@ -885,15 +1285,38 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) if (err) goto err_close; + for (port = 1; port <= dev->caps.num_ports; port++) { + err = mlx4_init_port_info(dev, port); + if (err) + goto err_port; + } + + err = mlx4_init_trigger(priv); + if (err) + goto err_port; + + err = mlx4_sense_init(dev); + if (err) { + device_remove_file(&dev->pdev->dev, &priv->trigger_attr); + goto err_port; + } + err = mlx4_register_device(dev); if (err) - goto err_cleanup; + goto err_sense; pci_set_drvdata(pdev, dev); return 0; -err_cleanup: +err_sense: + mlx4_sense_cleanup(dev); + device_remove_file(&dev->pdev->dev, &priv->trigger_attr); + +err_port: + for (port = 1; port <= dev->caps.num_ports; port++) + mlx4_cleanup_port_info(&priv->port[port]); + mlx4_cleanup_mcg_table(dev); mlx4_cleanup_qp_table(dev); mlx4_cleanup_srq_table(dev); @@ -948,11 +1371,13 @@ static void mlx4_remove_one(struct pci_dev *pdev) int p; if (dev) { + mlx4_sense_cleanup(dev); mlx4_unregister_device(dev); - - for (p = 1; p <= dev->caps.num_ports; ++p) + device_remove_file(&dev->pdev->dev, &priv->trigger_attr); + for (p = 1; p <= dev->caps.num_ports; p++) { + mlx4_cleanup_port_info(&priv->port[p]); mlx4_CLOSE_PORT(dev, p); - + } mlx4_cleanup_mcg_table(dev); mlx4_cleanup_qp_table(dev); mlx4_cleanup_srq_table(dev); @@ -991,6 +1416,10 @@ static struct pci_device_id 
mlx4_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x6354) }, /* MT25408 "Hermon" QDR */ { PCI_VDEVICE(MELLANOX, 0x6732) }, /* MT25408 "Hermon" DDR PCIe gen2 */ { PCI_VDEVICE(MELLANOX, 0x673c) }, /* MT25408 "Hermon" QDR PCIe gen2 */ + { PCI_VDEVICE(MELLANOX, 0x6368) }, /* MT25408 "Hermon" EN 10GigE */ + { PCI_VDEVICE(MELLANOX, 0x6750) }, /* MT25408 "Hermon" EN 10GigE + Gen2 */ + { PCI_VDEVICE(MELLANOX, 0x6372) }, /* MT25408 "YATIR" EN 10GigE */ + { PCI_VDEVICE(MELLANOX, 0x675a) }, /* MT25408 "YATIR" EN 10GigE + Gen2 */ { 0, } }; @@ -1003,10 +1432,33 @@ static struct pci_driver mlx4_driver = { .remove = __devexit_p(mlx4_remove_one) }; +static int __init mlx4_verify_params(void) +{ + if ((log_num_mac < 0) || (log_num_mac > 7)) { + printk(KERN_WARNING "mlx4_core: bad log_num_mac: %d\n", log_num_mac); + return -1; + } + + if ((log_num_vlan < 0) || (log_num_vlan > 7)) { + printk(KERN_WARNING "mlx4_core: bad log_num_vlan: %d\n", log_num_vlan); + return -1; + } + + if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 5)) { + printk(KERN_WARNING "mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg); + return -1; + } + + return 0; +} + static int __init mlx4_init(void) { int ret; + if (mlx4_verify_params()) + return -EINVAL; + ret = mlx4_catas_init(); if (ret) return ret; @@ -1019,6 +1471,7 @@ static void __exit mlx4_cleanup(void) { pci_unregister_driver(&mlx4_driver); mlx4_catas_cleanup(); + mlx4_config_cleanup(); } module_init(mlx4_init); diff --git a/drivers/net/mlx4/mcg.c b/drivers/net/mlx4/mcg.c index 7aa375b..ad62dca 100644 --- a/drivers/net/mlx4/mcg.c +++ b/drivers/net/mlx4/mcg.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -38,6 +39,9 @@ #include "mlx4.h" +#define MGM_QPN_MASK 0x00FFFFFF +#define MGM_BLCK_LB_BIT 30 + struct mlx4_mgm { __be32 next_gid_index; __be32 members_count; @@ -153,7 +157,8 @@ static int find_mgm(struct mlx4_dev *dev, return err; } -int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]) +int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], + int block_mcast_loopback) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cmd_mailbox *mailbox; @@ -210,6 +215,7 @@ int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]) mgm->qp[members_count++] = cpu_to_be32((qp->qpn & MGM_QPN_MASK) | (!!mlx4_blck_lb << MGM_BLCK_LB_BIT)); + mgm->members_count = cpu_to_be32(members_count); err = mlx4_WRITE_MCG(dev, index, mailbox); @@ -359,8 +365,8 @@ int mlx4_init_mcg_table(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); int err; - err = mlx4_bitmap_init(&priv->mcg_table.bitmap, - dev->caps.num_amgms, dev->caps.num_amgms - 1, 0); + err = mlx4_bitmap_init(&priv->mcg_table.bitmap, dev->caps.num_amgms, + dev->caps.num_amgms - 1, 0, 0); if (err) return err; diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index 8fc297e..f2ce940 100644 --- a/drivers/net/mlx4/mlx4.h +++ b/drivers/net/mlx4/mlx4.h @@ -2,7 +2,7 @@ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved. - * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. 
All rights reserved. * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -40,14 +40,17 @@ #include <linux/mutex.h> #include <linux/radix-tree.h> #include <linux/timer.h> +#include <linux/random.h> +#include <linux/workqueue.h> #include <linux/mlx4/device.h> +#include <linux/mlx4/driver.h> #include <linux/mlx4/doorbell.h> #define DRV_NAME "mlx4_core" #define PFX DRV_NAME ": " #define DRV_VERSION "1.0" -#define DRV_RELDATE "February 28, 2008" +#define DRV_RELDATE "April 4, 2008" enum { MLX4_HCR_BASE = 0x80680, @@ -63,8 +66,8 @@ enum { enum { MLX4_EQ_ASYNC, - MLX4_EQ_COMP, - MLX4_NUM_EQ + MLX4_EQ_COMP_CPU0, + MLX4_NUM_EQ = MLX4_EQ_COMP_CPU0 + NR_CPUS }; enum { @@ -106,14 +109,13 @@ extern int mlx4_debug_level; #define mlx4_warn(mdev, format, arg...) \ dev_warn(&mdev->pdev->dev, format, ## arg) -#define MGM_QPN_MASK 0x00FFFFFF -#define MGM_BLCK_LB_BIT 30 extern int mlx4_blck_lb; struct mlx4_bitmap { u32 last; u32 top; u32 max; + u32 reserved_top; u32 mask; spinlock_t lock; unsigned long *table; @@ -121,6 +123,7 @@ struct mlx4_bitmap { struct mlx4_buddy { unsigned long **bits; + unsigned int *num_free; int max_order; spinlock_t lock; }; @@ -146,6 +149,7 @@ struct mlx4_eq { u16 irq; u16 have_irq; int nent; + int load; struct mlx4_buf_list *page_list; struct mlx4_mtt mtt; }; @@ -253,6 +257,49 @@ struct mlx4_catas_err { struct list_head list; }; +struct mlx4_mac_table { +#define MLX4_MAX_MAC_NUM 128 +#define MLX4_MAC_MASK 0xffffffffffff +#define MLX4_MAC_VALID_SHIFT 63 +#define MLX4_MAC_TABLE_SIZE (MLX4_MAX_MAC_NUM << 3) + __be64 entries[MLX4_MAX_MAC_NUM]; + int refs[MLX4_MAX_MAC_NUM]; + struct semaphore mac_sem; + int total; + int max; +}; + +struct mlx4_vlan_table { +#define MLX4_MAX_VLAN_NUM 128 +#define MLX4_VLAN_MASK 0xfff +#define MLX4_VLAN_VALID (1 << 31) +#define MLX4_VLAN_TABLE_SIZE (MLX4_MAX_VLAN_NUM << 2) + __be32 entries[MLX4_MAX_VLAN_NUM]; + int refs[MLX4_MAX_VLAN_NUM]; + struct semaphore vlan_sem; + int total; + int max; +}; + +struct mlx4_port_info { + struct mlx4_dev *dev; + int port; + char dev_name[16]; + struct device_attribute port_attr; + enum mlx4_port_type tmp_type; + struct mlx4_mac_table mac_table; + struct mlx4_vlan_table vlan_table; +}; + +struct mlx4_sense { + struct mlx4_dev *dev; + u8 do_sense_port[MLX4_MAX_PORTS + 1]; + u8 sense_allowed[MLX4_MAX_PORTS + 1]; + struct delayed_work sense_poll; + struct workqueue_struct *sense_wq; + u32 resched; +}; + struct mlx4_priv { struct mlx4_dev dev; @@ -260,6 +307,9 @@ struct mlx4_priv { struct list_head ctx_list; spinlock_t ctx_lock; + struct list_head pgdir_list; + struct mutex pgdir_mutex; + struct mlx4_fw fw; struct mlx4_cmd cmd; @@ -278,6 +328,12 @@ struct mlx4_priv { struct mlx4_uar driver_uar; void __iomem *kar; + struct mlx4_port_info port[MLX4_MAX_PORTS + 1]; + struct device_attribute trigger_attr; + int trig; + int changed_ports; + struct mlx4_sense sense; + struct mutex port_mutex; }; static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev) @@ -285,9 +341,14 @@ static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev) return container_of(dev, struct mlx4_priv, dev); } +#define MLX4_SENSE_RANGE (HZ * 3) + u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap); void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj); -int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, u32 reserved); +u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align); +void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 
obj, int cnt); +int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, + u32 reserved_bot, u32 reserved_top); void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap); int mlx4_reset(struct mlx4_dev *dev); @@ -317,8 +378,7 @@ void mlx4_catas_cleanup(void); int mlx4_restart_one(struct pci_dev *pdev); int mlx4_register_device(struct mlx4_dev *dev); void mlx4_unregister_device(struct mlx4_dev *dev); -void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_event type, - int subtype, int port); +void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, int port); struct mlx4_dev_cap; struct mlx4_init_hca_param; @@ -346,4 +406,22 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type); void mlx4_handle_catas_err(struct mlx4_dev *dev); +void mlx4_do_sense_ports(struct mlx4_dev *dev, + enum mlx4_port_type *stype, + enum mlx4_port_type *defaults); +void mlx4_start_sense(struct mlx4_dev *dev); +void mlx4_stop_sense(struct mlx4_dev *dev); +int mlx4_sense_init(struct mlx4_dev *dev); +void mlx4_sense_cleanup(struct mlx4_dev *dev); +int mlx4_check_port_params(struct mlx4_dev *dev, + enum mlx4_port_type *port_type); +int mlx4_change_port_types(struct mlx4_dev *dev, + enum mlx4_port_type *port_types); + +void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table); +void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table); + +int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port); +int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps); + #endif /* MLX4_H */ diff --git a/drivers/net/mlx4/mlx4_en.h b/drivers/net/mlx4/mlx4_en.h new file mode 100644 index 0000000..6952b10 --- /dev/null +++ b/drivers/net/mlx4/mlx4_en.h @@ -0,0 +1,616 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#ifndef _MLX4_EN_H_ +#define _MLX4_EN_H_ + +#include <linux/compiler.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/netdevice.h> +#include <linux/inet_lro.h> + +#include <linux/mlx4/device.h> +#include <linux/mlx4/qp.h> +#include <linux/mlx4/cq.h> +#include <linux/mlx4/srq.h> +#include <linux/mlx4/doorbell.h> + +#include "en_port.h" + +#define DRV_NAME "mlx4_en" +#define DRV_VERSION "1.4.1-RC7" +#define DRV_RELDATE "April 2009" + + +#define MLX4_EN_MSG_LEVEL (NETIF_MSG_LINK | NETIF_MSG_IFDOWN) + +#define mlx4_dbg(mlevel, priv, format, arg...) \ + if (NETIF_MSG_##mlevel & priv->msg_enable) \ + printk(KERN_DEBUG "%s %s: " format , DRV_NAME ,\ + (&priv->mdev->pdev->dev)->bus_id , ## arg) + +#define mlx4_err(mdev, format, arg...) \ + printk(KERN_ERR "%s %s: " format , DRV_NAME ,\ + (&mdev->pdev->dev)->bus_id , ## arg) +#define mlx4_info(mdev, format, arg...) \ + printk(KERN_INFO "%s %s: " format , DRV_NAME ,\ + (&mdev->pdev->dev)->bus_id , ## arg) +#define mlx4_warn(mdev, format, arg...) \ + printk(KERN_WARNING "%s %s: " format , DRV_NAME ,\ + (&mdev->pdev->dev)->bus_id , ## arg) + +/* + * Device constants + */ + + +#define MLX4_EN_PAGE_SHIFT 12 +#define MLX4_EN_PAGE_SIZE (1 << MLX4_EN_PAGE_SHIFT) +#define MAX_TX_RINGS (MLX4_EN_NUM_HASH_RINGS + MLX4_EN_NUM_PPP_RINGS + 1) +#define MAX_RX_RINGS 17 +#define MAX_RSS_MAP_SIZE 64 +#define RSS_FACTOR 1 +#define TXBB_SIZE 64 +#define HEADROOM (2048 / TXBB_SIZE + 1) +#define MAX_LSO_HDR_SIZE 92 +#define STAMP_STRIDE 64 +#define STAMP_DWORDS (STAMP_STRIDE / 4) +#define STAMP_SHIFT 31 +#define STAMP_VAL 0x7fffffff +#define STATS_DELAY (HZ / 4) + +/* Typical TSO descriptor with 16 gather entries is 352 bytes... */ +#define MAX_DESC_SIZE 512 +#define MAX_DESC_TXBBS (MAX_DESC_SIZE / TXBB_SIZE) + +/* + * OS related constants and tunables + */ + +#define MLX4_EN_WATCHDOG_TIMEOUT (15 * HZ) + +#define MLX4_EN_ALLOC_ORDER 2 +#define MLX4_EN_ALLOC_SIZE (PAGE_SIZE << MLX4_EN_ALLOC_ORDER) + +#define MLX4_EN_MAX_LRO_DESCRIPTORS 32 +#define MLX4_EN_NUM_IPFRAG_SESSIONS 16 + +/* Receive fragment sizes; we use at most 4 fragments (for 9600 byte MTU + * and 4K allocations) */ +enum { + FRAG_SZ0 = 512 - NET_IP_ALIGN, + FRAG_SZ1 = 1024, + FRAG_SZ2 = 4096, + FRAG_SZ3 = MLX4_EN_ALLOC_SIZE +}; +#define MLX4_EN_MAX_RX_FRAGS 4 + +/* Maximum ring sizes */ +#define MLX4_EN_MAX_TX_SIZE 8192 +#define MLX4_EN_MAX_RX_SIZE 8192 + +/* Minimum ring size for our page-allocation scheme to work */ +#define MLX4_EN_MIN_RX_SIZE (MLX4_EN_ALLOC_SIZE / SMP_CACHE_BYTES) +#define MLX4_EN_MIN_TX_SIZE (4096 / TXBB_SIZE) + +#define MLX4_EN_SMALL_PKT_SIZE 128 +#define MLX4_EN_TX_HASH_SIZE 256 +#define MLX4_EN_TX_HASH_MASK (MLX4_EN_TX_HASH_SIZE - 1) +#define MLX4_EN_NUM_HASH_RINGS 8 +#define MLX4_EN_NUM_PPP_RINGS 8 +#define MLX4_EN_DEF_TX_RING_SIZE 512 +#define MLX4_EN_DEF_RX_RING_SIZE 1024 + +/* Target number of bytes to coalesce with interrupt moderation */ +#define MLX4_EN_RX_COAL_TARGET 0x20000 +#define MLX4_EN_RX_COAL_TIME 0x10 + +#define MLX4_EN_TX_COAL_PKTS 5 +#define MLX4_EN_TX_COAL_TIME 0x80 + +#define MLX4_EN_RX_RATE_LOW 400000 +#define MLX4_EN_RX_COAL_TIME_LOW 0 +#define MLX4_EN_RX_RATE_HIGH 450000 +#define MLX4_EN_RX_COAL_TIME_HIGH 128 +#define MLX4_EN_RX_SIZE_THRESH 1024 +#define MLX4_EN_RX_RATE_THRESH (1000000 / MLX4_EN_RX_COAL_TIME_HIGH) +#define MLX4_EN_SAMPLE_INTERVAL 0 + +#define MLX4_EN_AUTO_CONF 0xffff + +#define MLX4_EN_DEF_RX_PAUSE 1 +#define MLX4_EN_DEF_TX_PAUSE 1 + +/* Interval between successive polls in the Tx routine when polling is used + 
instead of interrupts (in per-core Tx rings) - should be power of 2 */ +#define MLX4_EN_TX_POLL_MODER 16 +#define MLX4_EN_TX_POLL_TIMEOUT (HZ / 4) + +#define ETH_LLC_SNAP_SIZE 8 + +#define SMALL_PACKET_SIZE (256 - NET_IP_ALIGN) +#define HEADER_COPY_SIZE (128 - NET_IP_ALIGN) + +#define MLX4_EN_MIN_MTU 46 +#define ETH_BCAST 0xffffffffffff + +#ifdef MLX4_EN_PERF_STAT +/* Number of samples to 'average' */ +#define AVG_SIZE 128 +#define AVG_FACTOR 1024 +#define NUM_PERF_STATS NUM_PERF_COUNTERS + +#define INC_PERF_COUNTER(cnt) (++(cnt)) +#define ADD_PERF_COUNTER(cnt, add) ((cnt) += (add)) +#define AVG_PERF_COUNTER(cnt, sample) \ + ((cnt) = ((cnt) * (AVG_SIZE - 1) + (sample) * AVG_FACTOR) / AVG_SIZE) +#define GET_PERF_COUNTER(cnt) (cnt) +#define GET_AVG_PERF_COUNTER(cnt) ((cnt) / AVG_FACTOR) + +#else + +#define NUM_PERF_STATS 0 +#define INC_PERF_COUNTER(cnt) do {} while (0) +#define ADD_PERF_COUNTER(cnt, add) do {} while (0) +#define AVG_PERF_COUNTER(cnt, sample) do {} while (0) +#define GET_PERF_COUNTER(cnt) (0) +#define GET_AVG_PERF_COUNTER(cnt) (0) +#endif /* MLX4_EN_PERF_STAT */ + +/* + * Configurables + */ + +enum cq_type { + RX = 0, + TX = 1, +}; + + +/* + * Useful macros + */ +#define ROUNDUP_LOG2(x) ilog2(roundup_pow_of_two(x)) +#define XNOR(x, y) (!(x) == !(y)) +#define ILLEGAL_MAC(addr) (addr == 0xffffffffffff || addr == 0x0) + + +struct mlx4_en_tx_info { + struct sk_buff *skb; + u32 nr_txbb; + u8 linear; + u8 data_offset; + u8 inl; +}; + + +#define MLX4_EN_BIT_DESC_OWN 0x80000000 +#define CTRL_SIZE sizeof(struct mlx4_wqe_ctrl_seg) +#define MLX4_EN_MEMTYPE_PAD 0x100 +#define DS_SIZE sizeof(struct mlx4_wqe_data_seg) + + +struct mlx4_en_tx_desc { + struct mlx4_wqe_ctrl_seg ctrl; + union { + struct mlx4_wqe_data_seg data; /* at least one data segment */ + struct mlx4_wqe_lso_seg lso; + struct mlx4_wqe_inline_seg inl; + }; +}; + +#define MLX4_EN_USE_SRQ 0x01000000 + +struct mlx4_en_rx_alloc { + struct page *page; + u16 offset; +}; + +struct mlx4_en_tx_ring { + struct mlx4_hwq_resources wqres; + u32 size ; /* number of TXBBs */ + u32 size_mask; + u16 stride; + u16 cqn; /* index of port CQ associated with this ring */ + u32 prod; + u32 cons; + u32 buf_size; + u32 doorbell_qpn; + void *buf; + u16 poll_cnt; + int blocked; + struct mlx4_en_tx_info *tx_info; + u8 *bounce_buf; + u32 last_nr_txbb; + struct mlx4_qp qp; + struct mlx4_qp_context context; + u32 qpn; + enum mlx4_qp_state qp_state; + struct mlx4_srq dummy; + unsigned long bytes; + unsigned long packets; + spinlock_t comp_lock; +}; + + +struct mlx4_en_ipfrag { + struct sk_buff *fragments; + struct sk_buff *last; + __be32 saddr; + __be32 daddr; + __be16 id; + u8 protocol; + int total_len; + u16 offset; + unsigned int vlan; + __be16 sl_vid; +}; + +struct mlx4_en_rx_desc { + struct mlx4_wqe_srq_next_seg next; + /* actual number of entries depends on rx ring stride */ + struct mlx4_wqe_data_seg data[0]; +}; + +struct mlx4_en_rx_ring { + struct mlx4_srq srq; + struct mlx4_hwq_resources wqres; + struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS]; + struct net_lro_mgr lro; + u32 size ; /* number of Rx descs*/ + u32 actual_size; + u32 size_mask; + u16 stride; + u16 log_stride; + u16 cqn; /* index of port CQ associated with this ring */ + u32 prod; + u32 cons; + u32 buf_size; + int need_refill; + int full; + void *buf; + void *rx_info; + unsigned long bytes; + unsigned long packets; + struct mlx4_en_ipfrag ipfrag[MLX4_EN_NUM_IPFRAG_SESSIONS]; + unsigned int use_frags; +}; + + +static inline int mlx4_en_can_lro(__be16 status) +{ + return 
(status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 | + MLX4_CQE_STATUS_IPV4F | + MLX4_CQE_STATUS_IPV6 | + MLX4_CQE_STATUS_IPV4OPT | + MLX4_CQE_STATUS_TCP | + MLX4_CQE_STATUS_UDP | + MLX4_CQE_STATUS_IPOK)) == + cpu_to_be16(MLX4_CQE_STATUS_IPV4 | + MLX4_CQE_STATUS_IPOK | + MLX4_CQE_STATUS_TCP); +} + +struct mlx4_en_cq { + struct mlx4_cq mcq; + struct mlx4_hwq_resources wqres; + int ring; + spinlock_t lock; + struct net_device *dev; + struct net_device *poll_dev; /* for napi */ + /* Per-core Tx cq processing support */ + struct timer_list timer; + int size; + int buf_size; + unsigned vector; + enum cq_type is_tx; + u16 moder_time; + u16 moder_cnt; + struct mlx4_cqe *buf; +#define MLX4_EN_OPCODE_ERROR 0x1e +}; + +struct mlx4_en_port_profile { + u32 flags; + u32 tx_ring_num; + u32 rx_ring_num; + u32 tx_ring_size; + u32 rx_ring_size; + u8 rx_pause; + u8 rx_ppp; + u8 tx_pause; + u8 tx_ppp; +}; + +struct mlx4_en_profile { + int rss_xor; + int num_lro; + int ip_reasm; + u8 rss_mask; + u32 active_ports; + u32 small_pkt_int; + u8 no_reset; + struct mlx4_en_port_profile prof[MLX4_MAX_PORTS + 1]; +}; + +struct mlx4_en_dev { + struct mlx4_dev *dev; + struct pci_dev *pdev; + struct mutex state_lock; + struct net_device *pndev[MLX4_MAX_PORTS + 1]; + u32 port_cnt; + bool device_up; + struct mlx4_en_profile profile; + u32 LSO_support; + struct workqueue_struct *workqueue; + struct device *dma_device; + void __iomem *uar_map; + struct mlx4_uar priv_uar; + struct mlx4_mr mr; + u32 priv_pdn; + spinlock_t uar_lock; +}; + + +struct mlx4_en_rss_map { + int size; + int base_qpn; + u16 map[MAX_RSS_MAP_SIZE]; + struct mlx4_qp qps[MAX_RSS_MAP_SIZE]; + enum mlx4_qp_state state[MAX_RSS_MAP_SIZE]; + struct mlx4_qp indir_qp; + enum mlx4_qp_state indir_state; +}; + +struct mlx4_en_rss_context { + __be32 base_qpn; + __be32 default_qpn; + u16 reserved; + u8 hash_fn; + u8 flags; + __be32 rss_key[10]; +}; + +struct mlx4_en_pkt_stats { + unsigned long broadcast; + unsigned long rx_prio[8]; + unsigned long tx_prio[8]; +#define NUM_PKT_STATS 17 +}; + +struct mlx4_en_port_stats { + unsigned long lro_aggregated; + unsigned long lro_flushed; + unsigned long lro_no_desc; + unsigned long tso_packets; + unsigned long queue_stopped; + unsigned long wake_queue; + unsigned long tx_timeout; + unsigned long rx_alloc_failed; + unsigned long rx_chksum_good; + unsigned long rx_chksum_none; + unsigned long tx_chksum_offload; +#define NUM_PORT_STATS 11 +}; + +struct mlx4_en_perf_stats { + u32 tx_poll; + u64 tx_pktsz_avg; + u32 inflight_avg; + u16 tx_coal_avg; + u16 rx_coal_avg; + u32 napi_quota; +#define NUM_PERF_COUNTERS 6 +}; + +struct mlx4_en_frag_info { + u16 frag_size; + u16 frag_prefix_size; + u16 frag_stride; + u16 frag_align; + u16 last_offset; + +}; + +struct mlx4_en_tx_hash_entry { + u8 cnt; + unsigned int small_pkts; + unsigned int big_pkts; + unsigned int ring; +}; + +struct mlx4_en_priv { + struct mlx4_en_dev *mdev; + struct mlx4_en_port_profile *prof; + struct net_device *dev; + struct vlan_group *vlgrp; + struct net_device_stats stats; + struct net_device_stats ret_stats; + spinlock_t stats_lock; + + unsigned long last_moder_packets; + unsigned long last_moder_tx_packets; + unsigned long last_moder_bytes; + unsigned long last_moder_jiffies; + int last_moder_time; + u16 rx_usecs; + u16 rx_frames; + u16 tx_usecs; + u16 tx_frames; + u32 pkt_rate_low; + u16 rx_usecs_low; + u32 pkt_rate_high; + u16 rx_usecs_high; + u16 sample_interval; + u16 adaptive_rx_coal; + u32 msg_enable; + + struct mlx4_hwq_resources res; + int link_state; + int 
last_link_state; + bool port_up; + int port; + int registered; + int allocated; + int rx_csum; + u64 mac; + int mac_index; + unsigned max_mtu; + int base_qpn; + + struct mlx4_en_rss_map rss_map; + u16 tx_prio_map[8]; + u32 flags; +#define MLX4_EN_FLAG_PROMISC 0x1 + u32 tx_ring_num; + u32 rx_ring_num; + u32 rx_skb_size; + struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS]; + u16 num_frags; + u16 log_rx_info; + + struct mlx4_en_tx_ring tx_ring[MAX_TX_RINGS]; + struct mlx4_en_rx_ring rx_ring[MAX_RX_RINGS]; + struct mlx4_en_cq tx_cq[MAX_TX_RINGS]; + struct mlx4_en_cq rx_cq[MAX_RX_RINGS]; + struct mlx4_en_tx_hash_entry tx_hash[MLX4_EN_TX_HASH_SIZE]; + struct work_struct mcast_task; + struct work_struct mac_task; + struct delayed_work refill_task; + struct work_struct watchdog_task; + struct work_struct linkstate_task; + struct delayed_work stats_task; + struct mlx4_en_perf_stats pstats; + struct mlx4_en_pkt_stats pkstats; + struct mlx4_en_port_stats port_stats; + struct dev_mc_list *mc_list; + struct mlx4_en_stat_out_mbox hw_stats; +}; + +int mlx4_en_rx_frags(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, + struct sk_buff *skb, struct mlx4_cqe *cqe); +void mlx4_en_flush_frags(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring); +void mlx4_en_destroy_netdev(struct net_device *dev); +int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, + struct mlx4_en_port_profile *prof); + +int mlx4_en_start_port(struct net_device *dev); +void mlx4_en_stop_port(struct net_device *dev); + +void mlx4_en_free_resources(struct mlx4_en_priv *priv); +int mlx4_en_alloc_resources(struct mlx4_en_priv *priv); + +int mlx4_en_get_profile(struct mlx4_en_dev *mdev); + +int mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, + int entries, int ring, enum cq_type mode); +void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); +int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); +void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); +int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); +int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); + +void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq); +void mlx4_en_poll_tx_cq(unsigned long data); +void mlx4_en_tx_irq(struct mlx4_cq *mcq); +int mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev); + +int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, + u32 size, u16 stride); +void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring); +int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int cq, int srqn); +void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring); + +int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring, u32 size); +void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring); +int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv); +void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring); +int mlx4_en_process_rx_cq(struct net_device *dev, + struct mlx4_en_cq *cq, + int budget); +int mlx4_en_process_rx_cq_skb(struct net_device *dev, + struct mlx4_en_cq *cq, + int budget); +int mlx4_en_poll_rx_cq(struct net_device *poll_dev, int *budget); +void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, + int is_tx, int rss, int qpn, int cqn, int srqn, + 
struct mlx4_qp_context *context); +void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event); +int mlx4_en_map_buffer(struct mlx4_buf *buf); +void mlx4_en_unmap_buffer(struct mlx4_buf *buf); + +void mlx4_en_calc_rx_buf(struct net_device *dev); +int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_rx_desc *rx_desc, + struct skb_frag_struct *skb_frags, + struct skb_frag_struct *skb_frags_rx, + struct mlx4_en_rx_alloc *page_alloc, + int length); +struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, + struct mlx4_en_rx_desc *rx_desc, + struct skb_frag_struct *skb_frags, + struct mlx4_en_rx_alloc *page_alloc, + unsigned int length); + +void mlx4_en_set_default_rss_map(struct mlx4_en_priv *priv, + struct mlx4_en_rss_map *rss_map, + int num_entries, int num_rings); +void mlx4_en_set_prio_map(struct mlx4_en_priv *priv, u16 *prio_map, u32 ring_num); +int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv); +void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv); +int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring); +void mlx4_en_rx_refill(struct work_struct *work); +void mlx4_en_rx_irq(struct mlx4_cq *mcq); + +int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode); +int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, u8 port, struct vlan_group *grp); +int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu, + u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx); +int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, + u8 promisc); + +int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset); + +/* + * Globals + */ +extern const struct ethtool_ops mlx4_en_ethtool_ops; +#endif diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c index 7d63d7c..3c0e802 100644 --- a/drivers/net/mlx4/mr.c +++ b/drivers/net/mlx4/mr.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. 
* * This software is available to you under a choice of one of two @@ -47,12 +47,14 @@ struct mlx4_mpt_entry { __be32 flags; __be32 qpn; __be32 key; - __be32 pd; + __be32 pd_flags; __be64 start; __be64 length; __be32 lkey; __be32 win_cnt; - u8 reserved1[3]; + u8 reserved1; + u8 flags2; + u8 reserved2; u8 mtt_rep; __be64 mtt_seg; __be32 mtt_sz; @@ -61,12 +63,17 @@ struct mlx4_mpt_entry { } __attribute__((packed)); #define MLX4_MPT_FLAG_SW_OWNS (0xfUL << 28) +#define MLX4_MPT_FLAG_FREE (0x3UL << 28) #define MLX4_MPT_FLAG_MIO (1 << 17) #define MLX4_MPT_FLAG_BIND_ENABLE (1 << 15) #define MLX4_MPT_FLAG_PHYSICAL (1 << 9) #define MLX4_MPT_FLAG_REGION (1 << 8) -#define MLX4_MTT_FLAG_PRESENT 1 +#define MLX4_MPT_PD_FLAG_FAST_REG (1 << 27) +#define MLX4_MPT_PD_FLAG_RAE (1 << 28) +#define MLX4_MPT_PD_FLAG_EN_INV (3 << 24) + +#define MLX4_MPT_FLAG2_FBO_EN (1 << 7) #define MLX4_MPT_STATUS_SW 0xF0 #define MLX4_MPT_STATUS_HW 0x00 @@ -79,23 +86,26 @@ static u32 mlx4_buddy_alloc(struct mlx4_buddy *buddy, int order) spin_lock(&buddy->lock); - for (o = order; o <= buddy->max_order; ++o) { - m = 1 << (buddy->max_order - o); - seg = find_first_bit(buddy->bits[o], m); - if (seg < m) - goto found; - } + for (o = order; o <= buddy->max_order; ++o) + if (buddy->num_free[o]) { + m = 1 << (buddy->max_order - o); + seg = find_first_bit(buddy->bits[o], m); + if (seg < m) + goto found; + } spin_unlock(&buddy->lock); return -1; found: clear_bit(seg, buddy->bits[o]); + --buddy->num_free[o]; while (o > order) { --o; seg <<= 1; set_bit(seg ^ 1, buddy->bits[o]); + ++buddy->num_free[o]; } spin_unlock(&buddy->lock); @@ -113,16 +123,18 @@ static void mlx4_buddy_free(struct mlx4_buddy *buddy, u32 seg, int order) while (test_bit(seg ^ 1, buddy->bits[order])) { clear_bit(seg ^ 1, buddy->bits[order]); + --buddy->num_free[order]; seg >>= 1; ++order; } set_bit(seg, buddy->bits[order]); + ++buddy->num_free[order]; spin_unlock(&buddy->lock); } -static int __devinit mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order) +static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order) { int i, s; @@ -131,7 +143,9 @@ static int __devinit mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order) buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *), GFP_KERNEL); - if (!buddy->bits) + buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int *), + GFP_KERNEL); + if (!buddy->bits || !buddy->num_free) goto err_out; for (i = 0; i <= buddy->max_order; ++i) { @@ -143,6 +157,7 @@ static int __devinit mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order) } set_bit(0, buddy->bits[buddy->max_order]); + buddy->num_free[buddy->max_order] = 1; return 0; @@ -150,9 +165,10 @@ err_out_free: for (i = 0; i <= buddy->max_order; ++i) kfree(buddy->bits[i]); +err_out: kfree(buddy->bits); + kfree(buddy->num_free); -err_out: return -ENOMEM; } @@ -164,6 +180,7 @@ static void mlx4_buddy_cleanup(struct mlx4_buddy *buddy) kfree(buddy->bits[i]); kfree(buddy->bits); + kfree(buddy->num_free); } static u32 mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order) @@ -196,7 +213,7 @@ int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift, } else mtt->page_shift = page_shift; - for (mtt->order = 0, i = MLX4_MTT_ENTRY_PER_SEG; i < npages; i <<= 1) + for (mtt->order = 0, i = dev->caps.mtts_per_seg; i < npages; i <<= 1) ++mtt->order; mtt->first_seg = mlx4_alloc_mtt_range(dev, mtt->order); @@ -250,25 +267,56 @@ static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox !mailbox, MLX4_CMD_HW2SW_MPT, MLX4_CMD_TIME_CLASS_B); } 
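+/*
+ * The reserved-range helpers below split MR creation in two steps:
+ * mlx4_mr_reserve_range() grabs a contiguous, aligned block of MPT
+ * indexes from the bitmap, and mlx4_mr_alloc_reserved() then builds an
+ * MR on an index inside that block.  A minimal usage sketch, with
+ * hypothetical sizes and error handling omitted:
+ *
+ *	u32 base;
+ *
+ *	if (!mlx4_mr_reserve_range(dev, 64, 64, &base)) {
+ *		mlx4_mr_alloc_reserved(dev, base, pd, 0, 0, access,
+ *				       npages, page_shift, &mr);
+ *		...
+ *		mlx4_mr_release_range(dev, base, 64);
+ *	}
+ */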
-int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access, - int npages, int page_shift, struct mlx4_mr *mr) +int mlx4_mr_reserve_range(struct mlx4_dev *dev, int cnt, int align, u32 *base_mridx) { struct mlx4_priv *priv = mlx4_priv(dev); - u32 index; - int err; + u32 mridx; - index = mlx4_bitmap_alloc(&priv->mr_table.mpt_bitmap); - if (index == -1) + mridx = mlx4_bitmap_alloc_range(&priv->mr_table.mpt_bitmap, cnt, align); + if (mridx == -1) return -ENOMEM; + *base_mridx = mridx; + return 0; + +} +EXPORT_SYMBOL_GPL(mlx4_mr_reserve_range); + +void mlx4_mr_release_range(struct mlx4_dev *dev, u32 base_mridx, int cnt) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + mlx4_bitmap_free_range(&priv->mr_table.mpt_bitmap, base_mridx, cnt); +} +EXPORT_SYMBOL_GPL(mlx4_mr_release_range); + +int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd, + u64 iova, u64 size, u32 access, int npages, + int page_shift, struct mlx4_mr *mr) +{ mr->iova = iova; mr->size = size; mr->pd = pd; mr->access = access; mr->enabled = 0; - mr->key = hw_index_to_key(index); + mr->key = hw_index_to_key(mridx); + + return mlx4_mtt_init(dev, npages, page_shift, &mr->mtt); +} +EXPORT_SYMBOL_GPL(mlx4_mr_alloc_reserved); + +int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access, + int npages, int page_shift, struct mlx4_mr *mr) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + u32 index; + int err; - err = mlx4_mtt_init(dev, npages, page_shift, &mr->mtt); + index = mlx4_bitmap_alloc(&priv->mr_table.mpt_bitmap); + if (index == -1) + return -ENOMEM; + + err = mlx4_mr_alloc_reserved(dev, index, pd, iova, size, + access, npages, page_shift, mr); if (err) mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index); @@ -276,9 +324,8 @@ int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access, } EXPORT_SYMBOL_GPL(mlx4_mr_alloc); -void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr) +void mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr) { - struct mlx4_priv *priv = mlx4_priv(dev); int err; if (mr->enabled) { @@ -290,6 +337,13 @@ void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr) } mlx4_mtt_cleanup(dev, &mr->mtt); +} +EXPORT_SYMBOL_GPL(mlx4_mr_free_reserved); + +void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + mlx4_mr_free_reserved(dev, mr); mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, key_to_hw_index(mr->key)); } EXPORT_SYMBOL_GPL(mlx4_mr_free); @@ -314,21 +368,33 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr) memset(mpt_entry, 0, sizeof *mpt_entry); - mpt_entry->flags = cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS | - MLX4_MPT_FLAG_MIO | + mpt_entry->flags = cpu_to_be32(MLX4_MPT_FLAG_MIO | MLX4_MPT_FLAG_REGION | mr->access); mpt_entry->key = cpu_to_be32(key_to_hw_index(mr->key)); - mpt_entry->pd = cpu_to_be32(mr->pd); + mpt_entry->pd_flags = cpu_to_be32(mr->pd | MLX4_MPT_PD_FLAG_EN_INV); mpt_entry->start = cpu_to_be64(mr->iova); mpt_entry->length = cpu_to_be64(mr->size); mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift); + if (mr->mtt.order < 0) { mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL); mpt_entry->mtt_seg = 0; - } else + } else { mpt_entry->mtt_seg = cpu_to_be64(mlx4_mtt_addr(dev, &mr->mtt)); + } + + if (mr->mtt.order >= 0 && mr->mtt.page_shift == 0) { + /* fast register MR in free state */ + mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_FREE); + mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG | + MLX4_MPT_PD_FLAG_RAE); + mpt_entry->mtt_sz = 
cpu_to_be32((1 << mr->mtt.order) * + dev->caps.mtts_per_seg); + } else { + mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS); + } err = mlx4_SW2HW_MPT(dev, mailbox, key_to_hw_index(mr->key) & (dev->caps.num_mpts - 1)); @@ -366,7 +432,7 @@ static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt, (start_index + npages - 1) / (PAGE_SIZE / sizeof (u64))) return -EINVAL; - if (start_index & (MLX4_MTT_ENTRY_PER_SEG - 1)) + if (start_index & (dev->caps.mtts_per_seg - 1)) return -EINVAL; mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->first_seg + @@ -419,9 +485,9 @@ int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, for (i = 0; i < buf->npages; ++i) if (buf->nbufs == 1) - page_list[i] = buf->u.direct.map + (i << buf->page_shift); + page_list[i] = buf->direct.map + (i << buf->page_shift); else - page_list[i] = buf->u.page_list[i].map; + page_list[i] = buf->page_list[i].map; err = mlx4_write_mtt(dev, mtt, 0, buf->npages, page_list); @@ -435,8 +501,11 @@ int mlx4_init_mr_table(struct mlx4_dev *dev) struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; int err; + if (!is_power_of_2(dev->caps.num_mpts)) + return -EINVAL; + err = mlx4_bitmap_init(&mr_table->mpt_bitmap, dev->caps.num_mpts, - ~0, dev->caps.reserved_mrws); + ~0, dev->caps.reserved_mrws, 0); if (err) return err; @@ -500,8 +569,9 @@ static inline int mlx4_check_fmr(struct mlx4_fmr *fmr, u64 *page_list, return 0; } -int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list, - int npages, u64 iova, u32 *lkey, u32 *rkey) +int mlx4_map_phys_fmr_fbo(struct mlx4_dev *dev, struct mlx4_fmr *fmr, + u64 *page_list, int npages, u64 iova, u32 fbo, + u32 len, u32 *lkey, u32 *rkey, int same_key) { u32 key; int i, err; @@ -513,7 +583,8 @@ int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list ++fmr->maps; key = key_to_hw_index(fmr->mr.key); - key += dev->caps.num_mpts; + if (!same_key) + key += dev->caps.num_mpts; *lkey = *rkey = fmr->mr.key = hw_index_to_key(key); *(u8 *) fmr->mpt = MLX4_MPT_STATUS_SW; @@ -529,8 +600,10 @@ int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list fmr->mpt->key = cpu_to_be32(key); fmr->mpt->lkey = cpu_to_be32(key); - fmr->mpt->length = cpu_to_be64(npages * (1ull << fmr->page_shift)); + fmr->mpt->length = cpu_to_be64(len); fmr->mpt->start = cpu_to_be64(iova); + fmr->mpt->first_byte_offset = cpu_to_be32(fbo & 0x001fffff); + fmr->mpt->flags2 = (fbo ? 
MLX4_MPT_FLAG2_FBO_EN : 0); /* Make sure MTT entries are visible before setting MPT status */ wmb(); @@ -542,6 +615,16 @@ int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list return 0; } +EXPORT_SYMBOL_GPL(mlx4_map_phys_fmr_fbo); + +int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list, + int npages, u64 iova, u32 *lkey, u32 *rkey) +{ + u32 len = npages * (1ull << fmr->page_shift); + + return mlx4_map_phys_fmr_fbo(dev, fmr, page_list, npages, iova, 0, + len, lkey, rkey, 0); +} EXPORT_SYMBOL_GPL(mlx4_map_phys_fmr); int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages, @@ -586,6 +669,49 @@ err_free: } EXPORT_SYMBOL_GPL(mlx4_fmr_alloc); +int mlx4_fmr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, + u32 pd, u32 access, int max_pages, + int max_maps, u8 page_shift, struct mlx4_fmr *fmr) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + u64 mtt_seg; + int err = -ENOMEM; + + if (page_shift < (ffs(dev->caps.page_size_cap) - 1) || page_shift >= 32) + return -EINVAL; + + /* All MTTs must fit in the same page */ + if (max_pages * sizeof *fmr->mtts > PAGE_SIZE) + return -EINVAL; + + fmr->page_shift = page_shift; + fmr->max_pages = max_pages; + fmr->max_maps = max_maps; + fmr->maps = 0; + + err = mlx4_mr_alloc_reserved(dev, mridx, pd, 0, 0, access, max_pages, + page_shift, &fmr->mr); + if (err) + return err; + + mtt_seg = fmr->mr.mtt.first_seg * dev->caps.mtt_entry_sz; + + fmr->mtts = mlx4_table_find(&priv->mr_table.mtt_table, + fmr->mr.mtt.first_seg, + &fmr->dma_handle); + if (!fmr->mtts) { + err = -ENOMEM; + goto err_free; + } + + return 0; + +err_free: + mlx4_mr_free_reserved(dev, &fmr->mr); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_fmr_alloc_reserved); + int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -628,6 +754,18 @@ int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr) } EXPORT_SYMBOL_GPL(mlx4_fmr_free); +int mlx4_fmr_free_reserved(struct mlx4_dev *dev, struct mlx4_fmr *fmr) +{ + if (fmr->maps) + return -EBUSY; + + fmr->mr.enabled = 0; + mlx4_mr_free_reserved(dev, &fmr->mr); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_fmr_free_reserved); + int mlx4_SYNC_TPT(struct mlx4_dev *dev) { return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT, 1000); diff --git a/drivers/net/mlx4/pd.c b/drivers/net/mlx4/pd.c index 3a93c5f..26d1a7a 100644 --- a/drivers/net/mlx4/pd.c +++ b/drivers/net/mlx4/pd.c @@ -62,7 +62,7 @@ int mlx4_init_pd_table(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); return mlx4_bitmap_init(&priv->pd_bitmap, dev->caps.num_pds, - (1 << 24) - 1, dev->caps.reserved_pds); + (1 << 24) - 1, dev->caps.reserved_pds, 0); } void mlx4_cleanup_pd_table(struct mlx4_dev *dev) @@ -91,9 +91,16 @@ EXPORT_SYMBOL_GPL(mlx4_uar_free); int mlx4_init_uar_table(struct mlx4_dev *dev) { + if (dev->caps.num_uars <= 128) { + mlx4_err(dev, "Only %d UAR pages (need more than 128)\n", + dev->caps.num_uars); + mlx4_err(dev, "Increase firmware log2_uar_bar_megabytes?\n"); + return -ENODEV; + } + return mlx4_bitmap_init(&mlx4_priv(dev)->uar_table.bitmap, dev->caps.num_uars, dev->caps.num_uars - 1, - max(128, dev->caps.reserved_uars)); + max(128, dev->caps.reserved_uars), 0); } void mlx4_cleanup_uar_table(struct mlx4_dev *dev) diff --git a/drivers/net/mlx4/port.c b/drivers/net/mlx4/port.c new file mode 100644 index 0000000..b93fb6c --- /dev/null +++ b/drivers/net/mlx4/port.c @@ -0,0 +1,320 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/errno.h> +#include <linux/if_ether.h> + +#include <linux/mlx4/cmd.h> + +#include "mlx4.h" + +int mlx4_ib_set_4k_mtu = 0; +module_param_named(set_4k_mtu, mlx4_ib_set_4k_mtu, int, 0444); +MODULE_PARM_DESC(set_4k_mtu, "attempt to set 4K MTU to all ConnectX ports"); + +void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table) +{ + int i; + + sema_init(&table->mac_sem, 1); + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + table->entries[i] = 0; + table->refs[i] = 0; + } + table->max = 1 << dev->caps.log_num_macs; + table->total = 0; +} + +void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table) +{ + int i; + + sema_init(&table->vlan_sem, 1); + for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { + table->entries[i] = 0; + table->refs[i] = 0; + } + table->max = 1 << dev->caps.log_num_vlans; + table->total = 0; +} + +static int mlx4_SET_PORT_mac_table(struct mlx4_dev *dev, u8 port, + __be64 *entries) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 in_mod; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + memcpy(mailbox->buf, entries, MLX4_MAC_TABLE_SIZE); + + in_mod = MLX4_SET_PORT_MAC_TABLE << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index) +{ + struct mlx4_mac_table *table = &mlx4_priv(dev)->port[port].mac_table; + int i, err = 0; + int free = -1; + u64 valid = 1; + + mlx4_dbg(dev, "Registering mac: 0x%llx\n", mac); + down(&table->mac_sem); + for (i = 0; i < MLX4_MAX_MAC_NUM - 1; i++) { + if (free < 0 && !table->refs[i]) { + free = i; + continue; + } + + if (mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) { + /* Mac already registered, increase reference count */ + *index = i; + ++table->refs[i]; + goto out; + } + } + mlx4_dbg(dev, "Free mac index is %d\n", free); + + if (table->total == table->max) { + /* No free mac entries */ + err = -ENOSPC; + goto out; + } + + /* Register new MAC */ + table->refs[free] = 1; + table->entries[free] = cpu_to_be64(mac | valid << MLX4_MAC_VALID_SHIFT); + 
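+	/*
+	 * Push the updated table to the device: mlx4_SET_PORT_mac_table()
+	 * writes the whole MLX4_MAX_MAC_NUM-entry table in a single
+	 * SET_PORT mailbox command, so a failed write is undone by simply
+	 * clearing the new slot again.
+	 */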
err = mlx4_SET_PORT_mac_table(dev, port, table->entries); + if (unlikely(err)) { + mlx4_err(dev, "Failed adding mac: 0x%llx\n", mac); + table->refs[free] = 0; + table->entries[free] = 0; + goto out; + } + + *index = free; + ++table->total; +out: + up(&table->mac_sem); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_register_mac); + +void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int index) +{ + struct mlx4_mac_table *table = &mlx4_priv(dev)->port[port].mac_table; + + down(&table->mac_sem); + if (!table->refs[index]) { + mlx4_warn(dev, "No mac entry for index %d\n", index); + goto out; + } + if (--table->refs[index]) { + mlx4_warn(dev, "Have more references for index %d, " + "no need to modify mac table\n", index); + goto out; + } + table->entries[index] = 0; + mlx4_SET_PORT_mac_table(dev, port, table->entries); + --table->total; +out: + up(&table->mac_sem); +} +EXPORT_SYMBOL_GPL(mlx4_unregister_mac); + +static int mlx4_SET_PORT_vlan_table(struct mlx4_dev *dev, u8 port, + __be32 *entries) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 in_mod; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + memcpy(mailbox->buf, entries, MLX4_VLAN_TABLE_SIZE); + in_mod = MLX4_SET_PORT_VLAN_TABLE << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B); + + mlx4_free_cmd_mailbox(dev, mailbox); + + return err; +} + +int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index) +{ + struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table; + int i, err = 0; + int free = -1; + + down(&table->vlan_sem); + for (i = MLX4_VLAN_REGULAR; i < MLX4_MAX_VLAN_NUM; i++) { + if (free < 0 && (table->refs[i] == 0)) { + free = i; + continue; + } + + if (table->refs[i] && + (vlan == (MLX4_VLAN_MASK & + be32_to_cpu(table->entries[i])))) { + /* Vlan already registered, increase reference count */ + *index = i; + ++table->refs[i]; + goto out; + } + } + + if (table->total == table->max) { + /* No free vlan entries */ + err = -ENOSPC; + goto out; + } + + /* Register new VLAN */ + table->refs[free] = 1; + table->entries[free] = cpu_to_be32(vlan | MLX4_VLAN_VALID); + + err = mlx4_SET_PORT_vlan_table(dev, port, table->entries); + if (unlikely(err)) { + mlx4_warn(dev, "Failed adding vlan: %u\n", vlan); + table->refs[free] = 0; + table->entries[free] = 0; + goto out; + } + + *index = free; + ++table->total; +out: + up(&table->vlan_sem); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_register_vlan); + +void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index) +{ + struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table; + + if (index < MLX4_VLAN_REGULAR) { + mlx4_warn(dev, "Trying to free special vlan index %d\n", index); + return; + } + + down(&table->vlan_sem); + if (!table->refs[index]) { + mlx4_warn(dev, "No vlan entry for index %d\n", index); + goto out; + } + if (--table->refs[index]) { + mlx4_dbg(dev, "Have more references for index %d, " + "no need to modify vlan table\n", index); + goto out; + } + table->entries[index] = 0; + mlx4_SET_PORT_vlan_table(dev, port, table->entries); + --table->total; +out: + up(&table->vlan_sem); +} +EXPORT_SYMBOL_GPL(mlx4_unregister_vlan); + +int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps) +{ + struct mlx4_cmd_mailbox *inmailbox, *outmailbox; + u8 *inbuf, *outbuf; + int err; + + inmailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(inmailbox)) + return PTR_ERR(inmailbox); + + outmailbox = mlx4_alloc_cmd_mailbox(dev); + if 
(IS_ERR(outmailbox)) { + mlx4_free_cmd_mailbox(dev, inmailbox); + return PTR_ERR(outmailbox); + } + + inbuf = inmailbox->buf; + outbuf = outmailbox->buf; + memset(inbuf, 0, 256); + memset(outbuf, 0, 256); + inbuf[0] = 1; + inbuf[1] = 1; + inbuf[2] = 1; + inbuf[3] = 1; + *(__be16 *) (&inbuf[16]) = cpu_to_be16(0x0015); + *(__be32 *) (&inbuf[20]) = cpu_to_be32(port); + + err = mlx4_cmd_box(dev, inmailbox->dma, outmailbox->dma, port, 3, + MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C); + if (!err) + *caps = *(__be32 *) (outbuf + 84); + mlx4_free_cmd_mailbox(dev, inmailbox); + mlx4_free_cmd_mailbox(dev, outmailbox); + return err; +} + +int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port) +{ + struct mlx4_cmd_mailbox *mailbox; + int err = 0; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + memset(mailbox->buf, 0, 256); + if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) + goto out; + + if (mlx4_ib_set_4k_mtu) + ((__be32 *) mailbox->buf)[0] |= cpu_to_be32((1 << 22) | (1 << 21) | (5 << 12) | (2 << 4)); + ((__be32 *) mailbox->buf)[1] = dev->caps.ib_port_def_cap[port]; + + err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B); + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} diff --git a/drivers/net/mlx4/profile.c b/drivers/net/mlx4/profile.c index 9ca42b2..5958874 100644 --- a/drivers/net/mlx4/profile.c +++ b/drivers/net/mlx4/profile.c @@ -98,7 +98,7 @@ u64 mlx4_make_profile(struct mlx4_dev *dev, profile[MLX4_RES_EQ].size = dev_cap->eqc_entry_sz; profile[MLX4_RES_DMPT].size = dev_cap->dmpt_entry_sz; profile[MLX4_RES_CMPT].size = dev_cap->cmpt_entry_sz; - profile[MLX4_RES_MTT].size = MLX4_MTT_ENTRY_PER_SEG * dev_cap->mtt_entry_sz; + profile[MLX4_RES_MTT].size = dev->caps.mtts_per_seg * dev_cap->mtt_entry_sz; profile[MLX4_RES_MCG].size = MLX4_MGM_ENTRY_SIZE; profile[MLX4_RES_QP].num = request->num_qp; diff --git a/drivers/net/mlx4/qp.c b/drivers/net/mlx4/qp.c index f5f446c..9c9f1a2 100644 --- a/drivers/net/mlx4/qp.c +++ b/drivers/net/mlx4/qp.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. * Copyright (c) 2004 Voltaire, Inc. All rights reserved. 
* * This software is available to you under a choice of one of two @@ -55,8 +55,7 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type) spin_unlock(&qp_table->lock); if (!qp) { - mlx4_warn(dev, "Async event %d for bogus QP %08x\n", - event_type, qpn); + mlx4_warn(dev, "Async event for bogus QP %08x\n", qpn); return; } @@ -148,19 +147,42 @@ int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, } EXPORT_SYMBOL_GPL(mlx4_qp_modify); -int mlx4_qp_alloc(struct mlx4_dev *dev, int sqpn, struct mlx4_qp *qp) +int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_qp_table *qp_table = &priv->qp_table; + int qpn; + + qpn = mlx4_bitmap_alloc_range(&qp_table->bitmap, cnt, align); + if (qpn == -1) + return -ENOMEM; + + *base = qpn; + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_qp_reserve_range); + +void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_qp_table *qp_table = &priv->qp_table; + if (base_qpn < dev->caps.sqp_start + 8) + return; + + mlx4_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt); +} +EXPORT_SYMBOL_GPL(mlx4_qp_release_range); + +int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_qp_table *qp_table = &priv->qp_table; int err; - if (sqpn) - qp->qpn = sqpn; - else { - qp->qpn = mlx4_bitmap_alloc(&qp_table->bitmap); - if (qp->qpn == -1) - return -ENOMEM; - } + if (!qpn) + return -EINVAL; + + qp->qpn = qpn; err = mlx4_table_get(dev, &qp_table->qp_table, qp->qpn); if (err) @@ -209,9 +231,6 @@ err_put_qp: mlx4_table_put(dev, &qp_table->qp_table, qp->qpn); err_out: - if (!sqpn) - mlx4_bitmap_free(&qp_table->bitmap, qp->qpn); - return err; } EXPORT_SYMBOL_GPL(mlx4_qp_alloc); @@ -241,21 +260,21 @@ void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp) mlx4_table_put(dev, &qp_table->auxc_table, qp->qpn); mlx4_table_put(dev, &qp_table->qp_table, qp->qpn); - if (qp->qpn >= dev->caps.sqp_start + 8) - mlx4_bitmap_free(&qp_table->bitmap, qp->qpn); } EXPORT_SYMBOL_GPL(mlx4_qp_free); static int mlx4_CONF_SPECIAL_QP(struct mlx4_dev *dev, u32 base_qpn) { - return mlx4_cmd(dev, 0, base_qpn, 0, MLX4_CMD_CONF_SPECIAL_QP, - MLX4_CMD_TIME_CLASS_B); + return mlx4_cmd(dev, 0, base_qpn, + (dev->caps.flags & MLX4_DEV_CAP_FLAG_RAW_ETY) ? 4 : 0, + MLX4_CMD_CONF_SPECIAL_QP, MLX4_CMD_TIME_CLASS_B); } int mlx4_init_qp_table(struct mlx4_dev *dev) { struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; int err; + int reserved_from_top = 0; spin_lock_init(&qp_table->lock); INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC); @@ -265,9 +284,40 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) * block of special QPs must be aligned to a multiple of 8, so * round up. 
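*
* The remaining QP regions are carved out of the top of the QP range
* below, largest first; their combined size is passed to
* mlx4_bitmap_init() as reserved_from_top so those QPNs are never
* handed out by the allocator (consumers look them up with
* mlx4_qp_get_region()).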
*/ - dev->caps.sqp_start = ALIGN(dev->caps.reserved_qps, 8); + dev->caps.sqp_start = + ALIGN(dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 8); + + { + int sort[MLX4_QP_REGION_COUNT]; + int i, j, tmp; + int last_base = dev->caps.num_qps; + + for (i = 1; i < MLX4_QP_REGION_COUNT; ++i) + sort[i] = i; + + for (i = MLX4_QP_REGION_COUNT; i > 0; --i) { + for (j = 2; j < i; ++j) { + if (dev->caps.reserved_qps_cnt[sort[j]] > + dev->caps.reserved_qps_cnt[sort[j - 1]]) { + tmp = sort[j]; + sort[j] = sort[j - 1]; + sort[j - 1] = tmp; + } + } + } + + for (i = 1; i < MLX4_QP_REGION_COUNT; ++i) { + last_base -= dev->caps.reserved_qps_cnt[sort[i]]; + dev->caps.reserved_qps_base[sort[i]] = last_base; + reserved_from_top += + dev->caps.reserved_qps_cnt[sort[i]]; + } + + } + err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps, - (1 << 24) - 1, dev->caps.sqp_start + 8); + (1 << 23) - 1, dev->caps.sqp_start + 8, + reserved_from_top); if (err) return err; @@ -280,6 +330,20 @@ void mlx4_cleanup_qp_table(struct mlx4_dev *dev) mlx4_bitmap_cleanup(&mlx4_priv(dev)->qp_table.bitmap); } +int mlx4_qp_get_region(struct mlx4_dev *dev, + enum qp_region region, + int *base_qpn, int *cnt) +{ + if ((region < 0) || (region >= MLX4_QP_REGION_COUNT)) + return -EINVAL; + + *base_qpn = dev->caps.reserved_qps_base[region]; + *cnt = dev->caps.reserved_qps_cnt[region]; + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_qp_get_region); + int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp, struct mlx4_qp_context *context) { @@ -300,3 +364,34 @@ int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp, } EXPORT_SYMBOL_GPL(mlx4_qp_query); +int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt, + struct mlx4_qp_context *context, + struct mlx4_qp *qp, enum mlx4_qp_state *qp_state) +{ + int err; + int i; + enum mlx4_qp_state states[] = { + MLX4_QP_STATE_RST, + MLX4_QP_STATE_INIT, + MLX4_QP_STATE_RTR, + MLX4_QP_STATE_RTS + }; + + for (i = 0; i < ARRAY_SIZE(states) - 1; i++) { + context->flags &= cpu_to_be32(~(0xf << 28)); + context->flags |= cpu_to_be32(states[i + 1] << 28); + err = mlx4_qp_modify(dev, mtt, states[i], states[i + 1], + context, 0, 0, qp); + if (err) { + mlx4_err(dev, "Failed to bring QP to state: " + "%d with error: %d\n", + states[i + 1], err); + return err; + } + + *qp_state = states[i + 1]; + } + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_qp_to_ready); diff --git a/drivers/net/mlx4/reset.c b/drivers/net/mlx4/reset.c index e199715..3951b88 100644 --- a/drivers/net/mlx4/reset.c +++ b/drivers/net/mlx4/reset.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/net/mlx4/sense.c b/drivers/net/mlx4/sense.c new file mode 100644 index 0000000..d6c2ba4 --- /dev/null +++ b/drivers/net/mlx4/sense.c @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/errno.h> +#include <linux/if_ether.h> + +#include <linux/mlx4/cmd.h> + +#include "mlx4.h" + + +static int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port, + enum mlx4_port_type *type) +{ + u64 out_param; + int err = 0; + + err = mlx4_cmd_imm(dev, 0, &out_param, port, 0, + MLX4_CMD_SENSE_PORT, MLX4_CMD_TIME_CLASS_B); + if (err) { + mlx4_err(dev, "Sense command failed for port: %d\n", port); + return err; + } + + if (out_param > 2) { + mlx4_err(dev, "Sense returned illegal value: 0x%llx\n", out_param); + return -EINVAL; + } + + *type = out_param; + return 0; +} + +void mlx4_do_sense_ports(struct mlx4_dev *dev, + enum mlx4_port_type *stype, + enum mlx4_port_type *defaults) +{ + struct mlx4_sense *sense = &mlx4_priv(dev)->sense; + int err; + int i; + + for (i = 1; i <= dev->caps.num_ports; i++) { + stype[i-1] = 0; + if (sense->do_sense_port[i] && sense->sense_allowed[i] && + dev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) { + err = mlx4_SENSE_PORT(dev, i, &stype[i-1]); + if (err) + stype[i-1] = defaults[i-1]; + } else + stype[i-1] = defaults[i-1]; + } + + /* + * Adjust port configuration: + * If port 1 sensed nothing and port 2 is IB, set both as IB + * If port 2 sensed nothing and port 1 is Eth, set both as Eth + */ + if (stype[0] == MLX4_PORT_TYPE_ETH) { + for (i = 1; i < dev->caps.num_ports; i++) + stype[i] = stype[i] ? stype[i] : MLX4_PORT_TYPE_ETH; + } + if (stype[dev->caps.num_ports - 1] == MLX4_PORT_TYPE_IB) { + for (i = 0; i < dev->caps.num_ports - 1; i++) + stype[i] = stype[i] ? stype[i] : MLX4_PORT_TYPE_IB; + } + + /* + * If sensed nothing, remain in current configuration. + */ + for (i = 0; i < dev->caps.num_ports; i++) + stype[i] = stype[i] ? 
+
+static void mlx4_sense_port(struct work_struct *work)
+{
+	struct delayed_work *delay = container_of(work, struct delayed_work, work);
+	struct mlx4_sense *sense = container_of(delay, struct mlx4_sense,
+						sense_poll);
+	struct mlx4_dev *dev = sense->dev;
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	enum mlx4_port_type stype[MLX4_MAX_PORTS];
+
+	mutex_lock(&priv->port_mutex);
+	mlx4_do_sense_ports(dev, stype, &dev->caps.port_type[1]);
+
+	if (mlx4_check_port_params(dev, stype))
+		goto sense_again;
+
+	if (mlx4_change_port_types(dev, stype))
+		mlx4_err(dev, "Failed to change port_types\n");
+
+sense_again:
+	mutex_unlock(&priv->port_mutex);
+	if (sense->resched)
+		queue_delayed_work(sense->sense_wq, &sense->sense_poll,
+				   round_jiffies(MLX4_SENSE_RANGE));
+}
+
+void mlx4_start_sense(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct mlx4_sense *sense = &priv->sense;
+
+	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP))
+		return;
+
+	sense->resched = 1;
+	queue_delayed_work(sense->sense_wq, &sense->sense_poll,
+			   round_jiffies(MLX4_SENSE_RANGE));
+}
+
+
+void mlx4_stop_sense(struct mlx4_dev *dev)
+{
+	mlx4_priv(dev)->sense.resched = 0;
+}
+
+int mlx4_sense_init(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct mlx4_sense *sense = &priv->sense;
+	int port;
+
+	sense->dev = dev;
+	sense->sense_wq = create_singlethread_workqueue("mlx4_sense");
+	if (!sense->sense_wq)
+		return -ENOMEM;
+
+	for (port = 1; port <= dev->caps.num_ports; port++)
+		sense->do_sense_port[port] = 1;
+
+	INIT_DELAYED_WORK_DEFERRABLE(&sense->sense_poll, mlx4_sense_port);
+
+	return 0;
+}
+
+void mlx4_sense_cleanup(struct mlx4_dev *dev)
+{
+	mlx4_stop_sense(dev);
+	cancel_delayed_work(&mlx4_priv(dev)->sense.sense_poll);
+	destroy_workqueue(mlx4_priv(dev)->sense.sense_wq);
+}
+
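Taken together, sense.c gives the core a self-contained polling state machine. A sketch of the intended call order around device bring-up and teardown (illustrative; the actual call sites live in main.c and are not part of this hunk):

/* Sketch only -- not part of the patch. */
static int example_sense_lifecycle(struct mlx4_dev *dev)
{
	int err;

	err = mlx4_sense_init(dev);	/* workqueue + per-port flags */
	if (err)
		return err;

	mlx4_start_sense(dev);		/* no-op unless DPDP is supported */

	/* ... mlx4_sense_port() now fires roughly every
	 * MLX4_SENSE_RANGE jiffies and may flip port types ... */

	mlx4_stop_sense(dev);		/* clears resched; work may run once more */
	mlx4_sense_cleanup(dev);	/* cancels work, destroys workqueue */
	return 0;
}

Note that mlx4_stop_sense() only clears the resched flag, which is why mlx4_sense_cleanup() still cancels the delayed work before destroying the queue.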
diff --git a/drivers/net/mlx4/srq.c b/drivers/net/mlx4/srq.c
index d23f46d..fe9f218 100644
--- a/drivers/net/mlx4/srq.c
+++ b/drivers/net/mlx4/srq.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -244,7 +245,7 @@ int mlx4_init_srq_table(struct mlx4_dev *dev)
 	INIT_RADIX_TREE(&srq_table->tree, GFP_ATOMIC);
 
 	err = mlx4_bitmap_init(&srq_table->bitmap, dev->caps.num_srqs,
-			       dev->caps.num_srqs - 1, dev->caps.reserved_srqs);
+			       dev->caps.num_srqs - 1, dev->caps.reserved_srqs, 0);
 	if (err)
 		return err;
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index 77323a7..0f82293 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -55,6 +55,7 @@ enum {
	MLX4_CMD_CLOSE_PORT	 = 0xa,
	MLX4_CMD_QUERY_HCA	 = 0xb,
	MLX4_CMD_QUERY_PORT	 = 0x43,
+	MLX4_CMD_SENSE_PORT	 = 0x4d,
	MLX4_CMD_SET_PORT	 = 0xc,
	MLX4_CMD_ACCESS_DDR	 = 0x2e,
	MLX4_CMD_MAP_ICM	 = 0xffa,
@@ -132,6 +133,15 @@ enum {
 	MLX4_MAILBOX_SIZE	= 4096
 };
 
+enum {
+	/* set port opcode modifiers */
+	MLX4_SET_PORT_GENERAL	 = 0x0,
+	MLX4_SET_PORT_RQP_CALC	 = 0x1,
+	MLX4_SET_PORT_MAC_TABLE	 = 0x2,
+	MLX4_SET_PORT_VLAN_TABLE = 0x3,
+	MLX4_SET_PORT_PRIO_MAP	 = 0x4,
+};
+
 struct mlx4_dev;
 
 struct mlx4_cmd_mailbox {
diff --git a/include/linux/mlx4/cq.h b/include/linux/mlx4/cq.h
index 5d8625e..6f65b2c 100644
--- a/include/linux/mlx4/cq.h
+++ b/include/linux/mlx4/cq.h
@@ -38,39 +38,19 @@
 #include <linux/mlx4/device.h>
 #include <linux/mlx4/doorbell.h>
 
-struct mlx4_cq_context {
-	__be32			flags;
-	u16			reserved1[3];
-	__be16			page_offset;
-	__be32			logsize_usrpage;
-	u16			cq_period;
-	u16			cq_max_count;
-	u8			reserved4[3];
-	u8			comp_eqn;
-	u8			log_page_size;
-	u8			reserved5[2];
-	u8			mtt_base_addr_h;
-	__be32			mtt_base_addr_l;
-	__be32			last_notified_index;
-	__be32			solicit_producer_index;
-	__be32			consumer_index;
-	__be32			producer_index;
-	u32			reserved6[2];
-	__be64			db_rec_addr;
-};
-
 struct mlx4_cqe {
-	__be32			my_qpn;
+	__be32			vlan_my_qpn;
 	__be32			immed_rss_invalid;
 	__be32			g_mlpath_rqpn;
-	u8			sl;
-	u8			reserved1;
+	__be16			sl_vid;
 	__be16			rlid;
-	__be32			ipoib_status;
+	__be16			status;
+	u8			ipv6_ext_mask;
+	u8			badfcs_enc;
 	__be32			byte_cnt;
 	__be16			wqe_index;
 	__be16			checksum;
-	u8			reserved2[3];
+	u8			reserved[3];
 	u8			owner_sr_opcode;
 };
 
@@ -85,6 +65,11 @@ struct mlx4_err_cqe {
 };
 
 enum {
+	MLX4_CQE_VLAN_PRESENT_MASK	= 1 << 29,
+	MLX4_CQE_QPN_MASK		= 0xffffff,
+};
+
+enum {
 	MLX4_CQE_OWNER_MASK	= 0x80,
 	MLX4_CQE_IS_SEND_MASK	= 0x40,
 	MLX4_CQE_OPCODE_MASK	= 0x1f
@@ -106,6 +91,22 @@ enum {
 	MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR		= 0x22,
 };
 
+enum {
+	MLX4_CQE_STATUS_IPV4		= 1 << 6,
+	MLX4_CQE_STATUS_IPV4F		= 1 << 7,
+	MLX4_CQE_STATUS_IPV6		= 1 << 8,
+	MLX4_CQE_STATUS_IPV4OPT		= 1 << 9,
+	MLX4_CQE_STATUS_TCP		= 1 << 10,
+	MLX4_CQE_STATUS_UDP		= 1 << 11,
+	MLX4_CQE_STATUS_IPOK		= 1 << 12,
+};
+
+enum {
+	MLX4_CQE_LLC			= 1,
+	MLX4_CQE_SNAP			= 1 << 1,
+	MLX4_CQE_BAD_FCS		= 1 << 4,
+};
+
 static inline void mlx4_cq_arm(struct mlx4_cq *cq, u32 cmd,
			       void __iomem *uar_page,
			       spinlock_t *doorbell_lock)
@@ -141,8 +142,9 @@ enum {
 	MLX4_CQ_DB_REQ_NOT		= 2 << 24
 };
 
-
 int mlx4_cq_modify(struct mlx4_dev *dev, struct mlx4_cq *cq,
-		   struct mlx4_cq_context *context, int resize);
+		   u16 count, u16 period);
+int mlx4_cq_resize(struct mlx4_dev *dev, struct mlx4_cq *cq,
+		   int entries, struct mlx4_mtt *mtt);
 
 #endif /* MLX4_CQ_H */
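The status, ipv6_ext_mask and badfcs_enc fields replace the old ipoib_status word so that an Ethernet receive path can validate hardware checksumming per CQE. A sketch of how a consumer might test the new bits, in the spirit of mlx4_en's RX completion handling; the helper itself is illustrative:

/* Sketch only -- not part of the patch. */
static int example_rx_csum_ok(struct mlx4_cqe *cqe)
{
	/* L3: IP header recognised and checksum valid? */
	if (!(cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)))
		return 0;

	/* L4: only trust the hardware checksum for TCP/UDP frames. */
	if (!(cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP |
					MLX4_CQE_STATUS_UDP)))
		return 0;

	/* Frames with a bad FCS are flagged separately. */
	if (cqe->badfcs_enc & MLX4_CQE_BAD_FCS)
		return 0;

	return 1;
}

The QP number is likewise extracted with be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK, with MLX4_CQE_VLAN_PRESENT_MASK indicating a stripped VLAN tag in sl_vid.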
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 645dedc..b3ed1cb 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -60,6 +60,8 @@ enum {
 	MLX4_DEV_CAP_FLAG_IPOIB_CSUM	= 1 <<  7,
 	MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR	= 1 <<  8,
 	MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR	= 1 <<  9,
+	MLX4_DEV_CAP_FLAG_DPDP		= 1 << 12,
+	MLX4_DEV_CAP_FLAG_RAW_ETY	= 1 << 13,
 	MLX4_DEV_CAP_FLAG_MEM_WINDOW	= 1 << 16,
 	MLX4_DEV_CAP_FLAG_APM		= 1 << 17,
 	MLX4_DEV_CAP_FLAG_ATOMIC	= 1 << 18,
@@ -68,6 +70,14 @@ enum {
 	MLX4_DEV_CAP_FLAG_UD_MCAST	= 1 << 21
 };
 
+enum {
+	MLX4_BMME_FLAG_LOCAL_INV	= 1 <<  6,
+	MLX4_BMME_FLAG_REMOTE_INV	= 1 <<  7,
+	MLX4_BMME_FLAG_TYPE_2_WIN	= 1 <<  9,
+	MLX4_BMME_FLAG_RESERVED_LKEY	= 1 << 10,
+	MLX4_BMME_FLAG_FAST_REG_WR	= 1 << 11,
+};
+
 enum mlx4_event {
 	MLX4_EVENT_TYPE_COMP		= 0x00,
 	MLX4_EVENT_TYPE_PATH_MIG	= 0x01,
@@ -133,6 +143,31 @@ enum {
 	MLX4_STAT_RATE_OFFSET	= 5
 };
 
+enum {
+	MLX4_MTT_FLAG_PRESENT		= 1
+};
+
+enum qp_region {
+	MLX4_QP_REGION_FW = 0,
+	MLX4_QP_REGION_ETH_ADDR,
+	MLX4_QP_REGION_FC_ADDR,
+	MLX4_QP_REGION_COUNT
+};
+
+enum mlx4_port_type {
+	MLX4_PORT_TYPE_IB	= 1,
+	MLX4_PORT_TYPE_ETH	= 2,
+	MLX4_PORT_TYPE_AUTO	= 3
+};
+
+enum mlx4_special_vlan_idx {
+	MLX4_NO_VLAN_IDX	= 0,
+	MLX4_VLAN_MISS_IDX,
+	MLX4_VLAN_REGULAR
+};
+
+#define MLX4_LEAST_ATTACHED_VECTOR	0xffffffff
+
 static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor)
 {
 	return (major << 32) | (minor << 16) | subminor;
@@ -142,9 +177,16 @@ struct mlx4_caps {
 	u64			fw_ver;
 	int			num_ports;
 	int			vl_cap[MLX4_MAX_PORTS + 1];
-	int			mtu_cap[MLX4_MAX_PORTS + 1];
+	int			ib_mtu_cap[MLX4_MAX_PORTS + 1];
+	__be32			ib_port_def_cap[MLX4_MAX_PORTS + 1];
+	u64			def_mac[MLX4_MAX_PORTS + 1];
+	int			eth_mtu_cap[MLX4_MAX_PORTS + 1];
 	int			gid_table_len[MLX4_MAX_PORTS + 1];
 	int			pkey_table_len[MLX4_MAX_PORTS + 1];
+	int			trans_type[MLX4_MAX_PORTS + 1];
+	int			vendor_oui[MLX4_MAX_PORTS + 1];
+	int			wavelength[MLX4_MAX_PORTS + 1];
+	u64			trans_code[MLX4_MAX_PORTS + 1];
 	int			local_ca_ack_delay;
 	int			num_uars;
 	int			bf_reg_size;
@@ -157,7 +199,6 @@ struct mlx4_caps {
 	int			max_rq_desc_sz;
 	int			max_qp_init_rdma;
 	int			max_qp_dest_rdma;
-	int			reserved_qps;
 	int			sqp_start;
 	int			num_srqs;
 	int			max_srq_wqes;
@@ -168,8 +209,10 @@ struct mlx4_caps {
 	int			reserved_cqs;
 	int			num_eqs;
 	int			reserved_eqs;
+	int			num_comp_vectors;
 	int			num_mpts;
 	int			num_mtt_segs;
+	int			mtts_per_seg;
 	int			fmr_reserved_mtts;
 	int			reserved_mtts;
 	int			reserved_mrws;
@@ -184,9 +227,21 @@ struct mlx4_caps {
 	u32			max_msg_sz;
 	u32			page_size_cap;
 	u32			flags;
+	u32			bmme_flags;
+	u32			reserved_lkey;
 	u16			stat_rate_support;
 	u8			port_width_cap[MLX4_MAX_PORTS + 1];
 	int			max_gso_sz;
+	int			reserved_qps_cnt[MLX4_QP_REGION_COUNT];
+	int			reserved_qps;
+	int			reserved_qps_base[MLX4_QP_REGION_COUNT];
+	int			log_num_macs;
+	int			log_num_vlans;
+	int			log_num_prios;
+	enum mlx4_port_type	port_type[MLX4_MAX_PORTS + 1];
+	u8			supported_type[MLX4_MAX_PORTS + 1];
+	u32			port_mask;
+	enum mlx4_port_type	possible_type[MLX4_MAX_PORTS + 1];
 };
 
 struct mlx4_buf_list {
@@ -195,10 +250,8 @@ struct mlx4_buf_list {
 };
 
 struct mlx4_buf {
-	struct {
-		struct mlx4_buf_list	direct;
-		struct mlx4_buf_list   *page_list;
-	} u;
+	struct mlx4_buf_list	direct;
+	struct mlx4_buf_list   *page_list;
 	int			nbufs;
 	int			npages;
 	int			page_shift;
@@ -210,6 +263,38 @@
 	int			page_shift;
 };
 
+enum {
+	MLX4_DB_PER_PAGE = PAGE_SIZE / 4
+};
+
+struct mlx4_db_pgdir {
+	struct list_head	list;
+	DECLARE_BITMAP(order0, MLX4_DB_PER_PAGE);
+	DECLARE_BITMAP(order1, MLX4_DB_PER_PAGE / 2);
+	unsigned long	       *bits[2];
+	__be32		       *db_page;
+	dma_addr_t		db_dma;
+};
+
+struct mlx4_ib_user_db_page;
+
+struct mlx4_db {
+	__be32			*db;
+	union {
+		struct mlx4_db_pgdir		*pgdir;
+		struct mlx4_ib_user_db_page	*user_page;
+	} u;
+	dma_addr_t		dma;
+	int			index;
+	int			order;
+};
+
+struct mlx4_hwq_resources {
+	struct mlx4_db		db;
+	struct mlx4_mtt		mtt;
+	struct mlx4_buf		buf;
+};
+
 struct mlx4_mr {
 	struct mlx4_mtt		mtt;
 	u64			iova;
@@ -249,6 +334,7 @@ struct mlx4_cq {
 	int			arm_sn;
 
 	int			cqn;
+	int			comp_eq_idx;
 
 	atomic_t		refcount;
 	struct completion	free;
@@ -311,9 +397,30 @@ struct mlx4_init_port_param {
 	u64			si_guid;
 };
 
+static inline void mlx4_query_steer_cap(struct mlx4_dev *dev, int *log_mac,
+					int *log_vlan, int *log_prio)
+{
+	*log_mac = dev->caps.log_num_macs;
+	*log_vlan = dev->caps.log_num_vlans;
+	*log_prio = dev->caps.log_num_prios;
+}
+
+#define mlx4_foreach_port(port, dev, type)				\
+	for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++)	\
+		if ((type == MLX4_PORT_TYPE_IB ? (dev)->caps.port_mask : \
+		     ~(dev)->caps.port_mask) & 1 << ((port)-1))
+
 int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 		   struct mlx4_buf *buf);
 void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
+static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset)
+{
+	if (BITS_PER_LONG == 64 || buf->nbufs == 1)
+		return buf->direct.buf + offset;
+	else
+		return buf->page_list[offset >> PAGE_SHIFT].buf +
+			(offset & (PAGE_SIZE - 1));
+}
 
 int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn);
 void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn);
@@ -326,8 +433,14 @@ int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
 void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt);
 u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt);
 
+int mlx4_mr_reserve_range(struct mlx4_dev *dev, int cnt, int align, u32 *base_mridx);
+void mlx4_mr_release_range(struct mlx4_dev *dev, u32 base_mridx, int cnt);
+int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
+			   u64 iova, u64 size, u32 access, int npages,
+			   int page_shift, struct mlx4_mr *mr);
 int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
 		  int npages, int page_shift, struct mlx4_mr *mr);
+void mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr);
 void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr);
 int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr);
 int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
@@ -335,11 +448,23 @@ int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 		       struct mlx4_buf *buf);
 
+int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order);
+void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db);
+
+int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
+		       int size, int max_direct);
+void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres,
+		       int size);
+
 int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
-		  struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq);
+		  struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq,
+		  unsigned vector, int collapsed);
 void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq);
 
-int mlx4_qp_alloc(struct mlx4_dev *dev, int sqpn, struct mlx4_qp *qp);
+int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base);
+void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt);
+
+int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp);
 void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp);
 
 int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt,
@@ -351,20 +476,33 @@ int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_waterm
 
 int mlx4_INIT_PORT(struct mlx4_dev *dev, int port);
 int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port);
 
-int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]);
+int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+			  int block_mcast_loopback);
 int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]);
 
+int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index);
+void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int index);
+
+int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
+void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index);
+
+int mlx4_map_phys_fmr_fbo(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
+			  u64 *page_list, int npages, u64 iova, u32 fbo,
+			  u32 len, u32 *lkey, u32 *rkey, int same_key);
 int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
 		      int npages, u64 iova, u32 *lkey, u32 *rkey);
+int mlx4_fmr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
+			    u32 access, int max_pages, int max_maps,
+			    u8 page_shift, struct mlx4_fmr *fmr);
 int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
 		   int max_maps, u8 page_shift, struct mlx4_fmr *fmr);
 int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
 void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
 		    u32 *lkey, u32 *rkey);
+int mlx4_fmr_free_reserved(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
 int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
 int mlx4_SYNC_TPT(struct mlx4_dev *dev);
 
-int mlx4_query_diag_counters(struct mlx4_dev *melx4_dev, int array_length,
-			     int in_modifier, unsigned int in_offset[],
-			     u32 counter_out[]);
+int mlx4_query_diag_counters(struct mlx4_dev *mlx4_dev, int array_length,
+			     u8 op_modifier, u32 in_offset[], u32 counter_out[]);
 
 #endif /* MLX4_DEVICE_H */
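mlx4_cq_alloc() grows vector/collapsed arguments above, and mlx4_alloc_hwq_res() bundles the buffer, MTT and doorbell setup that every queue needs. A sketch of how the two compose; error paths are abbreviated, and the reading of MLX4_LEAST_ATTACHED_VECTOR as "pick the least-loaded completion EQ" is inferred from its use alongside IB_CQ_VECTOR_LEAST_ATTACHED in the mlx4_ib hunks:

/* Sketch only -- not part of the patch. */
static int example_create_cq(struct mlx4_dev *dev, struct mlx4_uar *uar,
			     int entries, unsigned vector,
			     struct mlx4_hwq_resources *wqres,
			     struct mlx4_cq *cq)
{
	int size = entries * sizeof(struct mlx4_cqe);
	int err;

	/* Buffer + MTT + doorbell in one call. */
	err = mlx4_alloc_hwq_res(dev, wqres, size, PAGE_SIZE * 2);
	if (err)
		return err;

	/* vector may be a specific completion EQ index or
	 * MLX4_LEAST_ATTACHED_VECTOR; collapsed = 0 for a normal CQ. */
	err = mlx4_cq_alloc(dev, entries, &wqres->mtt, uar,
			    wqres->db.dma, cq, vector, 0);
	if (err)
		mlx4_free_hwq_res(dev, wqres, size);

	return err;
}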
diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h
index 1b835ca..55b45a6 100644
--- a/include/linux/mlx4/driver.h
+++ b/include/linux/mlx4/driver.h
@@ -44,16 +44,22 @@ enum mlx4_dev_event {
 	MLX4_DEV_EVENT_PORT_REINIT,
 };
 
+enum mlx4_query_reply {
+	MLX4_QUERY_NOT_MINE	= -1,
+	MLX4_QUERY_MINE_NOPORT	= 0
+};
+
 struct mlx4_interface {
 	void *			(*add)	 (struct mlx4_dev *dev);
 	void			(*remove)(struct mlx4_dev *dev, void *context);
 	void			(*event) (struct mlx4_dev *dev, void *context,
-					  enum mlx4_dev_event event, int subtype,
-					  int port);
+					  enum mlx4_dev_event event, int port);
+	enum mlx4_query_reply	(*query) (void *context, void *);
 	struct list_head	list;
 };
 
 int mlx4_register_interface(struct mlx4_interface *intf);
 void mlx4_unregister_interface(struct mlx4_interface *intf);
+struct mlx4_dev *mlx4_query_interface(void *, int *port);
 
 #endif /* MLX4_DRIVER_H */
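The query hook and mlx4_query_interface() give the core a way to ask each registered client whether a foreign context belongs to it, for example to map a net_device back to an mlx4 device and port. A sketch of a client filling in the extended operations; every example_* symbol is hypothetical:

/* Sketch only -- not part of the patch. */
struct example_ctx {
	struct mlx4_dev *dev;
	void *int_dev;			/* whatever handle the caller passes around */
};

static enum mlx4_query_reply example_query(void *context, void *int_dev)
{
	struct example_ctx *ctx = context;

	/* Claim the context if int_dev is one of ours. */
	return ctx->int_dev == int_dev ? MLX4_QUERY_MINE_NOPORT
				       : MLX4_QUERY_NOT_MINE;
}

static struct mlx4_interface example_interface = {
	.add	= example_add,		/* as before */
	.remove	= example_remove,
	.event	= example_event,	/* note: the subtype argument is gone */
	.query	= example_query,
};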
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index ac33c5c..9a628a6 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -151,24 +151,35 @@ struct mlx4_qp_context {
 	u8			reserved4[2];
 	u8			mtt_base_addr_h;
 	__be32			mtt_base_addr_l;
-	u32			reserved5[10];
+	u8			VE;
+	u8			reserved5;
+	__be16			VFT_id_prio;
+	u8			reserved6;
+	u8			exch_size;
+	__be16			exch_base;
+	u8			VFT_hop_cnt;
+	u8			my_fc_id_idx;
+	__be16			reserved7;
+	u32			reserved8[7];
 };
 
 /* Which firmware version adds support for NEC (NoErrorCompletion) bit */
 #define MLX4_FW_VER_WQE_CTRL_NEC mlx4_fw_ver(2, 2, 232)
 
 enum {
-	MLX4_WQE_CTRL_NEC	= 1 << 29,
-	MLX4_WQE_CTRL_FENCE	= 1 << 6,
-	MLX4_WQE_CTRL_CQ_UPDATE	= 3 << 2,
-	MLX4_WQE_CTRL_SOLICITED	= 1 << 1,
-	MLX4_WQE_CTRL_IP_CSUM	= 1 << 4,
-	MLX4_WQE_CTRL_TCP_UDP_CSUM = 1 << 5,
+	MLX4_WQE_CTRL_NEC		= 1 << 29,
+	MLX4_WQE_CTRL_FENCE		= 1 << 6,
+	MLX4_WQE_CTRL_CQ_UPDATE		= 3 << 2,
+	MLX4_WQE_CTRL_SOLICITED		= 1 << 1,
+	MLX4_WQE_CTRL_IP_CSUM		= 1 << 4,
+	MLX4_WQE_CTRL_TCP_UDP_CSUM	= 1 << 5,
+	MLX4_WQE_CTRL_INS_VLAN		= 1 << 6,
 };
 
 struct mlx4_wqe_ctrl_seg {
 	__be32			owner_opcode;
-	u8			reserved2[3];
+	__be16			vlan_tag;
+	u8			ins_vlan;
 	u8			fence_size;
 	/*
 	 * High 24 bits are SRC remote buffer; low 8 bits are flags:
@@ -189,7 +200,8 @@ struct mlx4_wqe_ctrl_seg {
 
 enum {
 	MLX4_WQE_MLX_VL15	= 1 << 17,
-	MLX4_WQE_MLX_SLR	= 1 << 16
+	MLX4_WQE_MLX_SLR	= 1 << 16,
+	MLX4_WQE_MLX_ICRC	= 1 << 4
 };
 
 struct mlx4_wqe_mlx_seg {
@@ -219,9 +231,9 @@ struct mlx4_wqe_datagram_seg {
 	__be32			reservd[2];
 };
 
-struct mlx4_lso_seg {
-	__be32			mss_hdr_size;
-	__be32			header[0];
+struct mlx4_wqe_lso_seg {
+	__be32			mss_hdr_size;
+	__be32			header[0];
 };
 
 struct mlx4_wqe_bind_seg {
@@ -233,6 +245,14 @@ struct mlx4_wqe_bind_seg {
 	__be64			length;
 };
 
+enum {
+	MLX4_WQE_FMR_PERM_LOCAL_READ	= 1 << 27,
+	MLX4_WQE_FMR_PERM_LOCAL_WRITE	= 1 << 28,
+	MLX4_WQE_FMR_PERM_REMOTE_READ	= 1 << 29,
+	MLX4_WQE_FMR_PERM_REMOTE_WRITE	= 1 << 30,
+	MLX4_WQE_FMR_PERM_ATOMIC	= 1 << 31
+};
+
 struct mlx4_wqe_fmr_seg {
 	__be32			flags;
 	__be32			mem_key;
@@ -255,11 +275,11 @@ struct mlx4_wqe_fmr_ext_seg {
 };
 
 struct mlx4_wqe_local_inval_seg {
-	u8			flags;
-	u8			reserved1[3];
+	__be32			flags;
+	u32			reserved1;
 	__be32			mem_key;
-	u8			reserved2[3];
-	u8			guest_id;
+	u32			reserved2[2];
+	__be32			guest_id;
 	__be64			pa;
 };
 
@@ -296,6 +316,10 @@ int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp,
 		  struct mlx4_qp_context *context);
 
+int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
+		     struct mlx4_qp_context *context,
+		     struct mlx4_qp *qp, enum mlx4_qp_state *qp_state);
+
 static inline struct mlx4_qp *__mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn)
 {
 	return radix_tree_lookup(&dev->qp_table_tree, qpn & (dev->caps.num_qps - 1));
@@ -303,4 +327,8 @@ static inline struct mlx4_qp *__mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn)
 
 void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp);
 
+int mlx4_qp_get_region(struct mlx4_dev *dev,
+		       enum qp_region region,
+		       int *base_qpn, int *cnt);
+
 #endif /* MLX4_QP_H */
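Finally, the region accessor declared above pairs with the reservation logic in qp.c earlier in this patch; an Ethernet consumer would locate its steering QPNs roughly like this (sketch; the caller and debug message are illustrative):

/* Sketch only -- not part of the patch. */
static int example_find_eth_qpns(struct mlx4_dev *dev)
{
	int base_qpn, cnt, err;

	err = mlx4_qp_get_region(dev, MLX4_QP_REGION_ETH_ADDR,
				 &base_qpn, &cnt);
	if (err)
		return err;

	/* QPNs [base_qpn, base_qpn + cnt) were reserved off the top
	 * of the QP space by mlx4_init_qp_table(). */
	mlx4_dbg(dev, "ETH QP region: base 0x%x, count %d\n",
		 base_qpn, cnt);
	return 0;
}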