From: ddugger@redhat.com <ddugger@redhat.com> Date: Wed, 29 Apr 2009 18:55:05 -0600 Subject: [pci] add SR-IOV API for Physical Function driver Message-id: 20090430005505.GS31744@sobek.n0ano.com O-Subject: [RHEL5.4 PATCH 14/17 V2] BZ493152: Backport: PCI: add SR-IOV API for Physical Function driver Bugzilla: 493152 RH-Acked-by: Rik van Riel <riel@redhat.com> RH-Acked-by: Chris Wright <chrisw@redhat.com> (Note: the only change to this patch is to move the `is_virtfn' bit to a different place in the pci structure, facilitating the merge of the bare metal VTd patches.) Upstream status: commit dd7cc44d0bcec5e9c42fe52e88dc254ae62eac8d Author: Yu Zhao <yu.zhao@intel.com> Date: Fri Mar 20 11:25:15 2009 +0800 PCI: add SR-IOV API for Physical Function driver Add or remove the Virtual Function when the SR-IOV is enabled or disabled by the device driver. This can happen anytime rather than only at the device probe stage. Reviewed-by: Matthew Wilcox <willy@linux.intel.com> Signed-off-by: Yu Zhao <yu.zhao@intel.com> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org> Signed-off-by: Gerd Hoffman <kraxel@redhat.com> Signed-off-by: Don Dugger <donald.d.dugger@intel.com> diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 2ac1ac0..2db7950 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -13,6 +13,7 @@ #include <linux/delay.h> #include "pci.h" +#define VIRTFN_ID_LEN 16 static inline u8 virtfn_bus(struct pci_dev *dev, int id) { @@ -26,6 +27,280 @@ static inline u8 virtfn_devfn(struct pci_dev *dev, int id) dev->sriov->stride * id) & 0xff; } +static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr) +{ + struct pci_bus *child; + + if (bus->number == busnr) + return bus; + + child = pci_find_bus(pci_domain_nr(bus), busnr); + if (child) + return child; + + child = pci_add_new_bus(bus, NULL, busnr); + if (!child) + return NULL; + + child->subordinate = busnr; + + return child; +} + +static void virtfn_remove_bus(struct pci_bus *bus, int busnr) +{ + struct pci_bus *child; + + if (bus->number == busnr) + return; + + child = pci_find_bus(pci_domain_nr(bus), busnr); + BUG_ON(!child); + + if (list_empty(&child->devices)) + pci_remove_bus(child); +} + +static int virtfn_add(struct pci_dev *dev, int id) +{ + int i; + int rc; + u64 size; + char buf[VIRTFN_ID_LEN]; + struct pci_dev *virtfn; + struct resource *res; + struct pci_sriov *iov = dev->sriov; + + virtfn = kzalloc(sizeof(struct pci_dev), GFP_KERNEL); + if (!virtfn) + return -ENOMEM; + + mutex_lock(&iov->dev->sriov->lock); + virtfn->bus = virtfn_add_bus(dev->bus, virtfn_bus(dev, id)); + if (!virtfn->bus) { + kfree(virtfn); + mutex_unlock(&iov->dev->sriov->lock); + return -ENOMEM; + } + virtfn->devfn = virtfn_devfn(dev, id); + virtfn->vendor = dev->vendor; + pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_DID, &virtfn->device); + pci_setup_device(virtfn); + virtfn->dev.parent = dev->dev.parent; + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = iov->res + i; + if (!res->parent) + continue; + virtfn->resource[i].name = pci_name(virtfn); + virtfn->resource[i].flags = res->flags; + size = res->end - res->start + 1; + do_div(size, iov->total); + virtfn->resource[i].start = res->start + size * id; + virtfn->resource[i].end = virtfn->resource[i].start + size - 1; + rc = request_resource(res, &virtfn->resource[i]); + BUG_ON(rc); + } + + pci_device_add(virtfn, virtfn->bus); + mutex_unlock(&iov->dev->sriov->lock); + + virtfn->physfn = pci_dev_get(dev); + virtfn->is_virtfn = 1; + + pci_bus_add_device(virtfn); + sprintf(buf, "virtfn%u", id); + rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf); + if (rc) + goto failed1; + rc = sysfs_create_link(&virtfn->dev.kobj, &dev->dev.kobj, "physfn"); + if (rc) + goto failed2; + + kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE); + + return 0; + +failed2: + sysfs_remove_link(&dev->dev.kobj, buf); +failed1: + pci_dev_put(dev); + mutex_lock(&iov->dev->sriov->lock); + pci_remove_bus_device(virtfn); + virtfn_remove_bus(dev->bus, virtfn_bus(dev, id)); + mutex_unlock(&iov->dev->sriov->lock); + + return rc; +} + +static void virtfn_remove(struct pci_dev *dev, int id) +{ + char buf[VIRTFN_ID_LEN]; + struct pci_bus *bus; + struct pci_dev *virtfn; + struct pci_sriov *iov = dev->sriov; + + bus = pci_find_bus(pci_domain_nr(dev->bus), virtfn_bus(dev, id)); + if (!bus) + return; + + virtfn = pci_get_slot(bus, virtfn_devfn(dev, id)); + if (!virtfn) + return; + + pci_dev_put(virtfn); + + sprintf(buf, "virtfn%u", id); + sysfs_remove_link(&dev->dev.kobj, buf); + sysfs_remove_link(&virtfn->dev.kobj, "physfn"); + + mutex_lock(&iov->dev->sriov->lock); + pci_remove_bus_device(virtfn); + virtfn_remove_bus(dev->bus, virtfn_bus(dev, id)); + mutex_unlock(&iov->dev->sriov->lock); + + pci_dev_put(dev); +} + +static int sriov_enable(struct pci_dev *dev, int nr_virtfn) +{ + int rc; + int i, j; + u16 offset, stride, initial; + struct resource *res; + struct pci_dev *pdev; + struct pci_sriov *iov = dev->sriov; + + if (!nr_virtfn) + return 0; + + if (iov->nr_virtfn) + return -EINVAL; + + pci_read_config_word(dev, iov->pos + PCI_SRIOV_INITIAL_VF, &initial); + if (initial > iov->total || + (!(iov->cap & PCI_SRIOV_CAP_VFM) && (initial != iov->total))) + return -EIO; + + if (nr_virtfn < 0 || nr_virtfn > iov->total || + (!(iov->cap & PCI_SRIOV_CAP_VFM) && (nr_virtfn > initial))) + return -EINVAL; + + pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); + pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &offset); + pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &stride); + if (!offset || (nr_virtfn > 1 && !stride)) + return -EIO; + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = iov->res + i; + if (!res->flags) + continue; + rc = pci_assign_resource(dev, i + PCI_IOV_RESOURCES); + if (rc) { + dev_err(&dev->dev, "not enough MMIO resources for SR-IOV\n"); + goto failed1; + } + } + + iov->offset = offset; + iov->stride = stride; + + if (virtfn_bus(dev, nr_virtfn - 1) > dev->bus->subordinate) { + dev_err(&dev->dev, "SR-IOV: bus number out of range\n"); + return -ENOMEM; + } + + if (iov->link != dev->devfn) { + pdev = pci_get_slot(dev->bus, iov->link); + if (!pdev) + return -ENODEV; + + pci_dev_put(pdev); + + if (!pdev->is_physfn) + return -ENODEV; + + rc = sysfs_create_link(&dev->dev.kobj, + &pdev->dev.kobj, "dep_link"); + if (rc) + return rc; + } + + iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE; + pci_block_user_cfg_access(dev); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + msleep(100); + pci_unblock_user_cfg_access(dev); + + iov->initial = initial; + if (nr_virtfn < initial) + initial = nr_virtfn; + + for (i = 0; i < initial; i++) { + rc = virtfn_add(dev, i); + if (rc) + goto failed2; + } + + kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE); + iov->nr_virtfn = nr_virtfn; + + return 0; + +failed2: + for (j = 0; j < i; j++) + virtfn_remove(dev, j); + + iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); + pci_block_user_cfg_access(dev); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + ssleep(1); + pci_unblock_user_cfg_access(dev); + + if (iov->link != dev->devfn) + sysfs_remove_link(&dev->dev.kobj, "dep_link"); + +failed1: + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = iov->res + i; + if (res->parent) + release_resource(res); + } + + return rc; +} + +static void sriov_disable(struct pci_dev *dev) +{ + int i; + struct resource *res; + struct pci_sriov *iov = dev->sriov; + + if (!iov->nr_virtfn) + return; + + for (i = 0; i < iov->nr_virtfn; i++) + virtfn_remove(dev, i); + + iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); + pci_block_user_cfg_access(dev); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + ssleep(1); + pci_unblock_user_cfg_access(dev); + + if (iov->link != dev->devfn) + sysfs_remove_link(&dev->dev.kobj, "dep_link"); + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = iov->res + i; + if (res->parent) + release_resource(res); + } + + iov->nr_virtfn = 0; +} + static int sriov_init(struct pci_dev *dev, int pos) { int i; @@ -113,6 +388,8 @@ found: static void sriov_release(struct pci_dev *dev) { + BUG_ON(dev->sriov->nr_virtfn); + if (dev == dev->sriov->dev) mutex_destroy(&dev->sriov->lock); else @@ -136,6 +413,7 @@ static void sriov_restore_state(struct pci_dev *dev) pci_update_resource(dev, iov->res + i, i + PCI_IOV_RESOURCES); pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, iov->nr_virtfn); pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); if (iov->ctrl & PCI_SRIOV_CTRL_VFE) msleep(100); @@ -223,3 +501,35 @@ int pci_iov_bus_range(struct pci_bus *bus) return max ? max - bus->number : 0; } + +/** + * pci_enable_sriov - enable the SR-IOV capability + * @dev: the PCI device + * + * Returns 0 on success, or negative on failure. + */ +int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) +{ + might_sleep(); + + if (!dev->is_physfn) + return -ENODEV; + + return sriov_enable(dev, nr_virtfn); +} +EXPORT_SYMBOL_GPL(pci_enable_sriov); + +/** + * pci_disable_sriov - disable the SR-IOV capability + * @dev: the PCI device + */ +void pci_disable_sriov(struct pci_dev *dev) +{ + might_sleep(); + + if (!dev->is_physfn) + return; + + sriov_disable(dev); +} +EXPORT_SYMBOL_GPL(pci_disable_sriov); diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index be77c94..0e97265 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -122,6 +122,8 @@ struct pci_sriov { u32 cap; /* SR-IOV Capabilities */ u16 ctrl; /* SR-IOV Control */ u16 total; /* total VFs associated with the PF */ + u16 initial; /* initial VFs associated with the PF */ + u16 nr_virtfn; /* number of VFs available */ u16 offset; /* first VF Routing ID offset */ u16 stride; /* following VF stride */ u32 pgsz; /* page size for BAR alignment */ diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index aaac923..c7b7ac9 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -117,16 +117,33 @@ EXPORT_SYMBOL_GPL(pci_claim_resource); int pci_assign_resource(struct pci_dev *dev, int resno) { struct pci_bus *bus = dev->bus; - struct resource *res = dev->resource + resno; + struct resource *res; resource_size_t size, min, align; int ret; + if (resno <= PCI_NUM_RESOURCES) { + res = dev->resource + resno; +#ifdef CONFIG_PCI_IOV + } else if (resno >= PCI_IOV_RESOURCES && + resno <= PCI_IOV_RESOURCE_END) { + + BUG_ON(!dev->is_physfn); + + res = dev->sriov->res + (resno - PCI_IOV_RESOURCES); +#endif + } else { + dev_err(&dev->dev, "invalid resource #%d\n", resno); + + return -EINVAL; + } + size = res->end - res->start + 1; min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM; /* The bridge resources are special, as their size != alignment. Sizing routines return required alignment in the "start" field. */ - align = (resno < PCI_BRIDGE_RESOURCES) ? size : res->start; + align = (resno < PCI_BRIDGE_RESOURCES || + resno > PCI_NUM_RESOURCES) ? size : res->start; /* First, try exact prefetching match.. */ ret = pci_bus_alloc_resource(bus, res, size, align, min, @@ -145,7 +162,7 @@ int pci_assign_resource(struct pci_dev *dev, int resno) } if (ret) { - if (resno < PCI_ROM_RESOURCE) + if (resno < PCI_ROM_RESOURCE || resno > PCI_NUM_RESOURCES) printk(KERN_ERR "PCI: Failed to allocate %s resource " "#%d:%llx@%llx for %s\n", res->flags & IORESOURCE_IO ? "I/O" : "mem", @@ -157,7 +174,7 @@ int pci_assign_resource(struct pci_dev *dev, int resno) res->flags & IORESOURCE_IO ? "I/O" : "mem", resno, (unsigned long long)size, (unsigned long long)res->start, pci_name(dev)); - } else if (resno < PCI_BRIDGE_RESOURCES) { + } else if (resno < PCI_BRIDGE_RESOURCES || resno > PCI_NUM_RESOURCES) { pci_update_resource(dev, res, resno); } diff --git a/include/linux/pci.h b/include/linux/pci.h index dc5f059..0e2359e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -184,6 +184,7 @@ struct pci_dev { unsigned int ari_enabled:1; /* ARI forwarding */ unsigned int is_managed:1; unsigned int is_physfn:1; + unsigned int is_virtfn:1; #endif u32 saved_config_space[16]; /* config space saved at suspend time */ @@ -194,7 +195,10 @@ struct pci_dev { #ifndef __GENKSYMS__ u8 revision; /* PCI revision, low byte of class word */ #ifdef CONFIG_PCI_IOV - struct pci_sriov *sriov; /* SR-IOV capability related */ + union { + struct pci_sriov *sriov; /* SR-IOV capability related */ + struct pci_dev *physfn; /* the PF this VF is associated with */ + }; #endif #endif }; @@ -892,5 +896,18 @@ extern int pci_pci_problems; #define PCIPCI_VSFX 16 #define PCIPCI_ALIMAGIK 32 +#ifdef CONFIG_PCI_IOV +extern int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn); +extern void pci_disable_sriov(struct pci_dev *dev); +#else +static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) +{ + return -ENODEV; +} +static inline void pci_disable_sriov(struct pci_dev *dev) +{ +} +#endif + #endif /* __KERNEL__ */ #endif /* LINUX_PCI_H */