Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 89877e42827f16fa5f86b1df0c2860b1 > files > 1750

kernel-2.6.18-128.1.10.el5.src.rpm

Date: Mon, 06 Nov 2006 18:50:59 -0500
From: John Feeney <jfeeney@redhat.com>
Subject: bz209484



From Matt Domsch at Dell:

Problem:
New Dell PowerEdge servers have 2 embedded ethernet ports, which are
labeled NIC1 and NIC2 on the chassis, in the BIOS setup screens, and in the
printed documentation.  Assuming no other add-in ethernet ports in the
system, Linux 2.4 kernels name these eth0 and eth1 respectively.  Many
people have come to expect this naming.  Linux 2.6 kernels name these eth1
and eth0 respectively (backwards from expectations).  I also have reports
that various Sun and HP servers have similar behavior.

Root cause:
Linux 2.4 kernels walk the pci_devices list, which happens to be sorted in
breadth-first order (or pcbios_find_device order on i386, which most often
is breadth-first also).  2.6 kernels have both the pci_devices list and the
pci_bus_type.klist_devices list; the latter is what is walked at driver
load time to match the pci_id tables, and this klist happens to be in
depth-first order.

On systems where, for physical routing reasons, NIC1 appears on a lower bus
number than NIC2, but NIC2's bridge is discovered first in the depth-first
ordering, NIC2 will be discovered before NIC1.  If the list were sorted
breadth-first, NIC1 would be discovered before NIC2.

A PowerEdge 1955 system has the following topology which easily exhibits
the difference between depth-first and breadth-first device lists.

-[0000:00]-+-00.0  Intel Corporation 5000P Chipset Memory Controller Hub
           +-02.0-[0000:03-08]--+-00.0-[0000:04-07]--+-00.0-[0000:05-06]----00.0-[0000:06]----00.0  Broadcom Corporation NetXtreme II BCM5708S Gigabit Ethernet (labeled NIC2, 2.4 kernel name eth1, 2.6 kernel name eth0)
           +-1c.0-[0000:01-02]----00.0-[0000:02]----00.0  Broadcom Corporation NetXtreme II BCM5708S Gigabit Ethernet (labeled NIC1, 2.4 kernel name eth0, 2.6 kernel name eth1)

Other factors, such as device driver load order and the presence of
PCI slots at various points in the bus hierarchy further complicate
this problem; I'm not trying to solve those here, just restore the
device order, and thus basic behavior, that 2.4 kernels had.

Solution:
The patch below optionally sorts the two device lists into breadth-first
order to maintain compatibility with 2.4 kernels.  It adds two new
command line options:
  pci=bfsort
  pci=nobfsort
to force the breadth-first sort on or off, respectively.  It also adds DMI
checks for the specific Dell systems which exhibit "backwards" ordering,
so that they are sorted breadth-first by default.

Accepted upstream (see http://marc.theaimsgroup.com/?l=linux-mm-commits&m=115958255609603&w=2)
Signed-off-by: Matt Domsch <Matt_Domsch@dell.com>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>

Fixes bugzilla 209484 "RHEL5beta1 kernel: sort PCI device list breadth-first"

---

--- linux-2.6.18.noarch/Documentation/kernel-parameters.txt.orig	2006-11-01 16:50:03.000000000 -0500
+++ linux-2.6.18.noarch/Documentation/kernel-parameters.txt	2006-11-01 17:14:43.000000000 -0500
@@ -1240,6 +1240,10 @@
 				bootloader. This is currently used on
 				IXP2000 systems where the bus has to be
 				configured a certain way for adjunct CPUs.
+                bfsort          Sort PCI devices into breadth-first order.
+                                This sorting is done to get a device
+                                order compatible with older (<= 2.4) kernels.
+                nobfsort        Don't sort PCI devices into breadth-first order.
 
 	pcmv=		[HW,PCMCIA] BadgePAD 4

--- linux-2.6.18.noarch/arch/i386/pci/common.c.orig	2006-11-02 10:55:02.000000000 -0500
+++ linux-2.6.18.noarch/arch/i386/pci/common.c	2006-11-02 11:05:02.000000000 -0500
@@ -20,6 +20,7 @@
 unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
 				PCI_PROBE_MMCONF;
 
+int pci_bf_sort;
 int pci_routeirq;
 int pcibios_last_bus = -1;
 unsigned long pirq_table_addr;
@@ -122,6 +123,21 @@
 }
 
 /*
+ * Only use DMI information to set this if nothing was passed
+ * on the kernel command line (which was parsed earlier).
+ */
+
+static int __devinit set_bf_sort(struct dmi_system_id *d)
+{
+       if (pci_bf_sort == pci_bf_sort_default) {
+               pci_bf_sort = pci_dmi_bf;
+               printk(KERN_INFO 
+		  "PCI: %s detected, enabling pci=bfsort.\n", d->ident);
+       }
+       return 0;
+}
+
+/*
  * Enable renumbering of PCI bus# ranges to reach all PCI busses (Cardbus)
  */
 #ifdef __i386__
@@ -144,11 +160,11 @@
 #endif
 
 
+static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = {
+#ifdef __i386__
 /*
  * Laptops which need pci=assign-busses to see Cardbus cards
  */
-static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = {
-#ifdef __i386__
 	{
 		.callback = assign_all_busses,
 		.ident = "Samsung X20 Laptop",
@@ -174,7 +190,38 @@
 		},
 	},
 #endif
-
+        {
+                .callback = set_bf_sort,
+                .ident = "Dell PowerEdge 1950",
+                .matches = {
+                        DMI_MATCH(DMI_SYS_VENDOR, "Dell"),
+                        DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1950"),
+                },
+        },
+        {
+                .callback = set_bf_sort,
+                .ident = "Dell PowerEdge 1955",
+                .matches = {
+                        DMI_MATCH(DMI_SYS_VENDOR, "Dell"),
+                        DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1955"),
+                },
+        },
+        {
+                .callback = set_bf_sort,
+                .ident = "Dell PowerEdge 2900",
+                .matches = {
+                        DMI_MATCH(DMI_SYS_VENDOR, "Dell"),
+                        DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2900"),
+                },
+        },
+        {
+                .callback = set_bf_sort,
+                .ident = "Dell PowerEdge 2950",
+                .matches = {
+                        DMI_MATCH(DMI_SYS_VENDOR, "Dell"),
+                        DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2950"),
+                },
+        },
 	{}
 };
 
@@ -220,6 +267,8 @@
 
 	pcibios_resource_survey();
 
+        if (pci_bf_sort >= pci_force_bf)
+                pci_sort_breadthfirst();
 #ifdef CONFIG_PCI_BIOS
 	if ((pci_probe & PCI_BIOS_SORT) && !(pci_probe & PCI_NO_SORT))
 		pcibios_sort();
@@ -234,6 +283,12 @@
 	if (!strcmp(str, "off")) {
 		pci_probe = 0;
 		return NULL;
+        } else if (!strcmp(str, "bfsort")) {
+                pci_bf_sort = pci_force_bf;
+                return NULL;
+        } else if (!strcmp(str, "nobfsort")) {
+                pci_bf_sort = pci_force_nobf;
+                return NULL;
 	}
 #ifdef CONFIG_PCI_BIOS
 	else if (!strcmp(str, "bios")) {
 
--- linux-2.6.18.noarch/arch/i386/pci/pci.h.orig	2006-11-01 17:34:09.000000000 -0500
+++ linux-2.6.18.noarch/arch/i386/pci/pci.h	2006-11-01 17:35:00.000000000 -0500
@@ -29,6 +29,13 @@
 extern unsigned int pci_probe;
 extern unsigned long pirq_table_addr;
 
+enum pci_bf_sort_state {
+       pci_bf_sort_default,
+       pci_force_nobf,
+       pci_force_bf,
+       pci_dmi_bf,
+};
+
 /* pci-i386.c */
 
 extern unsigned int pcibios_max_latency;

--- linux-2.6.18.noarch/drivers/pci/probe.c.orig	2006-11-01 17:37:50.000000000 -0500
+++ linux-2.6.18.noarch/drivers/pci/probe.c	2006-11-02 10:42:50.000000000 -0500
@@ -1055,3 +1055,93 @@
 EXPORT_SYMBOL(pci_scan_single_device);
 EXPORT_SYMBOL_GPL(pci_scan_child_bus);
 #endif
+
+static int __init pci_sort_bf_cmp(const struct pci_dev *a, const struct pci_dev *b)
+{
+	if      (pci_domain_nr(a->bus) < pci_domain_nr(b->bus)) return -1;
+	else if (pci_domain_nr(a->bus) > pci_domain_nr(b->bus)) return  1;
+
+	if      (a->bus->number < b->bus->number) return -1;
+	else if (a->bus->number > b->bus->number) return  1;
+
+	if      (a->devfn < b->devfn) return -1;
+	else if (a->devfn > b->devfn) return  1;
+
+	return 0;
+}
+
+/*
+ * Yes, this forcibly breaks the klist abstraction temporarily.  It
+ * just wants to sort the klist, not change reference counts and
+ * take/drop locks rapidly in the process.  It does all this while
+ * holding the lock for the list, so objects can't otherwise be
+ * added/removed while we're swizzling.
+ */
+
+static void __init pci_insertion_sort_klist(struct pci_dev *a, struct list_head \
+*list) {
+	struct list_head *pos;
+	struct klist_node *n;
+	struct device *dev;
+	struct pci_dev *b;
+	list_for_each(pos, list) {
+		n = container_of(pos, struct klist_node, n_node);
+		dev = container_of(n, struct device, knode_bus);
+		b = to_pci_dev(dev);
+		if (pci_sort_bf_cmp(a, b) <= 0) {
+			list_move_tail(&a->dev.knode_bus.n_node, &b->dev.knode_bus.n_node);
+			return;
+		}
+	}
+	list_move_tail(&a->dev.knode_bus.n_node, list);
+}
+
+static void __init pci_sort_breadthfirst_klist(void)
+{
+	LIST_HEAD(sorted_devices);
+	struct list_head *pos, *tmp;
+	struct klist_node *n;
+	struct device *dev;
+	struct pci_dev *pdev;
+	spin_lock(&pci_bus_type.klist_devices.k_lock);
+	list_for_each_safe(pos, tmp, &pci_bus_type.klist_devices.k_list) {
+		n = container_of(pos, struct klist_node, n_node);
+		dev = container_of(n, struct device, knode_bus);
+		pdev = to_pci_dev(dev);
+		pci_insertion_sort_klist(pdev, &sorted_devices);
+	}
+	list_splice(&sorted_devices, &pci_bus_type.klist_devices.k_list);
+	spin_unlock(&pci_bus_type.klist_devices.k_lock);
+}
+
+static void __init pci_insertion_sort_devices(struct pci_dev *a, struct list_head \
+*list) {
+	struct pci_dev *b;
+	list_for_each_entry(b, list, global_list) {
+		if (pci_sort_bf_cmp(a, b) <= 0) {
+			list_move_tail(&a->global_list, &b->global_list);
+			return;
+		}
+	}
+	list_move_tail(&a->global_list, list);
+}
+
+static void __init pci_sort_breadthfirst_devices(void)
+{
+	LIST_HEAD(sorted_devices);
+	struct pci_dev *dev, *tmp;
+
+	down_write(&pci_bus_sem);
+	list_for_each_entry_safe(dev, tmp, &pci_devices, global_list) {
+		pci_insertion_sort_devices(dev, &sorted_devices);
+	}
+	list_splice(&sorted_devices, &pci_devices);
+	up_write(&pci_bus_sem);
+}
+
+void __init pci_sort_breadthfirst(void)
+{
+	pci_sort_breadthfirst_devices();
+	pci_sort_breadthfirst_klist();
+}
+
--- linux-2.6.18.noarch/include/linux/pci.h.orig	2006-11-02 11:17:17.000000000 -0500
+++ linux-2.6.18.noarch/include/linux/pci.h	2006-11-02 11:17:44.000000000 -0500
@@ -438,6 +438,7 @@
 extern void pci_remove_bus_device(struct pci_dev *dev);
 extern void pci_stop_bus_device(struct pci_dev *dev);
 void pci_setup_cardbus(struct pci_bus *bus);
+extern void pci_sort_breadthfirst(void);
 
 /* Generic PCI functions exported to card drivers */