From: Don Dutile <ddutile@redhat.com> Date: Mon, 17 Nov 2008 16:54:20 -0500 Subject: [xen] guest crashes if RTL8139 NIC is only one specified Message-id: 4921E80C.4090702@redhat.com O-Subject: [RHEL5.3 PATCH] RHEL5.3 guest will crash if RTL8139 NIC is only one specified Bugzilla: 471110 RH-Acked-by: Herbert Xu <herbert.xu@redhat.com> RH-Acked-by: Chris Lalancette <clalance@redhat.com> RH-Acked-by: Bill Burns <bburns@redhat.com> BZ 471110 -- this is a regression from RHEL5.2 . Note: this BZ is against a PV kernel, which is a bit odd to specify only an RTL8139 as a PV kernel's network, since it can't be done. *But* we received a follow-on bz, 471536, from Oracle claiming it broke a FV/HVM guest as well. If the vif config entry for a rhel5.3 xen guest is specified as only: vif = [ "type=ioemu" ], which forces *only* the RTL8139 to be presented to the guest as a possible network connection, the guest kernel will crash. The crash is due to a bogus/erroneous netif_free() call/use in netfront.c. Looking at upstream xen, the netif_free() call has been removed since cset 12048 (Oct. 2006). (Thanks to Herbert for finding the cset, as well as confirming the bogus netif_free() calls). After finding this 'oldie but goodie', I did a full comparison of RHEL5.3's netfront.c to upstream xen, and also backported the following changes: (a) changed send_fake_arp() from a fcn that return an int to a void fcn, since every invocation in netfront cast the call to void (upstream has it as a void). (b) changed flags in get_zeroed_page() calls from GFP_KERNEL to GFP_NOIO|__GFP_HIGH to avoid swap on resume hangs -- a backport of linux-2.6-xen cset 572. I also removed a horrible hack I put into netfront.c when I did the pv-on-hvm backport from the asynch kmod-xenpv package, which involved a bunch of #if defined's. I implemented the upstream solution to do if CONFIG_XEN else /* PV-on-HVM */ definition in include/asm-i386/mach-xen/asm/hypervisor.h. Now, except for some new functionality in upstream xen's netfront.c that RHEL5.3 does not have/support, the two versions of netfront.c are now much closer & cleaner. Thus, the regression is fixed and merging any future bug fixes should be more straightforward. Brew build: https://brewweb.devel.redhat.com/taskinfo?taskID=1573220 Testing: (1) Duplicated the error stated in bz 471110 on test system. (2) Tested x86_64 patched kernel with exclusive rtl8139 nic spec to confirm fix; also tested more common xen-vnif spec for *PV guest* to ensure no regression in that area (3) Ditto (2) except for *FV guest*; xen-vnif test ensured pv-on-hvm drivers still working properly as well as emulated rtl8139. Please review & ACK. - Don diff --git a/drivers/xen/netfront/netfront.c b/drivers/xen/netfront/netfront.c index 88ae481..d4aa1b9 100644 --- a/drivers/xen/netfront/netfront.c +++ b/drivers/xen/netfront/netfront.c @@ -240,12 +240,11 @@ static struct net_device *create_netdev(struct xenbus_device *); static void end_access(int, void *); static void netif_disconnect_backend(struct netfront_info *); -static void netif_free(struct netfront_info *); static int network_connect(struct net_device *); static void network_tx_buf_gc(struct net_device *); static void network_alloc_rx_buffers(struct net_device *); -static int send_fake_arp(struct net_device *); +static void send_fake_arp(struct net_device *); static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs); @@ -477,7 +476,7 @@ static int setup_device(struct xenbus_device *dev, struct netfront_info *info) info->tx.sring = NULL; info->irq = 0; - txs = (struct netif_tx_sring *)get_zeroed_page(GFP_KERNEL); + txs = (struct netif_tx_sring *)get_zeroed_page(GFP_NOIO|__GFP_HIGH); if (!txs) { err = -ENOMEM; xenbus_dev_fatal(dev, err, "allocating tx ring page"); @@ -493,7 +492,7 @@ static int setup_device(struct xenbus_device *dev, struct netfront_info *info) } info->tx_ring_ref = err; - rxs = (struct netif_rx_sring *)get_zeroed_page(GFP_KERNEL); + rxs = (struct netif_rx_sring *)get_zeroed_page(GFP_NOIO|__GFP_HIGH); if (!rxs) { err = -ENOMEM; xenbus_dev_fatal(dev, err, "allocating rx ring page"); @@ -523,7 +522,6 @@ static int setup_device(struct xenbus_device *dev, struct netfront_info *info) return 0; fail: - netif_free(info); return err; } @@ -547,16 +545,14 @@ static void backend_changed(struct xenbus_device *dev, break; case XenbusStateConnected: - (void)send_fake_arp(netdev); + send_fake_arp(netdev); break; case XenbusStateInitWait: if (dev->state != XenbusStateInitialising) break; - if (network_connect(netdev) != 0) { - netif_free(np); + if (network_connect(netdev) != 0) break; - } xenbus_switch_state(dev, XenbusStateConnected); break; @@ -573,9 +569,8 @@ static void backend_changed(struct xenbus_device *dev, * MAC. We send a fake ARP request. * * @param dev device - * @return 0 on success, error code otherwise */ -static int send_fake_arp(struct net_device *dev) +static void send_fake_arp(struct net_device *dev) { struct sk_buff *skb; u32 src_ip, dst_ip; @@ -585,16 +580,16 @@ static int send_fake_arp(struct net_device *dev) /* No IP? Then nothing to do. */ if (src_ip == 0) - return 0; + return; skb = arp_create(ARPOP_REPLY, ETH_P_ARP, dst_ip, dev, src_ip, /*dst_hw*/ NULL, /*src_hw*/ NULL, /*target_hw*/ dev->dev_addr); if (skb == NULL) - return -ENOMEM; + return; - return dev_queue_xmit(skb); + dev_queue_xmit(skb); } @@ -1168,22 +1163,17 @@ static int xennet_get_responses(struct netfront_info *np, struct page *page = skb_shinfo(skb)->frags[0].page; unsigned long pfn = page_to_pfn(page); -/* to avoid build warnings */ -#if defined CONFIG_XEN || (!defined CONFIG_X86_PAE && defined CONFIG_XEN_PV_ON_HVM) void *vaddr = page_address(page); -#endif mcl = np->rx_mcl + pages_flipped; mmu = np->rx_mmu + pages_flipped; -/* not for rhel4 & rhel5 -- _supported_pte_mask not exported for pfn_pte_ma */ -#if defined CONFIG_XEN || (!defined CONFIG_X86_PAE && defined CONFIG_XEN_PV_ON_HVM) MULTI_update_va_mapping(mcl, (unsigned long)vaddr, pfn_pte_ma(mfn, PAGE_KERNEL), 0); -#endif + mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; mmu->val = pfn; @@ -1549,14 +1539,12 @@ static void netif_release_rx_bufs_flip(struct netfront_info *np) /* Remap the page. */ struct page *page = skb_shinfo(skb)->frags[0].page; unsigned long pfn = page_to_pfn(page); -/* to avoid build warnings */ -#if defined CONFIG_XEN || (!defined CONFIG_X86_PAE && defined CONFIG_XEN_PV_ON_HVM) void *vaddr = page_address(page); MULTI_update_va_mapping(mcl, (unsigned long)vaddr, pfn_pte_ma(mfn, PAGE_KERNEL), 0); -#endif + mcl++; mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; @@ -2060,7 +2048,7 @@ inetdev_notify(struct notifier_block *this, unsigned long event, void *ptr) /* UP event and is it one of our devices? */ if (event == NETDEV_UP && dev->open == network_open) - (void)send_fake_arp(dev); + send_fake_arp(dev); return NOTIFY_DONE; } @@ -2076,7 +2064,7 @@ netdev_notify(struct notifier_block *this, unsigned long event, void *ptr) /* Carrier up event and is it one of our devices? */ if (event == NETDEV_CHANGE && netif_carrier_ok(dev) && dev->open == network_open) - (void)send_fake_arp(dev); + send_fake_arp(dev); return NOTIFY_DONE; } @@ -2104,13 +2092,6 @@ static void netif_disconnect_backend(struct netfront_info *info) } -static void netif_free(struct netfront_info *info) -{ - netif_disconnect_backend(info); - free_netdev(info->netdev); -} - - static void end_access(int ref, void *page) { if (ref != GRANT_INVALID_REF) @@ -2125,9 +2106,7 @@ static struct xenbus_device_id netfront_ids[] = { { "vif" }, { "" } }; -#ifdef CONFIG_XEN_PV_ON_HVM MODULE_ALIAS("xen:vif"); -#endif static struct xenbus_driver netfront = { .name = "vif", diff --git a/include/asm-i386/mach-xen/asm/hypervisor.h b/include/asm-i386/mach-xen/asm/hypervisor.h index d03f63d..89cde62 100644 --- a/include/asm-i386/mach-xen/asm/hypervisor.h +++ b/include/asm-i386/mach-xen/asm/hypervisor.h @@ -188,6 +188,8 @@ HYPERVISOR_poll( return rc; } +#ifdef CONFIG_XEN + static inline void MULTI_update_va_mapping( multicall_entry_t *mcl, unsigned long va, @@ -237,4 +239,12 @@ MULTI_update_va_mapping_otherdomain( mcl->args[MULTI_UVMDOMID_INDEX] = domid; } +#else /* !defined(CONFIG_XEN) .... HVM */ + +/* Multicalls not supported for HVM guests. */ +#define MULTI_update_va_mapping(a,b,c,d) ((void)0) +#define MULTI_grant_table_op(a,b,c,d) ((void)0) + +#endif + #endif /* __HYPERVISOR_H__ */