From: Jay Fenlason <fenlason@redhat.com>
Date: Thu, 14 Aug 2008 13:08:30 -0400
Subject: [firewire] latest upstream snapshot for RHEL-5.3
Message-id: 20080814170829.GA1280@redhat.com
O-Subject: [PATCH] RHEL-5.3 bz#449520 Upgrade Firewire stack so it can be taken out of Tech Preview
Bugzilla: 449520 430300 429950 429951
RH-Acked-by: Jarod Wilson <jarod@redhat.com>

This is a backport of the latest for-linus branch from upstream to the
5.3 codebase.  It also closes
bz#446962	Access to firewire devices is still allowed after the device is
		removed from the bus.
It may also close others.

Brew built at:
http://brewweb.devel.redhat.com/brew/taskinfo?taskID=1429407

Signed-off-by: Jay Fenlason <fenlason@redhat.com>
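
The core of the bz#446962 fix is that fw-cdev now takes a reference with
fw_device_get_by_devt() and re-checks the device state on every entry
point.  As a simplified sketch of the fw-cdev.c hunk below, the open path
now rejects devices that have already been removed from the bus:

    device = fw_device_get_by_devt(inode->i_rdev);
    if (device == NULL)
        return -ENODEV;

    /* A device already in FW_DEVICE_SHUTDOWN state must not be opened;
     * drop the reference taken by fw_device_get_by_devt() and bail out. */
    if (fw_device_is_shutdown(device)) {
        fw_device_put(device);
        return -ENODEV;
    }

The same fw_device_is_shutdown() test also guards the ioctl, compat_ioctl,
and mmap entry points in the diff below.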

diff --git a/drivers/firewire/Kconfig b/drivers/firewire/Kconfig
index a312488..1616d04 100644
--- a/drivers/firewire/Kconfig
+++ b/drivers/firewire/Kconfig
@@ -1,31 +1,30 @@
-# -*- shell-script -*-
-
-comment "An alternative FireWire stack is available with EXPERIMENTAL=y"
+comment "A new alternative FireWire stack is available with EXPERIMENTAL=y"
 	depends on EXPERIMENTAL=n
 
+comment "Enable only one of the two stacks, unless you know what you are doing"
+	depends on EXPERIMENTAL
+
 config FIREWIRE
-	tristate "IEEE 1394 (FireWire) support (JUJU alternative stack, experimental)"
+	tristate "New FireWire stack, EXPERIMENTAL"
 	depends on EXPERIMENTAL
 	help
-	  IEEE 1394 describes a high performance serial bus, which is also
-	  known as FireWire(tm) or i.Link(tm) and is used for connecting all
-	  sorts of devices (most notably digital video cameras) to your
-	  computer.
-
-	  If you have FireWire hardware and want to use it, say Y here.  This
-	  is the core support only, you will also need to select a driver for
-	  your IEEE 1394 adapter.
+	  This is the "Juju" FireWire stack, a new alternative implementation
+	  designed for robustness and simplicity.  You can build either this
+	  stack, or the old stack (the ieee1394 driver, ohci1394 etc.) or both.
+	  Please read http://wiki.linux1394.org/JujuMigration before you
+	  enable the new stack.
 
 	  To compile this driver as a module, say M here: the module will be
 	  called firewire-core.
 
-	  This is the "JUJU" FireWire stack, an alternative implementation
-	  designed for robustness and simplicity.  You can build either this
-	  stack, or the classic stack (the ieee1394 driver, ohci1394 etc.)
-	  or both.
+	  This module functionally replaces ieee1394, raw1394, and video1394.
+	  To access it from application programs, you generally need at least
+	  libraw1394 version 2.  IIDC/DCAM applications also need libdc1394
+	  version 2.  No libraries are required to access storage devices
+	  through the firewire-sbp2 driver.
 
 config FIREWIRE_OHCI
-	tristate "Support for OHCI FireWire host controllers"
+	tristate "OHCI-1394 controllers"
 	depends on PCI && FIREWIRE
 	help
 	  Enable this driver if you have a FireWire controller based
@@ -33,14 +32,37 @@ config FIREWIRE_OHCI
 	  is the only chipset in use, so say Y here.
 
 	  To compile this driver as a module, say M here:  The module will be
-	  called firewire-ohci.
+	  called firewire-ohci.  It replaces ohci1394 of the classic IEEE 1394
+	  stack.
+
+	  NOTE:
 
-	  If you also build ohci1394 of the classic IEEE 1394 driver stack,
-	  blacklist either ohci1394 or firewire-ohci to let hotplug load the
-	  desired driver.
+	  You should only build either firewire-ohci or the old ohci1394 driver,
+	  but not both.  If you nevertheless want to install both, you should
+	  configure them only as modules and blacklist the driver(s) which you
+	  don't want to have auto-loaded.  Add either
+
+	      blacklist firewire-ohci
+	  or
+	      blacklist ohci1394
+	      blacklist video1394
+	      blacklist dv1394
+
+	  to /etc/modprobe.conf or /etc/modprobe.d/* and update modprobe.conf
+	  depending on your distribution.  The latter two modules should be
+	  blacklisted together with ohci1394 because they depend on ohci1394.
+
+	  If you have an old modprobe which doesn't implement the blacklist
+	  directive, use "install modulename /bin/true" for the modules to be
+	  blacklisted.
+
+config FIREWIRE_OHCI_DEBUG
+	bool
+	depends on FIREWIRE_OHCI
+	default y
 
 config FIREWIRE_SBP2
-	tristate "Support for storage devices (SBP-2 protocol driver)"
+	tristate "Storage devices (SBP-2 protocol)"
 	depends on FIREWIRE && SCSI
 	help
 	  This option enables you to use SBP-2 devices connected to a
@@ -49,12 +71,8 @@ config FIREWIRE_SBP2
 	  like scanners.
 
 	  To compile this driver as a module, say M here:  The module will be
-	  called firewire-sbp2.
+	  called firewire-sbp2.  It replaces sbp2 of the classic IEEE 1394
+	  stack.
 
 	  You should also enable support for disks, CD-ROMs, etc. in the SCSI
 	  configuration section.
-
-	  If you also build sbp2 of the classic IEEE 1394 driver stack,
-	  blacklist either sbp2 or firewire-sbp2 to let hotplug load the
-	  desired driver.
-
diff --git a/drivers/firewire/Makefile b/drivers/firewire/Makefile
index bef1f86..a7c31e9 100644
--- a/drivers/firewire/Makefile
+++ b/drivers/firewire/Makefile
@@ -2,9 +2,9 @@
 # Makefile for the Linux IEEE 1394 implementation
 #
 
-firewire-core-y := fw-card.o fw-topology.o fw-transaction.o fw-iso.o \
-	fw-device.o fw-cdev.o
-firewire-ohci-y += fw-ohci.o            
+firewire-core-y += fw-card.o fw-topology.o fw-transaction.o fw-iso.o \
+                   fw-device.o fw-cdev.o
+firewire-ohci-y += fw-ohci.o
 firewire-sbp2-y += fw-sbp2.o
 
 obj-$(CONFIG_FIREWIRE) += firewire-core.o
diff --git a/drivers/firewire/fw-card.c b/drivers/firewire/fw-card.c
index 23218b0..8f1bccc 100644
--- a/drivers/firewire/fw-card.c
+++ b/drivers/firewire/fw-card.c
@@ -16,10 +16,14 @@
  * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  */
 
-#include <linux/module.h>
-#include <linux/errno.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
 #include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/kref.h>
+#include <linux/module.h>
 #include <linux/mutex.h>
+
 #include "fw-transaction.h"
 #include "fw-topology.h"
 #include "fw-device.h"
@@ -84,18 +88,17 @@ static u16 crc_itu_t(u16 crc, const u8 *buffer, size_t len)
 
 int fw_compute_block_crc(u32 *block)
 {
-        __be32 be32_block[256];
-        int i, length;
+	__be32 be32_block[256];
+	int i, length;
 
-        length = (*block >> 16) & 0xff;
-        for (i = 0; i < length; i++)
-                be32_block[i] = cpu_to_be32(block[i + 1]);
-        *block |= crc_itu_t(0, (u8 *) be32_block, length * 4);
+	length = (*block >> 16) & 0xff;
+	for (i = 0; i < length; i++)
+		be32_block[i] = cpu_to_be32(block[i + 1]);
+	*block |= crc_itu_t(0, (u8 *) be32_block, length * 4);
 
-        return length;
+	return length;
 }
 
-
 static DEFINE_MUTEX(card_mutex);
 static LIST_HEAD(card_list);
 
@@ -170,15 +173,12 @@ generate_config_rom(struct fw_card *card, size_t *config_rom_length)
 		i += desc->length;
 	}
 
-	/*
-	 * Calculate CRCs for all blocks in the config rom.  This
+	/* Calculate CRCs for all blocks in the config rom.  This
 	 * assumes that CRC length and info length are identical for
 	 * the bus info block, which is always the case for this
-	 * implementation.
-	 */
-	for (i = 0; i < j; i += length + 1) {
+	 * implementation. */
+	for (i = 0; i < j; i += length + 1)
 		length = fw_compute_block_crc(config_rom + i);
-	}
 
 	*config_rom_length = j;
 
@@ -199,7 +199,7 @@ update_config_roms(void)
 }
 
 int
-fw_core_add_descriptor (struct fw_descriptor *desc)
+fw_core_add_descriptor(struct fw_descriptor *desc)
 {
 	size_t i;
 
@@ -248,48 +248,37 @@ static const char gap_count_table[] = {
 	63, 5, 7, 8, 10, 13, 16, 18, 21, 24, 26, 29, 32, 35, 37, 40
 };
 
-struct bm_data {
-	struct fw_transaction t;
-	struct {
-		__be32 arg;
-		__be32 data;
-	} lock;
-	u32 old;
-	int rcode;
-	struct completion done;
-};
-
-static void
-complete_bm_lock(struct fw_card *card, int rcode,
-		 void *payload, size_t length, void *data)
-{
-	struct bm_data *bmd = data;
-
-	if (rcode == RCODE_COMPLETE)
-		bmd->old = be32_to_cpu(*(__be32 *) payload);
-	bmd->rcode = rcode;
-	complete(&bmd->done);
-}
-
 static void
 fw_card_bm_work(void *w)
 {
 	struct work_struct *work = w;
 	struct fw_card *card = container_of(work, struct fw_card, work);
-	struct fw_device *root;
-	struct bm_data bmd;
+	struct fw_device *root_device;
+	struct fw_node *root_node, *local_node;
 	unsigned long flags;
-	int root_id, new_root_id, irm_id, gap_count, generation, grace;
-	int do_reset = 0;
+	int root_id, new_root_id, irm_id, gap_count, generation, grace, rcode;
+	bool do_reset = false;
+	__be32 lock_data[2];
 
 	spin_lock_irqsave(&card->lock, flags);
+	local_node = card->local_node;
+	root_node  = card->root_node;
+
+	if (local_node == NULL) {
+		spin_unlock_irqrestore(&card->lock, flags);
+		return;
+	}
+	fw_node_get(local_node);
+	fw_node_get(root_node);
 
 	generation = card->generation;
-	root = card->root_node->data;
-	root_id = card->root_node->node_id;
+	root_device = root_node->data;
+	if (root_device)
+		fw_device_get(root_device);
+	root_id = root_node->node_id;
 	grace = time_after(jiffies, card->reset_jiffies + DIV_ROUND_UP(HZ, 10));
 
-	if (card->bm_generation + 1 == generation ||
+	if (is_next_generation(generation, card->bm_generation) ||
 	    (card->bm_generation != generation && grace)) {
 		/*
 		 * This first step is to figure out who is IRM and
@@ -305,78 +294,77 @@ fw_card_bm_work(void *w)
 
 		irm_id = card->irm_node->node_id;
 		if (!card->irm_node->link_on) {
-			new_root_id = card->local_node->node_id;
+			new_root_id = local_node->node_id;
 			fw_notify("IRM has link off, making local node (%02x) root.\n",
 				  new_root_id);
 			goto pick_me;
 		}
 
-		bmd.lock.arg = cpu_to_be32(0x3f);
-		bmd.lock.data = cpu_to_be32(card->local_node->node_id);
+		lock_data[0] = cpu_to_be32(0x3f);
+		lock_data[1] = cpu_to_be32(local_node->node_id);
 
 		spin_unlock_irqrestore(&card->lock, flags);
 
-		init_completion(&bmd.done);
-		fw_send_request(card, &bmd.t, TCODE_LOCK_COMPARE_SWAP,
-				irm_id, generation,
-				SCODE_100, CSR_REGISTER_BASE + CSR_BUS_MANAGER_ID,
-				&bmd.lock, sizeof(bmd.lock),
-				complete_bm_lock, &bmd);
-		wait_for_completion(&bmd.done);
+		rcode = fw_run_transaction(card, TCODE_LOCK_COMPARE_SWAP,
+				irm_id, generation, SCODE_100,
+				CSR_REGISTER_BASE + CSR_BUS_MANAGER_ID,
+				lock_data, sizeof(lock_data));
 
-		if (bmd.rcode == RCODE_GENERATION) {
-			/*
-			 * Another bus reset happened. Just return,
-			 * the BM work has been rescheduled.
-			 */
-			return;
-		}
+		if (rcode == RCODE_GENERATION)
+			/* Another bus reset, BM work has been rescheduled. */
+			goto out;
 
-		if (bmd.rcode == RCODE_COMPLETE && bmd.old != 0x3f)
+		if (rcode == RCODE_COMPLETE &&
+		    lock_data[0] != cpu_to_be32(0x3f))
 			/* Somebody else is BM, let them do the work. */
-			return;
+			goto out;
 
 		spin_lock_irqsave(&card->lock, flags);
-		if (bmd.rcode != RCODE_COMPLETE) {
+
+		if (rcode != RCODE_COMPLETE) {
 			/*
 			 * The lock request failed, maybe the IRM
 			 * isn't really IRM capable after all. Let's
 			 * do a bus reset and pick the local node as
 			 * root, and thus, IRM.
 			 */
-			new_root_id = card->local_node->node_id;
+			new_root_id = local_node->node_id;
 			fw_notify("BM lock failed, making local node (%02x) root.\n",
 				  new_root_id);
 			goto pick_me;
 		}
 	} else if (card->bm_generation != generation) {
-		/* OK, we weren't BM in the last generation, and it's
+		/*
+		 * OK, we weren't BM in the last generation, and it's
 		 * less than 100ms since last bus reset. Reschedule
-		 * this task 100ms from now. */
+		 * this task 100ms from now.
+		 */
 		spin_unlock_irqrestore(&card->lock, flags);
 		schedule_delayed_work(&card->work, DIV_ROUND_UP(HZ, 10));
-		return;
+		goto out;
 	}
 
-	/* We're bus manager for this generation, so next step is to
+	/*
+	 * We're bus manager for this generation, so next step is to
 	 * make sure we have an active cycle master and do gap count
-	 * optimization. */
+	 * optimization.
+	 */
 	card->bm_generation = generation;
 
-	if (root == NULL) {
+	if (root_device == NULL) {
 		/*
 		 * Either link_on is false, or we failed to read the
 		 * config rom.  In either case, pick another root.
 		 */
-		new_root_id = card->local_node->node_id;
-	} else if (atomic_read(&root->state) != FW_DEVICE_RUNNING) {
+		new_root_id = local_node->node_id;
+	} else if (atomic_read(&root_device->state) != FW_DEVICE_RUNNING) {
 		/*
 		 * If we haven't probed this device yet, bail out now
 		 * and let's try again once that's done.
 		 */
 		spin_unlock_irqrestore(&card->lock, flags);
-		return;
-	} else if (root->config_rom[2] & BIB_CMC) {
+		goto out;
+	} else if (root_device->cmc) {
 		/*
 		 * FIXME: I suppose we should set the cmstr bit in the
 		 * STATE_CLEAR register of this node, as described in
@@ -390,39 +378,43 @@ fw_card_bm_work(void *w)
 		 * successfully read the config rom, but it's not
 		 * cycle master capable.
 		 */
-		new_root_id = card->local_node->node_id;
+		new_root_id = local_node->node_id;
 	}
 
  pick_me:
 	/*
 	 * Pick a gap count from 1394a table E-1.  The table doesn't cover
 	 * the typically much larger 1394b beta repeater delays though.
-	*/
+	 */
 	if (!card->beta_repeaters_present &&
-	    card->root_node->max_hops < ARRAY_SIZE(gap_count_table)) {
-		gap_count = gap_count_table[card->root_node->max_hops];
-	} else {
+	    root_node->max_hops < ARRAY_SIZE(gap_count_table))
+		gap_count = gap_count_table[root_node->max_hops];
+	else
 		gap_count = 63;
-	}
-		
 
 	/*
-	 * Finally, figure out if we should do a reset or not.  If we've
-	 * done less that 5 resets with the same physical topology and we
+	 * Finally, figure out if we should do a reset or not.  If we have
+	 * done less than 5 resets with the same physical topology and we
 	 * have either a new root or a new gap count setting, let's do it.
 	 */
 
 	if (card->bm_retries++ < 5 &&
-	    (card->gap_count != gap_count || new_root_id != root_id)) {
-		do_reset = 1;
-	}
+	    (card->gap_count != gap_count || new_root_id != root_id))
+		do_reset = true;
 
 	spin_unlock_irqrestore(&card->lock, flags);
 
 	if (do_reset) {
+		fw_notify("phy config: card %d, new root=%x, gap_count=%d\n",
+			  card->index, new_root_id, gap_count);
 		fw_send_phy_config(card, new_root_id, generation, gap_count);
 		fw_core_initiate_bus_reset(card, 1);
 	}
+ out:
+	if (root_device)
+		fw_device_put(root_device);
+	fw_node_put(root_node);
+	fw_node_put(local_node);
 }
 
 static void
@@ -439,14 +431,16 @@ fw_card_initialize(struct fw_card *card, const struct fw_card_driver *driver,
 {
 	static atomic_t index = ATOMIC_INIT(-1);
 
-	kref_init(&card->kref);
 	card->index = atomic_inc_return(&index);
 	card->driver = driver;
 	card->device = device;
 	card->current_tlabel = 0;
 	card->tlabel_mask = 0;
 	card->color = 0;
+	card->broadcast_channel = BROADCAST_CHANNEL_INITIAL;
 
+	kref_init(&card->kref);
+	init_completion(&card->done);
 	INIT_LIST_HEAD(&card->transaction_list);
 	spin_lock_init(&card->lock);
 	setup_timer(&card->flush_timer,
@@ -469,12 +463,6 @@ fw_card_add(struct fw_card *card,
 	card->link_speed = link_speed;
 	card->guid = guid;
 
-	/*
-	 * The subsystem grabs a reference when the card is added and
-	 * drops it when the driver calls fw_core_remove_card.
-	 */
-	fw_card_get(card);
-
 	mutex_lock(&card_mutex);
 	config_rom = generate_config_rom(card, &length);
 	list_add_tail(&card->link, &card_list);
@@ -545,7 +533,6 @@ dummy_enable_phys_dma(struct fw_card *card,
 }
 
 static struct fw_card_driver dummy_driver = {
-	.name            = "dummy",
 	.enable          = dummy_enable,
 	.update_phy_reg  = dummy_update_phy_reg,
 	.set_config_rom  = dummy_set_config_rom,
@@ -556,6 +543,14 @@ static struct fw_card_driver dummy_driver = {
 };
 
 void
+fw_card_release(struct kref *kref)
+{
+	struct fw_card *card = container_of(kref, struct fw_card, kref);
+
+	complete(&card->done);
+}
+
+void
 fw_core_remove_card(struct fw_card *card)
 {
 	card->driver->update_phy_reg(card, 4,
@@ -570,43 +565,16 @@ fw_core_remove_card(struct fw_card *card)
 	card->driver = &dummy_driver;
 
 	fw_destroy_nodes(card);
-	flush_scheduled_work();
-
-	fw_flush_transactions(card);
-	del_timer_sync(&card->flush_timer);
 
+	/* Wait for all users, especially device workqueue jobs, to finish. */
 	fw_card_put(card);
-}
-EXPORT_SYMBOL(fw_core_remove_card);
+	wait_for_completion(&card->done);
 
-struct fw_card *
-fw_card_get(struct fw_card *card)
-{
-	kref_get(&card->kref);
-
-	return card;
-}
-EXPORT_SYMBOL(fw_card_get);
-
-static void
-release_card(struct kref *kref)
-{
-	struct fw_card *card = container_of(kref, struct fw_card, kref);
-
-	kfree(card);
-}
-
-/*
- * An assumption for fw_card_put() is that the card driver allocates
- * the fw_card struct with kalloc and that it has been shut down
- * before the last ref is dropped.
- */
-void
-fw_card_put(struct fw_card *card)
-{
-	kref_put(&card->kref, release_card);
+	cancel_rearming_delayed_work(&card->work);
+	WARN_ON(!list_empty(&card->transaction_list));
+	del_timer_sync(&card->flush_timer);
 }
-EXPORT_SYMBOL(fw_card_put);
+EXPORT_SYMBOL(fw_core_remove_card);
 
 int
 fw_core_initiate_bus_reset(struct fw_card *card, int short_reset)
diff --git a/drivers/firewire/fw-cdev.c b/drivers/firewire/fw-cdev.c
index 9de210d..2e6d584 100644
--- a/drivers/firewire/fw-cdev.c
+++ b/drivers/firewire/fw-cdev.c
@@ -1,4 +1,6 @@
 /*
+ * Char device for device raw access
+ *
  * Copyright (C) 2005-2007  Kristian Hoegsberg <krh@bitplanet.net>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -36,7 +38,6 @@
 #include "fw-topology.h"
 #include "fw-device.h"
 
-
 struct client;
 struct client_resource {
 	struct list_head link;
@@ -108,15 +109,22 @@ static int fw_device_op_open(struct inode *inode, struct file *file)
 	struct client *client;
 	unsigned long flags;
 
-	device = fw_device_from_devt(inode->i_rdev);
+	device = fw_device_get_by_devt(inode->i_rdev);
 	if (device == NULL)
 		return -ENODEV;
 
+	if (fw_device_is_shutdown(device)) {
+		fw_device_put(device);
+		return -ENODEV;
+	}
+
 	client = kzalloc(sizeof(*client), GFP_KERNEL);
-	if (client == NULL)
+	if (client == NULL) {
+		fw_device_put(device);
 		return -ENOMEM;
+	}
 
-	client->device = fw_device_get(device);
+	client->device = device;
 	INIT_LIST_HEAD(&client->event_list);
 	INIT_LIST_HEAD(&client->resource_list);
 	spin_lock_init(&client->lock);
@@ -197,6 +205,7 @@ fw_device_op_read(struct file *file,
 	return dequeue_event(client, buffer, count);
 }
 
+/* caller must hold card->lock so that node pointers can be dereferenced here */
 static void
 fill_bus_reset_event(struct fw_cdev_event_bus_reset *event,
 		     struct client *client)
@@ -206,7 +215,6 @@ fill_bus_reset_event(struct fw_cdev_event_bus_reset *event,
 	event->closure	     = client->bus_reset_closure;
 	event->type          = FW_CDEV_EVENT_BUS_RESET;
 	event->generation    = client->device->generation;
-	smp_rmb();	     /* node_id must not be older than generation */
 	event->node_id       = client->device->node_id;
 	event->local_node_id = card->local_node->node_id;
 	event->bm_node_id    = 0; /* FIXME: We don't track the BM. */
@@ -266,31 +274,43 @@ static int ioctl_get_info(struct client *client, void *buffer)
 {
 	struct fw_cdev_get_info *get_info = buffer;
 	struct fw_cdev_event_bus_reset bus_reset;
+	struct fw_card *card = client->device->card;
+	unsigned long ret = 0;
 
 	client->version = get_info->version;
 	get_info->version = FW_CDEV_VERSION;
 
+	down_read(&fw_device_rwsem);
+
 	if (get_info->rom != 0) {
 		void __user *uptr = u64_to_uptr(get_info->rom);
 		size_t want = get_info->rom_length;
 		size_t have = client->device->config_rom_length * 4;
 
-		if (copy_to_user(uptr, client->device->config_rom,
-				 min(want, have)))
-			return -EFAULT;
+		ret = copy_to_user(uptr, client->device->config_rom,
+				   min(want, have));
 	}
 	get_info->rom_length = client->device->config_rom_length * 4;
 
+	up_read(&fw_device_rwsem);
+
+	if (ret != 0)
+		return -EFAULT;
+
 	client->bus_reset_closure = get_info->bus_reset_closure;
 	if (get_info->bus_reset != 0) {
 		void __user *uptr = u64_to_uptr(get_info->bus_reset);
+		unsigned long flags;
 
+		spin_lock_irqsave(&card->lock, flags);
 		fill_bus_reset_event(&bus_reset, client);
+		spin_unlock_irqrestore(&card->lock, flags);
+
 		if (copy_to_user(uptr, &bus_reset, sizeof(bus_reset)))
 			return -EFAULT;
 	}
 
-	get_info->card = client->device->card->index;
+	get_info->card = card->index;
 
 	return 0;
 }
@@ -349,22 +369,33 @@ complete_transaction(struct fw_card *card, int rcode,
 	struct response *response = data;
 	struct client *client = response->client;
 	unsigned long flags;
+	struct fw_cdev_event_response *r = &response->response;
 
-	if (length < response->response.length)
-		response->response.length = length;
+	if (length < r->length)
+		r->length = length;
 	if (rcode == RCODE_COMPLETE)
-		memcpy(response->response.data, payload,
-		       response->response.length);
+		memcpy(r->data, payload, r->length);
 
 	spin_lock_irqsave(&client->lock, flags);
 	list_del(&response->resource.link);
 	spin_unlock_irqrestore(&client->lock, flags);
 
-	response->response.type   = FW_CDEV_EVENT_RESPONSE;
-	response->response.rcode  = rcode;
-	queue_event(client, &response->event,
-		    &response->response, sizeof(response->response),
-		    response->response.data, response->response.length);
+	r->type   = FW_CDEV_EVENT_RESPONSE;
+	r->rcode  = rcode;
+
+	/*
+	 * In the case that sizeof(*r) doesn't align with the position of the
+	 * data, and the read is short, preserve an extra copy of the data
+	 * to stay compatible with a pre-2.6.27 bug.  Since the bug is harmless
+	 * for short reads and some apps depended on it, this is both safe
+	 * and prudent for compatibility.
+	 */
+	if (r->length <= sizeof(*r) - offsetof(typeof(*r), data))
+		queue_event(client, &response->event, r, sizeof(*r),
+			    r->data, r->length);
+	else
+		queue_event(client, &response->event, r, sizeof(*r) + r->length,
+			    NULL, 0);
 }
 
 static int ioctl_send_request(struct client *client, void *buffer)
@@ -643,6 +674,10 @@ static int ioctl_create_iso_context(struct client *client, void *buffer)
 	struct fw_cdev_create_iso_context *request = buffer;
 	struct fw_iso_context *context;
 
+	/* We only support one context at this time. */
+	if (client->iso_context != NULL)
+		return -EBUSY;
+
 	if (request->channel > 63)
 		return -EINVAL;
 
@@ -663,17 +698,18 @@ static int ioctl_create_iso_context(struct client *client, void *buffer)
 		return -EINVAL;
 	}
 
-	context = fw_iso_context_create(client->device->card,
-						    request->type,
-						    request->channel,
-						    request->speed,
-						    request->header_size,
-						    iso_callback, client);
+	context =  fw_iso_context_create(client->device->card,
+					 request->type,
+					 request->channel,
+					 request->speed,
+					 request->header_size,
+					 iso_callback, client);
 	if (IS_ERR(context))
 		return PTR_ERR(context);
 
 	client->iso_closure = request->closure;
 	client->iso_context = context;
+
 	/* We only support one context at this time. */
 	request->handle = 0;
 
@@ -788,8 +824,9 @@ static int ioctl_start_iso(struct client *client, void *buffer)
 {
 	struct fw_cdev_start_iso *request = buffer;
 
-	if (request->handle != 0)
+	if (client->iso_context == NULL || request->handle != 0)
 		return -EINVAL;
+
 	if (client->iso_context->type == FW_ISO_CONTEXT_RECEIVE) {
 		if (request->tags == 0 || request->tags > 15)
 			return -EINVAL;
@@ -806,7 +843,7 @@ static int ioctl_stop_iso(struct client *client, void *buffer)
 {
 	struct fw_cdev_stop_iso *request = buffer;
 
-	if (request->handle != 0)
+	if (client->iso_context == NULL || request->handle != 0)
 		return -EINVAL;
 
 	return fw_iso_context_stop(client->iso_context);
@@ -885,6 +922,9 @@ fw_device_op_ioctl(struct file *file,
 {
 	struct client *client = file->private_data;
 
+	if (fw_device_is_shutdown(client->device))
+		return -ENODEV;
+
 	return dispatch_ioctl(client, cmd, (void __user *) arg);
 }
 
@@ -895,6 +935,9 @@ fw_device_op_compat_ioctl(struct file *file,
 {
 	struct client *client = file->private_data;
 
+	if (fw_device_is_shutdown(client->device))
+		return -ENODEV;
+
 	return dispatch_ioctl(client, cmd, compat_ptr(arg));
 }
 #endif
@@ -906,6 +949,9 @@ static int fw_device_op_mmap(struct file *file, struct vm_area_struct *vma)
 	unsigned long size;
 	int page_count, retval;
 
+	if (fw_device_is_shutdown(client->device))
+		return -ENODEV;
+
 	/* FIXME: We could support multiple buffers, but we don't. */
 	if (client->buffer.pages != NULL)
 		return -EBUSY;
diff --git a/drivers/firewire/fw-device.c b/drivers/firewire/fw-device.c
index ba3ae3d..29c3653 100644
--- a/drivers/firewire/fw-device.c
+++ b/drivers/firewire/fw-device.c
@@ -1,4 +1,6 @@
 /*
+ * Device probing and sysfs code.
+ *
  * Copyright (C) 2005-2006  Kristian Hoegsberg <krh@bitplanet.net>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -23,8 +25,9 @@
 #include <linux/device.h>
 #include <linux/delay.h>
 #include <linux/idr.h>
+#include <linux/string.h>
 #include <linux/rwsem.h>
-#include <asm/semaphore.h>
+#include <asm/system.h>
 #include <linux/ctype.h>
 #include "fw-transaction.h"
 #include "fw-topology.h"
@@ -156,35 +159,24 @@ struct bus_type fw_bus_type = {
 };
 EXPORT_SYMBOL(fw_bus_type);
 
-struct fw_device *fw_device_get(struct fw_device *device)
-{
-	get_device(&device->device);
-
-	return device;
-}
-
-void fw_device_put(struct fw_device *device)
-{
-	put_device(&device->device);
-}
-
 static void fw_device_release(struct device *dev)
 {
 	struct fw_device *device = fw_device(dev);
+	struct fw_card *card = device->card;
 	unsigned long flags;
 
 	/*
 	 * Take the card lock so we don't set this to NULL while a
 	 * FW_NODE_UPDATED callback is being handled.
 	 */
-	spin_lock_irqsave(&device->card->lock, flags);
+	spin_lock_irqsave(&card->lock, flags);
 	device->node->data = NULL;
-	spin_unlock_irqrestore(&device->card->lock, flags);
+	spin_unlock_irqrestore(&card->lock, flags);
 
 	fw_node_put(device->node);
-	fw_card_put(device->card);
 	kfree(device->config_rom);
 	kfree(device);
+	fw_card_put(card);
 }
 
 int fw_device_enable_phys_dma(struct fw_device *device)
@@ -193,6 +185,7 @@ int fw_device_enable_phys_dma(struct fw_device *device)
 
 	/* device->node_id, accessed below, must not be older than generation */
 	smp_rmb();
+
 	return device->card->driver->enable_phys_dma(device->card,
 						     device->node_id,
 						     generation);
@@ -211,7 +204,9 @@ show_immediate(struct device *dev, struct device_attribute *dattr, char *buf)
 		container_of(dattr, struct config_rom_attribute, attr);
 	struct fw_csr_iterator ci;
 	u32 *dir;
-	int key, value;
+	int key, value, ret = -ENOENT;
+
+	down_read(&fw_device_rwsem);
 
 	if (is_fw_unit(dev))
 		dir = fw_unit(dev)->directory;
@@ -220,11 +215,15 @@ show_immediate(struct device *dev, struct device_attribute *dattr, char *buf)
 
 	fw_csr_iterator_init(&ci, dir);
 	while (fw_csr_iterator_next(&ci, &key, &value))
-		if (attr->key == key)
-			return snprintf(buf, buf ? PAGE_SIZE : 0,
-					"0x%06x\n", value);
+		if (attr->key == key) {
+			ret = snprintf(buf, buf ? PAGE_SIZE : 0,
+				       "0x%06x\n", value);
+			break;
+		}
+
+	up_read(&fw_device_rwsem);
 
-	return -ENOENT;
+	return ret;
 }
 
 #define IMMEDIATE_ATTR(name, key)				\
@@ -237,9 +236,11 @@ show_text_leaf(struct device *dev, struct device_attribute *dattr, char *buf)
 		container_of(dattr, struct config_rom_attribute, attr);
 	struct fw_csr_iterator ci;
 	u32 *dir, *block = NULL, *p, *end;
-	int length, key, value, last_key = 0;
+	int length, key, value, last_key = 0, ret = -ENOENT;
 	char *b;
 
+	down_read(&fw_device_rwsem);
+
 	if (is_fw_unit(dev))
 		dir = fw_unit(dev)->directory;
 	else
@@ -254,18 +255,20 @@ show_text_leaf(struct device *dev, struct device_attribute *dattr, char *buf)
 	}
 
 	if (block == NULL)
-		return -ENOENT;
+		goto out;
 
 	length = min(block[0] >> 16, 256U);
 	if (length < 3)
-		return -ENOENT;
+		goto out;
 
 	if (block[1] != 0 || block[2] != 0)
 		/* Unknown encoding. */
-		return -ENOENT;
+		goto out;
 
-	if (buf == NULL)
-		return length * 4;
+	if (buf == NULL) {
+		ret = length * 4;
+		goto out;
+	}
 
 	b = buf;
 	end = &block[length + 1];
@@ -275,8 +278,11 @@ show_text_leaf(struct device *dev, struct device_attribute *dattr, char *buf)
 	/* Strip trailing whitespace and add newline. */
 	while (b--, (isspace(*b) || *b == '\0') && b > buf);
 	strcpy(b + 1, "\n");
+	ret = b + 2 - buf;
+ out:
+	up_read(&fw_device_rwsem);
 
-	return b + 2 - buf;
+	return ret;
 }
 
 #define TEXT_LEAF_ATTR(name, key)				\
@@ -352,22 +358,28 @@ static ssize_t
 config_rom_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
 	struct fw_device *device = fw_device(dev);
+	size_t length;
 
-	memcpy(buf, device->config_rom, device->config_rom_length * 4);
+	down_read(&fw_device_rwsem);
+	length = device->config_rom_length * 4;
+	memcpy(buf, device->config_rom, length);
+	up_read(&fw_device_rwsem);
 
-	return device->config_rom_length * 4;
+	return length;
 }
 
 static ssize_t
 guid_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
 	struct fw_device *device = fw_device(dev);
-	u64 guid;
+	int ret;
 
-	guid = ((u64)device->config_rom[3] << 32) | device->config_rom[4];
+	down_read(&fw_device_rwsem);
+	ret = snprintf(buf, PAGE_SIZE, "0x%08x%08x\n",
+		       device->config_rom[3], device->config_rom[4]);
+	up_read(&fw_device_rwsem);
 
-	return snprintf(buf, PAGE_SIZE, "0x%016llx\n",
-			(unsigned long long)guid);
+	return ret;
 }
 
 static struct device_attribute fw_device_attributes[] = {
@@ -376,47 +388,26 @@ static struct device_attribute fw_device_attributes[] = {
 	__ATTR_NULL,
 };
 
-struct read_quadlet_callback_data {
-	struct completion done;
-	int rcode;
-	u32 data;
-};
-
-static void
-complete_transaction(struct fw_card *card, int rcode,
-		     void *payload, size_t length, void *data)
-{
-	struct read_quadlet_callback_data *callback_data = data;
-
-	if (rcode == RCODE_COMPLETE)
-		callback_data->data = be32_to_cpu(*(__be32 *)payload);
-	callback_data->rcode = rcode;
-	complete(&callback_data->done);
-}
-
 static int
-read_rom(struct fw_device *device, int generation, int index, u32 * data)
+read_rom(struct fw_device *device, int generation, int index, u32 *data)
 {
-	struct read_quadlet_callback_data callback_data;
-	struct fw_transaction t;
-	u64 offset;
+	int rcode;
 
 	/* device->node_id, accessed below, must not be older than generation */
 	smp_rmb();
-	init_completion(&callback_data.done);
 
-	offset = 0xfffff0000400ULL + index * 4;
-	fw_send_request(device->card, &t, TCODE_READ_QUADLET_REQUEST,
+	rcode = fw_run_transaction(device->card, TCODE_READ_QUADLET_REQUEST,
 			device->node_id, generation, device->max_speed,
-			offset, NULL, 4, complete_transaction, &callback_data);
-
-	wait_for_completion(&callback_data.done);
+			(CSR_REGISTER_BASE | CSR_CONFIG_ROM) + index * 4,
+			data, 4);
+	be32_to_cpus(data);
 
-	*data = callback_data.data;
-
-	return callback_data.rcode;
+	return rcode;
 }
 
+#define READ_BIB_ROM_SIZE	256
+#define READ_BIB_STACK_SIZE	16
+
 /*
  * Read the bus info block, perform a speed probe, and read all of the rest of
  * the config ROM.  We do all this with a cached bus generation.  If the bus
@@ -426,16 +417,23 @@ read_rom(struct fw_device *device, int generation, int index, u32 * data)
  */
 static int read_bus_info_block(struct fw_device *device, int generation)
 {
-	static u32 rom[256];
-	u32 stack[16], sp, key;
-	int i, end, length;
+	u32 *rom, *stack, *old_rom, *new_rom;
+	u32 sp, key;
+	int i, end, length, ret = -1;
+
+	rom = kmalloc(sizeof(*rom) * READ_BIB_ROM_SIZE +
+		      sizeof(*stack) * READ_BIB_STACK_SIZE, GFP_KERNEL);
+	if (rom == NULL)
+		return -ENOMEM;
+
+	stack = &rom[READ_BIB_ROM_SIZE];
 
 	device->max_speed = SCODE_100;
 
 	/* First read the bus info block. */
 	for (i = 0; i < 5; i++) {
 		if (read_rom(device, generation, i, &rom[i]) != RCODE_COMPLETE)
-			return -1;
+			goto out;
 		/*
 		 * As per IEEE1212 7.2, during power-up, devices can
 		 * reply with a 0 for the first quadlet of the config
@@ -445,7 +443,7 @@ static int read_bus_info_block(struct fw_device *device, int generation)
 		 * retry mechanism will try again later.
 		 */
 		if (i == 0 && rom[i] == 0)
-			return -1;
+			goto out;
 	}
 
 	device->max_speed = device->node->max_speed;
@@ -495,26 +493,26 @@ static int read_bus_info_block(struct fw_device *device, int generation)
 		 */
 		key = stack[--sp];
 		i = key & 0xffffff;
-		if (i >= ARRAY_SIZE(rom))
+		if (i >= READ_BIB_ROM_SIZE)
 			/*
 			 * The reference points outside the standard
 			 * config rom area, something's fishy.
 			 */
-			return -1;
+			goto out;
 
 		/* Read header quadlet for the block to get the length. */
 		if (read_rom(device, generation, i, &rom[i]) != RCODE_COMPLETE)
-			return -1;
+			goto out;
 		end = i + (rom[i] >> 16) + 1;
 		i++;
-		if (end > ARRAY_SIZE(rom))
+		if (end > READ_BIB_ROM_SIZE)
 			/*
 			 * This block extends outside standard config
 			 * area (and the array we're reading it
 			 * into).  That's broken, so ignore this
 			 * device.
 			 */
-			return -1;
+			goto out;
 
 		/*
 		 * Now read in the block.  If this is a directory
@@ -524,9 +522,9 @@ static int read_bus_info_block(struct fw_device *device, int generation)
 		while (i < end) {
 			if (read_rom(device, generation, i, &rom[i]) !=
 			    RCODE_COMPLETE)
-				return -1;
+				goto out;
 			if ((key >> 30) == 3 && (rom[i] >> 30) > 1 &&
-			    sp < ARRAY_SIZE(stack))
+			    sp < READ_BIB_STACK_SIZE)
 				stack[sp++] = i + rom[i];
 			i++;
 		}
@@ -534,13 +532,23 @@ static int read_bus_info_block(struct fw_device *device, int generation)
 			length = i;
 	}
 
-	device->config_rom = kmalloc(length * 4, GFP_KERNEL);
-	if (device->config_rom == NULL)
-		return -1;
-	memcpy(device->config_rom, rom, length * 4);
+	old_rom = device->config_rom;
+	new_rom = kmemdup(rom, length * 4, GFP_KERNEL);
+	if (new_rom == NULL)
+		goto out;
+
+	down_write(&fw_device_rwsem);
+	device->config_rom = new_rom;
 	device->config_rom_length = length;
+	up_write(&fw_device_rwsem);
 
-	return 0;
+	kfree(old_rom);
+	ret = 0;
+	device->cmc = rom[2] & 1 << 30;
+ out:
+	kfree(rom);
+
+	return ret;
 }
 
 static void fw_unit_release(struct device *dev)
@@ -604,17 +612,26 @@ static int shutdown_unit(struct device *device, void *data)
 	return 0;
 }
 
-static DECLARE_RWSEM(idr_rwsem);
+/*
+ * fw_device_rwsem acts as dual purpose mutex:
+ *   - serializes accesses to fw_device_idr,
+ *   - serializes accesses to fw_device.config_rom/.config_rom_length and
+ *     fw_unit.directory, unless those accesses happen at safe occasions
+ */
+DECLARE_RWSEM(fw_device_rwsem);
+
 static DEFINE_IDR(fw_device_idr);
 int fw_cdev_major;
 
-struct fw_device *fw_device_from_devt(dev_t devt)
+struct fw_device *fw_device_get_by_devt(dev_t devt)
 {
 	struct fw_device *device;
 
-	down_read(&idr_rwsem);
+	down_read(&fw_device_rwsem);
 	device = idr_find(&fw_device_idr, MINOR(devt));
-	up_read(&idr_rwsem);
+	if (device)
+		fw_device_get(device);
+	up_read(&fw_device_rwsem);
 
 	return device;
 }
@@ -626,13 +643,14 @@ static void fw_device_shutdown(void *w)
 		container_of(work, struct fw_device, work);
 	int minor = MINOR(device->device.devt);
 
-	down_write(&idr_rwsem);
-	idr_remove(&fw_device_idr, minor);
-	up_write(&idr_rwsem);
-
 	fw_device_cdev_remove(device);
 	device_for_each_child(&device->device, NULL, shutdown_unit);
 	device_unregister(&device->device);
+
+	down_write(&fw_device_rwsem);
+	idr_remove(&fw_device_idr, minor);
+	up_write(&fw_device_rwsem);
+	fw_device_put(device);
 }
 
 /*
@@ -664,7 +682,8 @@ static void fw_device_init(void *w)
 	 */
 
 	if (read_bus_info_block(device, device->generation) < 0) {
-		if (device->config_rom_retries < MAX_RETRIES) {
+		if (device->config_rom_retries < MAX_RETRIES &&
+		    atomic_read(&device->state) == FW_DEVICE_INITIALIZING) {
 			device->config_rom_retries++;
 			schedule_delayed_work(&device->work, RETRY_DELAY);
 		} else {
@@ -678,10 +697,13 @@ static void fw_device_init(void *w)
 	}
 
 	err = -ENOMEM;
-	down_write(&idr_rwsem);
+
+	fw_device_get(device);
+	down_write(&fw_device_rwsem);
 	if (idr_pre_get(&fw_device_idr, GFP_KERNEL))
 		err = idr_get_new(&fw_device_idr, device, &minor);
-	up_write(&idr_rwsem);
+	up_write(&fw_device_rwsem);
+
 	if (err < 0)
 		goto error;
 
@@ -713,13 +735,23 @@ static void fw_device_init(void *w)
 	 */
 	if (atomic_cmpxchg(&device->state,
 		    FW_DEVICE_INITIALIZING,
-		    FW_DEVICE_RUNNING) == FW_DEVICE_SHUTDOWN)
-		fw_device_shutdown(&device->work);
-	else
-		fw_notify("created new fw device %s "
-			  " (%d config rom retries, S%d00)\n",
-			  device->device.bus_id, device->config_rom_retries,
-			  1 << device->max_speed);
+		    FW_DEVICE_RUNNING) == FW_DEVICE_SHUTDOWN) {
+		fw_device_shutdown(work);
+	} else {
+		if (device->config_rom_retries)
+			fw_notify("created device %s: GUID %08x%08x, S%d00, "
+				  "%d config ROM retries\n",
+				  device->device.bus_id,
+				  device->config_rom[3], device->config_rom[4],
+				  1 << device->max_speed,
+				  device->config_rom_retries);
+		else
+			fw_notify("created device %s: GUID %08x%08x, S%d00\n",
+				  device->device.bus_id,
+				  device->config_rom[3], device->config_rom[4],
+				  1 << device->max_speed);
+		device->config_rom_retries = 0;
+	}
 
 	/*
 	 * Reschedule the IRM work if we just finished reading the
@@ -733,11 +765,13 @@ static void fw_device_init(void *w)
 	return;
 
  error_with_cdev:
-	down_write(&idr_rwsem);
+	down_write(&fw_device_rwsem);
 	idr_remove(&fw_device_idr, minor);
-	up_write(&idr_rwsem);
+	up_write(&fw_device_rwsem);
  error:
-	put_device(&device->device);
+	fw_device_put(device);		/* fw_device_idr's reference */
+
+	put_device(&device->device);	/* our reference */
 }
 
 static int update_unit(struct device *dev, void *data)
@@ -764,6 +798,107 @@ static void fw_device_update(void *w)
 	device_for_each_child(&device->device, NULL, update_unit);
 }
 
+enum {
+	REREAD_BIB_ERROR,
+	REREAD_BIB_GONE,
+	REREAD_BIB_UNCHANGED,
+	REREAD_BIB_CHANGED,
+};
+
+/* Reread and compare bus info block and header of root directory */
+static int reread_bus_info_block(struct fw_device *device, int generation)
+{
+	u32 q;
+	int i;
+
+	for (i = 0; i < 6; i++) {
+		if (read_rom(device, generation, i, &q) != RCODE_COMPLETE)
+			return REREAD_BIB_ERROR;
+
+		if (i == 0 && q == 0)
+			return REREAD_BIB_GONE;
+
+		if (i > device->config_rom_length || q != device->config_rom[i])
+			return REREAD_BIB_CHANGED;
+	}
+
+	return REREAD_BIB_UNCHANGED;
+}
+
+static void fw_device_refresh(void *w)
+{
+	struct work_struct *work = w;
+	struct fw_device *device =
+		container_of(work, struct fw_device, work);
+	struct fw_card *card = device->card;
+	int node_id = device->node_id;
+
+	switch (reread_bus_info_block(device, device->generation)) {
+	case REREAD_BIB_ERROR:
+		if (device->config_rom_retries < MAX_RETRIES / 2 &&
+		    atomic_read(&device->state) == FW_DEVICE_INITIALIZING) {
+			device->config_rom_retries++;
+			schedule_delayed_work(&device->work, RETRY_DELAY / 2);
+
+			return;
+		}
+		goto give_up;
+
+	case REREAD_BIB_GONE:
+		goto gone;
+
+	case REREAD_BIB_UNCHANGED:
+		if (atomic_cmpxchg(&device->state,
+			    FW_DEVICE_INITIALIZING,
+			    FW_DEVICE_RUNNING) == FW_DEVICE_SHUTDOWN)
+			goto gone;
+
+		fw_device_update(work);
+		device->config_rom_retries = 0;
+		goto out;
+
+	case REREAD_BIB_CHANGED:
+		break;
+	}
+
+	/*
+	 * Something changed.  We keep things simple and don't investigate
+	 * further.  We just destroy all previous units and create new ones.
+	 */
+	device_for_each_child(&device->device, NULL, shutdown_unit);
+
+	if (read_bus_info_block(device, device->generation) < 0) {
+		if (device->config_rom_retries < MAX_RETRIES &&
+		    atomic_read(&device->state) == FW_DEVICE_INITIALIZING) {
+			device->config_rom_retries++;
+			schedule_delayed_work(&device->work, RETRY_DELAY);
+
+			return;
+		}
+		goto give_up;
+	}
+
+	create_units(device);
+
+	if (atomic_cmpxchg(&device->state,
+		    FW_DEVICE_INITIALIZING,
+		    FW_DEVICE_RUNNING) == FW_DEVICE_SHUTDOWN)
+		goto gone;
+
+	fw_notify("refreshed device %s\n", device->device.bus_id);
+	device->config_rom_retries = 0;
+	goto out;
+
+ give_up:
+	fw_notify("giving up on refresh of device %s\n", device->device.bus_id);
+ gone:
+	atomic_set(&device->state, FW_DEVICE_SHUTDOWN);
+	fw_device_shutdown(work);
+ out:
+	if (node_id == card->root_node->node_id)
+		schedule_delayed_work(&card->work, 0);
+}
+
 void fw_node_event(struct fw_card *card, struct fw_node *node, int event)
 {
 	struct fw_device *device;
@@ -773,7 +908,7 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event)
 	case FW_NODE_LINK_ON:
 		if (!node->link_on)
 			break;
-
+ create:
 		device = kzalloc(sizeof(*device), GFP_ATOMIC);
 		if (device == NULL)
 			break;
@@ -807,21 +942,39 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event)
 		 * power-up after getting plugged in.  We schedule the
 		 * first config rom scan half a second after bus reset.
 		 */
-		INIT_WORK(&device->work, fw_device_init, &(device->work));
+		INIT_WORK(&device->work, fw_device_init, &device->work);
 		schedule_delayed_work(&device->work, INITIAL_DELAY);
 		break;
 
+	case FW_NODE_INITIATED_RESET:
+		device = node->data;
+		if (device == NULL)
+			goto create;
+
+		device->node_id = node->node_id;
+		smp_wmb();  /* update node_id before generation */
+		device->generation = card->generation;
+		if (atomic_cmpxchg(&device->state,
+			    FW_DEVICE_RUNNING,
+			    FW_DEVICE_INITIALIZING) == FW_DEVICE_RUNNING) {
+			PREPARE_WORK(&device->work, fw_device_refresh,
+				     &device->work);
+			schedule_delayed_work(&device->work,
+				node == card->local_node ? 0 : INITIAL_DELAY);
+		}
+		break;
+
 	case FW_NODE_UPDATED:
 		if (!node->link_on || node->data == NULL)
 			break;
 
 		device = node->data;
 		device->node_id = node->node_id;
-		smp_wmb();	/* update node_id before generation */
+		smp_wmb();  /* update node_id before generation */
 		device->generation = card->generation;
 		if (atomic_read(&device->state) == FW_DEVICE_RUNNING) {
 			PREPARE_WORK(&device->work, fw_device_update,
-				     &(device->work));
+				     &device->work);
 			schedule_delayed_work(&device->work, 0);
 		}
 		break;
@@ -847,7 +1000,7 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event)
 		if (atomic_xchg(&device->state,
 				FW_DEVICE_SHUTDOWN) == FW_DEVICE_RUNNING) {
 			PREPARE_WORK(&device->work, fw_device_shutdown,
-				     &(device->work));
+				     &device->work);
 			schedule_delayed_work(&device->work, 0);
 		}
 		break;
diff --git a/drivers/firewire/fw-device.h b/drivers/firewire/fw-device.h
index 9f6da87..dd3e507 100644
--- a/drivers/firewire/fw-device.h
+++ b/drivers/firewire/fw-device.h
@@ -21,6 +21,7 @@
 
 #include <linux/fs.h>
 #include <linux/cdev.h>
+#include <linux/rwsem.h>
 #include <asm/atomic.h>
 
 enum fw_device_state {
@@ -35,15 +36,32 @@ struct fw_attribute_group {
 	struct attribute *attrs[11];
 };
 
+/*
+ * Note, fw_device.generation always has to be read before fw_device.node_id.
+ * Use SMP memory barriers to ensure this.  Otherwise requests will be sent
+ * to an outdated node_id if the generation was updated in the meantime due
+ * to a bus reset.
+ *
+ * Likewise, fw-core will take care to update .node_id before .generation so
+ * that whenever fw_device.generation is current WRT the actual bus generation,
+ * fw_device.node_id is guaranteed to be current too.
+ *
+ * The same applies to fw_device.card->node_id vs. fw_device.generation.
+ *
+ * fw_device.config_rom and fw_device.config_rom_length may be accessed during
+ * the lifetime of any fw_unit belonging to the fw_device, before device_del()
+ * was called on the last fw_unit.  Alternatively, they may be accessed while
+ * holding fw_device_rwsem.
+ */
 struct fw_device {
 	atomic_t state;
 	struct fw_node *node;
 	int node_id;
 	int generation;
 	unsigned max_speed;
+	int cmc;
 	struct fw_card *card;
 	struct device device;
-	struct list_head link;
 	struct list_head client_list;
 	u32 *config_rom;
 	size_t config_rom_length;
@@ -52,40 +70,63 @@ struct fw_device {
 	struct fw_attribute_group attribute_group;
 };
 
-static inline struct fw_device *
-fw_device(struct device *dev)
+static inline struct fw_device *fw_device(struct device *dev)
 {
 	return container_of(dev, struct fw_device, device);
 }
 
-static inline int
-fw_device_is_shutdown(struct fw_device *device)
+static inline int fw_device_is_shutdown(struct fw_device *device)
 {
 	return atomic_read(&device->state) == FW_DEVICE_SHUTDOWN;
 }
 
-struct fw_device *fw_device_get(struct fw_device *device);
-void fw_device_put(struct fw_device *device);
+static inline struct fw_device *fw_device_get(struct fw_device *device)
+{
+	get_device(&device->device);
+
+	return device;
+}
+
+static inline void fw_device_put(struct fw_device *device)
+{
+	put_device(&device->device);
+}
+
+struct fw_device *fw_device_get_by_devt(dev_t devt);
 int fw_device_enable_phys_dma(struct fw_device *device);
 
 void fw_device_cdev_update(struct fw_device *device);
 void fw_device_cdev_remove(struct fw_device *device);
 
-struct fw_device *fw_device_from_devt(dev_t devt);
+extern struct rw_semaphore fw_device_rwsem;
 extern int fw_cdev_major;
 
+/*
+ * fw_unit.directory must not be accessed after device_del(&fw_unit.device).
+ */
 struct fw_unit {
 	struct device device;
 	u32 *directory;
 	struct fw_attribute_group attribute_group;
 };
 
-static inline struct fw_unit *
-fw_unit(struct device *dev)
+static inline struct fw_unit *fw_unit(struct device *dev)
 {
 	return container_of(dev, struct fw_unit, device);
 }
 
+static inline struct fw_unit *fw_unit_get(struct fw_unit *unit)
+{
+	get_device(&unit->device);
+
+	return unit;
+}
+
+static inline void fw_unit_put(struct fw_unit *unit)
+{
+	put_device(&unit->device);
+}
+
 #define CSR_OFFSET	0x40
 #define CSR_LEAF	0x80
 #define CSR_DIRECTORY	0xc0
diff --git a/drivers/firewire/fw-ohci.c b/drivers/firewire/fw-ohci.c
index 38d041f..d5c610e 100644
--- a/drivers/firewire/fw-ohci.c
+++ b/drivers/firewire/fw-ohci.c
@@ -1,4 +1,5 @@
 /*
+ * Driver for OHCI 1394 controllers
  *
  * Copyright (C) 2003-2006 Kristian Hoegsberg <krh@bitplanet.net>
  *
@@ -17,10 +18,6 @@
  * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  */
 
-
-
-
-
 #include <linux/compiler.h>
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
@@ -30,12 +27,17 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/pci.h>
 #include <linux/spinlock.h>
 
 #include <asm/page.h>
 #include <asm/system.h>
 
+#ifdef CONFIG_PPC_PMAC
+#include <asm/pmac_feature.h>
+#endif
+
 #include "fw-ohci.h"
 #include "fw-transaction.h"
 
@@ -169,16 +171,19 @@ struct iso_context {
 struct fw_ohci {
 	struct fw_card card;
 
-	u32 version;
 	__iomem char *registers;
 	dma_addr_t self_id_bus;
 	__le32 *self_id_cpu;
 	struct tasklet_struct bus_reset_tasklet;
 	int node_id;
 	int generation;
-	int request_generation;
+	int request_generation;	/* for timestamping incoming requests */
 	u32 bus_seconds;
 
+	bool use_dualbuffer;
+	bool old_uninorth;
+	bool bus_reset_packet_quirk;
+
 	/*
 	 * Spinlock for accessing fw_ohci data.  Never call out of
 	 * this driver with this lock held.
@@ -235,6 +240,191 @@ static inline struct fw_ohci *fw_ohci(struct fw_card *card)
 
 static char ohci_driver_name[] = KBUILD_MODNAME;
 
+#ifdef CONFIG_FIREWIRE_OHCI_DEBUG
+
+#define OHCI_PARAM_DEBUG_AT_AR		1
+#define OHCI_PARAM_DEBUG_SELFIDS	2
+#define OHCI_PARAM_DEBUG_IRQS		4
+#define OHCI_PARAM_DEBUG_BUSRESETS	8 /* only effective before chip init */
+
+static int param_debug;
+module_param_named(debug, param_debug, int, 0644);
+MODULE_PARM_DESC(debug, "Verbose logging (default = 0"
+	", AT/AR events = "	__stringify(OHCI_PARAM_DEBUG_AT_AR)
+	", self-IDs = "		__stringify(OHCI_PARAM_DEBUG_SELFIDS)
+	", IRQs = "		__stringify(OHCI_PARAM_DEBUG_IRQS)
+	", busReset events = "	__stringify(OHCI_PARAM_DEBUG_BUSRESETS)
+	", or a combination, or all = -1)");
+
+static void log_irqs(u32 evt)
+{
+	if (likely(!(param_debug &
+			(OHCI_PARAM_DEBUG_IRQS | OHCI_PARAM_DEBUG_BUSRESETS))))
+		return;
+
+	if (!(param_debug & OHCI_PARAM_DEBUG_IRQS) &&
+	    !(evt & OHCI1394_busReset))
+		return;
+
+	fw_notify("IRQ %08x%s%s%s%s%s%s%s%s%s%s%s%s%s\n", evt,
+	    evt & OHCI1394_selfIDComplete	? " selfID"		: "",
+	    evt & OHCI1394_RQPkt		? " AR_req"		: "",
+	    evt & OHCI1394_RSPkt		? " AR_resp"		: "",
+	    evt & OHCI1394_reqTxComplete	? " AT_req"		: "",
+	    evt & OHCI1394_respTxComplete	? " AT_resp"		: "",
+	    evt & OHCI1394_isochRx		? " IR"			: "",
+	    evt & OHCI1394_isochTx		? " IT"			: "",
+	    evt & OHCI1394_postedWriteErr	? " postedWriteErr"	: "",
+	    evt & OHCI1394_cycleTooLong		? " cycleTooLong"	: "",
+	    evt & OHCI1394_cycle64Seconds	? " cycle64Seconds"	: "",
+	    evt & OHCI1394_regAccessFail	? " regAccessFail"	: "",
+	    evt & OHCI1394_busReset		? " busReset"		: "",
+	    evt & ~(OHCI1394_selfIDComplete | OHCI1394_RQPkt |
+		    OHCI1394_RSPkt | OHCI1394_reqTxComplete |
+		    OHCI1394_respTxComplete | OHCI1394_isochRx |
+		    OHCI1394_isochTx | OHCI1394_postedWriteErr |
+		    OHCI1394_cycleTooLong | OHCI1394_cycle64Seconds |
+		    OHCI1394_regAccessFail | OHCI1394_busReset)
+						? " ?"			: "");
+}
+
+static const char *speed[] = {
+	[0] = "S100", [1] = "S200", [2] = "S400",    [3] = "beta",
+};
+static const char *power[] = {
+	[0] = "+0W",  [1] = "+15W", [2] = "+30W",    [3] = "+45W",
+	[4] = "-3W",  [5] = " ?W",  [6] = "-3..-6W", [7] = "-3..-10W",
+};
+static const char port[] = { '.', '-', 'p', 'c', };
+
+static char _p(u32 *s, int shift)
+{
+	return port[*s >> shift & 3];
+}
+
+static void log_selfids(int node_id, int generation, int self_id_count, u32 *s)
+{
+	if (likely(!(param_debug & OHCI_PARAM_DEBUG_SELFIDS)))
+		return;
+
+	fw_notify("%d selfIDs, generation %d, local node ID %04x\n",
+		  self_id_count, generation, node_id);
+
+	for (; self_id_count--; ++s)
+		if ((*s & 1 << 23) == 0)
+			fw_notify("selfID 0: %08x, phy %d [%c%c%c] "
+			    "%s gc=%d %s %s%s%s\n",
+			    *s, *s >> 24 & 63, _p(s, 6), _p(s, 4), _p(s, 2),
+			    speed[*s >> 14 & 3], *s >> 16 & 63,
+			    power[*s >> 8 & 7], *s >> 22 & 1 ? "L" : "",
+			    *s >> 11 & 1 ? "c" : "", *s & 2 ? "i" : "");
+		else
+			fw_notify("selfID n: %08x, phy %d [%c%c%c%c%c%c%c%c]\n",
+			    *s, *s >> 24 & 63,
+			    _p(s, 16), _p(s, 14), _p(s, 12), _p(s, 10),
+			    _p(s,  8), _p(s,  6), _p(s,  4), _p(s,  2));
+}
+
+static const char *evts[] = {
+	[0x00] = "evt_no_status",	[0x01] = "-reserved-",
+	[0x02] = "evt_long_packet",	[0x03] = "evt_missing_ack",
+	[0x04] = "evt_underrun",	[0x05] = "evt_overrun",
+	[0x06] = "evt_descriptor_read",	[0x07] = "evt_data_read",
+	[0x08] = "evt_data_write",	[0x09] = "evt_bus_reset",
+	[0x0a] = "evt_timeout",		[0x0b] = "evt_tcode_err",
+	[0x0c] = "-reserved-",		[0x0d] = "-reserved-",
+	[0x0e] = "evt_unknown",		[0x0f] = "evt_flushed",
+	[0x10] = "-reserved-",		[0x11] = "ack_complete",
+	[0x12] = "ack_pending ",	[0x13] = "-reserved-",
+	[0x14] = "ack_busy_X",		[0x15] = "ack_busy_A",
+	[0x16] = "ack_busy_B",		[0x17] = "-reserved-",
+	[0x18] = "-reserved-",		[0x19] = "-reserved-",
+	[0x1a] = "-reserved-",		[0x1b] = "ack_tardy",
+	[0x1c] = "-reserved-",		[0x1d] = "ack_data_error",
+	[0x1e] = "ack_type_error",	[0x1f] = "-reserved-",
+	[0x20] = "pending/cancelled",
+};
+static const char *tcodes[] = {
+	[0x0] = "QW req",		[0x1] = "BW req",
+	[0x2] = "W resp",		[0x3] = "-reserved-",
+	[0x4] = "QR req",		[0x5] = "BR req",
+	[0x6] = "QR resp",		[0x7] = "BR resp",
+	[0x8] = "cycle start",		[0x9] = "Lk req",
+	[0xa] = "async stream packet",	[0xb] = "Lk resp",
+	[0xc] = "-reserved-",		[0xd] = "-reserved-",
+	[0xe] = "link internal",	[0xf] = "-reserved-",
+};
+static const char *phys[] = {
+	[0x0] = "phy config packet",	[0x1] = "link-on packet",
+	[0x2] = "self-id packet",	[0x3] = "-reserved-",
+};
+
+static void log_ar_at_event(char dir, int speed, u32 *header, int evt)
+{
+	int tcode = header[0] >> 4 & 0xf;
+	char specific[12];
+
+	if (likely(!(param_debug & OHCI_PARAM_DEBUG_AT_AR)))
+		return;
+
+	if (unlikely(evt >= ARRAY_SIZE(evts)))
+			evt = 0x1f;
+
+	if (evt == OHCI1394_evt_bus_reset) {
+		fw_notify("A%c evt_bus_reset, generation %d\n",
+		    dir, (header[2] >> 16) & 0xff);
+		return;
+	}
+
+	if (header[0] == ~header[1]) {
+		fw_notify("A%c %s, %s, %08x\n",
+		    dir, evts[evt], phys[header[0] >> 30 & 0x3], header[0]);
+		return;
+	}
+
+	switch (tcode) {
+	case 0x0: case 0x6: case 0x8:
+		snprintf(specific, sizeof(specific), " = %08x",
+			 be32_to_cpu((__force __be32)header[3]));
+		break;
+	case 0x1: case 0x5: case 0x7: case 0x9: case 0xb:
+		snprintf(specific, sizeof(specific), " %x,%x",
+			 header[3] >> 16, header[3] & 0xffff);
+		break;
+	default:
+		specific[0] = '\0';
+	}
+
+	switch (tcode) {
+	case 0xe: case 0xa:
+		fw_notify("A%c %s, %s\n", dir, evts[evt], tcodes[tcode]);
+		break;
+	case 0x0: case 0x1: case 0x4: case 0x5: case 0x9:
+		fw_notify("A%c spd %x tl %02x, "
+		    "%04x -> %04x, %s, "
+		    "%s, %04x%08x%s\n",
+		    dir, speed, header[0] >> 10 & 0x3f,
+		    header[1] >> 16, header[0] >> 16, evts[evt],
+		    tcodes[tcode], header[1] & 0xffff, header[2], specific);
+		break;
+	default:
+		fw_notify("A%c spd %x tl %02x, "
+		    "%04x -> %04x, %s, "
+		    "%s%s\n",
+		    dir, speed, header[0] >> 10 & 0x3f,
+		    header[1] >> 16, header[0] >> 16, evts[evt],
+		    tcodes[tcode], specific);
+	}
+}
+
+#else
+
+#define log_irqs(evt)
+#define log_selfids(node_id, generation, self_id_count, sid)
+#define log_ar_at_event(dir, speed, header, evt)
+
+#endif /* CONFIG_FIREWIRE_OHCI_DEBUG */
+
 static inline void reg_write(const struct fw_ohci *ohci, int offset, u32 data)
 {
 	writel(data, ohci->registers + offset);
@@ -279,19 +469,13 @@ static int ar_context_add_page(struct ar_context *ctx)
 {
 	struct device *dev = ctx->ohci->card.device;
 	struct ar_buffer *ab;
-	dma_addr_t ab_bus;
+	dma_addr_t uninitialized_var(ab_bus);
 	size_t offset;
 
-	ab = (struct ar_buffer *) __get_free_page(GFP_ATOMIC);
+	ab = dma_alloc_coherent(dev, PAGE_SIZE, &ab_bus, GFP_ATOMIC);
 	if (ab == NULL)
 		return -ENOMEM;
 
-	ab_bus = dma_map_single(dev, ab, PAGE_SIZE, DMA_BIDIRECTIONAL);
-	if (dma_mapping_error(ab_bus)) {
-		free_page((unsigned long) ab);
-		return -ENOMEM;
-	}
-
 	memset(&ab->descriptor, 0, sizeof(ab->descriptor));
 	ab->descriptor.control        = cpu_to_le16(DESCRIPTOR_INPUT_MORE |
 						    DESCRIPTOR_STATUS |
@@ -302,8 +486,6 @@ static int ar_context_add_page(struct ar_context *ctx)
 	ab->descriptor.res_count      = cpu_to_le16(PAGE_SIZE - offset);
 	ab->descriptor.branch_address = 0;
 
-	dma_sync_single_for_device(dev, ab_bus, PAGE_SIZE, DMA_BIDIRECTIONAL);
-
 	ctx->last_buffer->descriptor.branch_address = cpu_to_le32(ab_bus | 1);
 	ctx->last_buffer->next = ab;
 	ctx->last_buffer = ab;
@@ -314,15 +496,23 @@ static int ar_context_add_page(struct ar_context *ctx)
 	return 0;
 }
 
+#if defined(CONFIG_PPC_PMAC) && defined(CONFIG_PPC32)
+#define cond_le32_to_cpu(v) \
+	(ohci->old_uninorth ? (__force __u32)(v) : le32_to_cpu(v))
+#else
+#define cond_le32_to_cpu(v) le32_to_cpu(v)
+#endif
+
 static __le32 *handle_ar_packet(struct ar_context *ctx, __le32 *buffer)
 {
 	struct fw_ohci *ohci = ctx->ohci;
 	struct fw_packet p;
 	u32 status, length, tcode;
+	int evt;
 
-	p.header[0] = le32_to_cpu(buffer[0]);
-	p.header[1] = le32_to_cpu(buffer[1]);
-	p.header[2] = le32_to_cpu(buffer[2]);
+	p.header[0] = cond_le32_to_cpu(buffer[0]);
+	p.header[1] = cond_le32_to_cpu(buffer[1]);
+	p.header[2] = cond_le32_to_cpu(buffer[2]);
 
 	tcode = (p.header[0] >> 4) & 0x0f;
 	switch (tcode) {
@@ -334,7 +524,7 @@ static __le32 *handle_ar_packet(struct ar_context *ctx, __le32 *buffer)
 		break;
 
 	case TCODE_READ_BLOCK_REQUEST :
-		p.header[3] = le32_to_cpu(buffer[3]);
+		p.header[3] = cond_le32_to_cpu(buffer[3]);
 		p.header_length = 16;
 		p.payload_length = 0;
 		break;
@@ -343,7 +533,7 @@ static __le32 *handle_ar_packet(struct ar_context *ctx, __le32 *buffer)
 	case TCODE_READ_BLOCK_RESPONSE:
 	case TCODE_LOCK_REQUEST:
 	case TCODE_LOCK_RESPONSE:
-		p.header[3] = le32_to_cpu(buffer[3]);
+		p.header[3] = cond_le32_to_cpu(buffer[3]);
 		p.header_length = 16;
 		p.payload_length = p.header[3] >> 16;
 		break;
@@ -354,19 +544,27 @@ static __le32 *handle_ar_packet(struct ar_context *ctx, __le32 *buffer)
 		p.header_length = 12;
 		p.payload_length = 0;
 		break;
+
+	default:
+		/* FIXME: Stop context, discard everything, and restart? */
+		p.header_length = 0;
+		p.payload_length = 0;
 	}
 
 	p.payload = (void *) buffer + p.header_length;
 
 	/* FIXME: What to do about evt_* errors? */
 	length = (p.header_length + p.payload_length + 3) / 4;
-	status = le32_to_cpu(buffer[length]);
+	status = cond_le32_to_cpu(buffer[length]);
+	evt    = (status >> 16) & 0x1f;
 
-	p.ack        = ((status >> 16) & 0x1f) - 16;
+	p.ack        = evt - 16;
 	p.speed      = (status >> 21) & 0x7;
 	p.timestamp  = status & 0xffff;
 	p.generation = ohci->request_generation;
 
+	log_ar_at_event('R', p.speed, p.header, evt);
+
 	/*
 	 * The OHCI bus reset handler synthesizes a phy packet with
 	 * the new generation number when a bus reset happens (see
@@ -375,14 +573,19 @@ static __le32 *handle_ar_packet(struct ar_context *ctx, __le32 *buffer)
 	 * generation.  We only need this for requests; for responses
 	 * we use the unique tlabel for finding the matching
 	 * request.
+	 *
+	 * Alas some chips sometimes emit bus reset packets with a
+	 * wrong generation.  We set the correct generation for these
+	 * at a slightly incorrect time (in bus_reset_tasklet).
 	 */
-
-	if (p.ack + 16 == 0x09)
-		ohci->request_generation = (buffer[2] >> 16) & 0xff;
-	else if (ctx == &ohci->ar_request_ctx)
+	if (evt == OHCI1394_evt_bus_reset) {
+		if (!ohci->bus_reset_packet_quirk)
+			ohci->request_generation = (p.header[2] >> 16) & 0xff;
+	} else if (ctx == &ohci->ar_request_ctx) {
 		fw_core_handle_request(&ohci->card, &p);
-	else
+	} else {
 		fw_core_handle_response(&ohci->card, &p);
+	}
 
 	return buffer + length + 1;
 }
@@ -400,6 +603,8 @@ static void ar_context_tasklet(unsigned long data)
 
 	if (d->res_count == 0) {
 		size_t size, rest, offset;
+		dma_addr_t start_bus;
+		void *start;
 
 		/*
 		 * This descriptor is finished and we may have a
@@ -408,11 +613,9 @@ static void ar_context_tasklet(unsigned long data)
 		 */
 
 		offset = offsetof(struct ar_buffer, data);
-		dma_unmap_single(ohci->card.device,
-			le32_to_cpu(ab->descriptor.data_address) - offset,
-			PAGE_SIZE, DMA_BIDIRECTIONAL);
+		start = buffer = ab;
+		start_bus = le32_to_cpu(ab->descriptor.data_address) - offset;
 
-		buffer = ab;
 		ab = ab->next;
 		d = &ab->descriptor;
 		size = buffer + PAGE_SIZE - ctx->pointer;
@@ -426,7 +629,8 @@ static void ar_context_tasklet(unsigned long data)
 		while (buffer < end)
 			buffer = handle_ar_packet(ctx, buffer);
 
-		free_page((unsigned long)buffer);
+		dma_free_coherent(ohci->card.device, PAGE_SIZE,
+				  start, start_bus);
 		ar_context_add_page(ctx);
 	} else {
 		buffer = ctx->pointer;
@@ -535,7 +739,7 @@ static int
 context_add_buffer(struct context *ctx)
 {
 	struct descriptor_buffer *desc;
-	dma_addr_t bus_addr;
+	dma_addr_t uninitialized_var(bus_addr);
 	int offset;
 
 	/*
@@ -768,8 +972,19 @@ at_context_queue_packet(struct context *ctx, struct fw_packet *packet)
 				     DESCRIPTOR_IRQ_ALWAYS |
 				     DESCRIPTOR_BRANCH_ALWAYS);
 
-	/* FIXME: Document how the locking works. */
-	if (ohci->generation != packet->generation) {
+	/*
+	 * If the controller and packet generations don't match, we need to
+	 * bail out and try again.  If IntEvent.busReset is set, the AT context
+	 * is halted, so appending to the context and trying to run it is
+	 * futile.  Most controllers do the right thing and just flush the AT
+	 * queue (per section 7.2.3.2 of the OHCI 1.1 specification), but
+	 * some controllers (like a JMicron JMB381 PCI-e) misbehave and wind
+	 * up stalling out.  So we just bail out in software and try again
+	 * later, and everyone is happy.
+	 * FIXME: Document how the locking works.
+	 */
+	if (ohci->generation != packet->generation ||
+	    reg_read(ohci, OHCI1394_IntEventSet) & OHCI1394_busReset) {
 		if (packet->payload_length > 0)
 			dma_unmap_single(ohci->card.device, payload_bus,
 					 packet->payload_length, DMA_TO_DEVICE);
@@ -815,6 +1030,8 @@ static int handle_at_packet(struct context *context,
 	evt = le16_to_cpu(last->transfer_status) & 0x1f;
 	packet->timestamp = le16_to_cpu(last->res_count);
 
+	log_ar_at_event('T', packet->speed, packet->header, evt);
+
 	switch (evt) {
 	case OHCI1394_evt_timeout:
 		/* Async response transmit timed out. */
@@ -1017,21 +1234,32 @@ static void bus_reset_tasklet(unsigned long data)
 	ohci->node_id = reg & (OHCI1394_NodeID_busNumber |
 			       OHCI1394_NodeID_nodeNumber);
 
+	reg = reg_read(ohci, OHCI1394_SelfIDCount);
+	if (reg & OHCI1394_SelfIDCount_selfIDError) {
+		fw_notify("inconsistent self IDs\n");
+		return;
+	}
 	/*
 	 * The count in the SelfIDCount register is the number of
 	 * bytes in the self ID receive buffer.  Since we also receive
 	 * the inverted quadlets and a header quadlet, we shift one
 	 * bit extra to get the actual number of self IDs.
 	 */
-
-	self_id_count = (reg_read(ohci, OHCI1394_SelfIDCount) >> 3) & 0x3ff;
-	generation = (le32_to_cpu(ohci->self_id_cpu[0]) >> 16) & 0xff;
+	self_id_count = (reg >> 3) & 0x3ff;
+	if (self_id_count == 0) {
+		fw_notify("inconsistent self IDs\n");
+		return;
+	}
+	generation = (cond_le32_to_cpu(ohci->self_id_cpu[0]) >> 16) & 0xff;
 	rmb();
 
 	for (i = 1, j = 0; j < self_id_count; i += 2, j++) {
-		if (ohci->self_id_cpu[i] != ~ohci->self_id_cpu[i + 1])
-			fw_error("inconsistent self IDs\n");
-		ohci->self_id_buffer[j] = le32_to_cpu(ohci->self_id_cpu[i]);
+		if (ohci->self_id_cpu[i] != ~ohci->self_id_cpu[i + 1]) {
+			fw_notify("inconsistent self IDs\n");
+			return;
+		}
+		ohci->self_id_buffer[j] =
+				cond_le32_to_cpu(ohci->self_id_cpu[i]);
 	}
 	rmb();
 
@@ -1064,6 +1292,9 @@ static void bus_reset_tasklet(unsigned long data)
 	context_stop(&ohci->at_response_ctx);
 	reg_write(ohci, OHCI1394_IntEventClear, OHCI1394_busReset);
 
+	if (ohci->bus_reset_packet_quirk)
+		ohci->request_generation = generation;
+
 	/*
 	 * This next bit is unrelated to the AT context stuff but we
 	 * have to do it under the spinlock also.  If a new config rom
@@ -1094,17 +1325,25 @@ static void bus_reset_tasklet(unsigned long data)
 		reg_write(ohci, OHCI1394_ConfigROMhdr, ohci->next_header);
 	}
 
+#ifdef CONFIG_FIREWIRE_OHCI_REMOTE_DMA
+	reg_write(ohci, OHCI1394_PhyReqFilterHiSet, ~0);
+	reg_write(ohci, OHCI1394_PhyReqFilterLoSet, ~0);
+#endif
+
 	spin_unlock_irqrestore(&ohci->lock, flags);
 
 	if (free_rom)
 		dma_free_coherent(ohci->card.device, CONFIG_ROM_SIZE,
 				  free_rom, free_rom_bus);
 
+	log_selfids(ohci->node_id, generation,
+		    self_id_count, ohci->self_id_buffer);
+
 	fw_core_handle_bus_reset(&ohci->card, ohci->node_id, generation,
 				 self_id_count, ohci->self_id_buffer);
 }
 
-static irqreturn_t irq_handler(int irq, void *data)
+static irqreturn_t irq_handler(int irq, void *data, struct pt_regs *regs)
 {
 	struct fw_ohci *ohci = data;
 	u32 event, iso_event, cycle_time;
@@ -1115,7 +1354,9 @@ static irqreturn_t irq_handler(int irq, void *data)
 	if (!event || !~event)
 		return IRQ_NONE;
 
-	reg_write(ohci, OHCI1394_IntEventClear, event);
+	/* busReset must not be cleared yet, see OHCI 1.1 clause 7.2.3.2 */
+	reg_write(ohci, OHCI1394_IntEventClear, event & ~OHCI1394_busReset);
+	log_irqs(event);
 
 	if (event & OHCI1394_selfIDComplete)
 		tasklet_schedule(&ohci->bus_reset_tasklet);
@@ -1150,6 +1391,10 @@ static irqreturn_t irq_handler(int irq, void *data)
 		iso_event &= ~(1 << i);
 	}
 
+	if (unlikely(event & OHCI1394_regAccessFail))
+		fw_error("Register access failure - "
+			 "please notify linux1394-devel@lists.sf.net\n");
+
 	if (unlikely(event & OHCI1394_postedWriteErr))
 		fw_error("PCI posted write error\n");
 
@@ -1189,6 +1434,8 @@ static int ohci_enable(struct fw_card *card, u32 *config_rom, size_t length)
 {
 	struct fw_ohci *ohci = fw_ohci(card);
 	struct pci_dev *dev = to_pci_dev(card->device);
+	u32 lps;
+	int i;
 
 	if (software_reset(ohci)) {
 		fw_error("Failed to reset ohci card.\n");
@@ -1200,17 +1447,31 @@ static int ohci_enable(struct fw_card *card, u32 *config_rom, size_t length)
 	 * most of the registers.  In fact, on some cards (ALI M5251),
 	 * accessing registers in the SClk domain without LPS enabled
 	 * will lock up the machine.  Wait 50msec to make sure we have
-	 * full link enabled.
+	 * full link enabled.  However, with some cards (well, at least
+	 * a JMicron PCIe card), we have to try again sometimes.
 	 */
 	reg_write(ohci, OHCI1394_HCControlSet,
 		  OHCI1394_HCControl_LPS |
 		  OHCI1394_HCControl_postedWriteEnable);
 	flush_writes(ohci);
-	msleep(50);
+
+	for (lps = 0, i = 0; !lps && i < 3; i++) {
+		msleep(50);
+		lps = reg_read(ohci, OHCI1394_HCControlSet) &
+		      OHCI1394_HCControl_LPS;
+	}
+
+	if (!lps) {
+		fw_error("Failed to set Link Power Status\n");
+		return -EIO;
+	}
 
 	reg_write(ohci, OHCI1394_HCControlClear,
 		  OHCI1394_HCControl_noByteSwapData);
 
+	reg_write(ohci, OHCI1394_SelfIDBuffer, ohci->self_id_bus);
+	reg_write(ohci, OHCI1394_LinkControlClear,
+		  OHCI1394_LinkControl_rcvPhyPkt);
 	reg_write(ohci, OHCI1394_LinkControlSet,
 		  OHCI1394_LinkControl_rcvSelfID |
 		  OHCI1394_LinkControl_cycleTimerEnable |
@@ -1224,7 +1485,6 @@ static int ohci_enable(struct fw_card *card, u32 *config_rom, size_t length)
 	ar_context_run(&ohci->ar_request_ctx);
 	ar_context_run(&ohci->ar_response_ctx);
 
-	reg_write(ohci, OHCI1394_SelfIDBuffer, ohci->self_id_bus);
 	reg_write(ohci, OHCI1394_PhyUpperBound, 0x00010000);
 	reg_write(ohci, OHCI1394_IntEventClear, ~0);
 	reg_write(ohci, OHCI1394_IntMaskClear, ~0);
@@ -1234,7 +1494,12 @@ static int ohci_enable(struct fw_card *card, u32 *config_rom, size_t length)
 		  OHCI1394_reqTxComplete | OHCI1394_respTxComplete |
 		  OHCI1394_isochRx | OHCI1394_isochTx |
 		  OHCI1394_postedWriteErr | OHCI1394_cycleTooLong |
-		  OHCI1394_cycle64Seconds | OHCI1394_masterIntEnable);
+		  OHCI1394_cycle64Seconds | OHCI1394_regAccessFail |
+		  OHCI1394_masterIntEnable);
+#ifdef CONFIG_FIREWIRE_OHCI_DEBUG
+	if (param_debug & OHCI_PARAM_DEBUG_BUSRESETS)
+		reg_write(ohci, OHCI1394_IntMaskSet, OHCI1394_busReset);
+#endif
 
 	/* Activate link_on bit and contender bit in our self ID packets.*/
 	if (ohci_update_phy_reg(card, 4, 0,
@@ -1319,7 +1584,7 @@ ohci_set_config_rom(struct fw_card *card, u32 *config_rom, size_t length)
 	unsigned long flags;
 	int retval = -EBUSY;
 	__be32 *next_config_rom;
-	dma_addr_t next_config_rom_bus;
+	dma_addr_t uninitialized_var(next_config_rom_bus);
 
 	ohci = fw_ohci(card);
 
@@ -1418,6 +1683,7 @@ static int ohci_cancel_packet(struct fw_card *card, struct fw_packet *packet)
 	if (packet->ack != 0)
 		goto out;
 
+	log_ar_at_event('T', packet->speed, packet->header, 0x20);
 	driver_data->packet = NULL;
 	packet->ack = RCODE_CANCELLED;
 	packet->callback(packet, &ohci->card, packet->ack);
@@ -1432,6 +1698,9 @@ static int ohci_cancel_packet(struct fw_card *card, struct fw_packet *packet)
 static int
 ohci_enable_phys_dma(struct fw_card *card, int node_id, int generation)
 {
+#ifdef CONFIG_FIREWIRE_OHCI_REMOTE_DMA
+	return 0;
+#else
 	struct fw_ohci *ohci = fw_ohci(card);
 	unsigned long flags;
 	int n, retval = 0;
@@ -1463,6 +1732,7 @@ ohci_enable_phys_dma(struct fw_card *card, int node_id, int generation)
  out:
 	spin_unlock_irqrestore(&ohci->lock, flags);
 	return retval;
+#endif /* CONFIG_FIREWIRE_OHCI_REMOTE_DMA */
 }
 
 static u64
@@ -1478,6 +1748,28 @@ ohci_get_bus_time(struct fw_card *card)
 	return bus_time;
 }
 
+static void copy_iso_headers(struct iso_context *ctx, void *p)
+{
+	int i = ctx->header_length;
+
+	if (i + ctx->base.header_size > PAGE_SIZE)
+		return;
+
+	/*
+	 * The iso header is byteswapped to little endian by
+	 * the controller, but the remaining header quadlets
+	 * are big endian.  We want to present all the headers
+	 * as big endian, so we have to swap the first quadlet.
+	 */
+	if (ctx->base.header_size > 0)
+		*(u32 *) (ctx->header + i) = __swab32(*(u32 *) (p + 4));
+	if (ctx->base.header_size > 4)
+		*(u32 *) (ctx->header + i + 4) = __swab32(*(u32 *) p);
+	if (ctx->base.header_size > 8)
+		memcpy(ctx->header + i + 8, p + 8, ctx->base.header_size - 8);
+	ctx->header_length += ctx->base.header_size;
+}
+
 static int handle_ir_dualbuffer_packet(struct context *context,
 				       struct descriptor *d,
 				       struct descriptor *last)
@@ -1488,9 +1780,8 @@ static int handle_ir_dualbuffer_packet(struct context *context,
 	__le32 *ir_header;
 	size_t header_length;
 	void *p, *end;
-	int i;
 
-	if (db->first_res_count > 0 && db->second_res_count > 0) {
+	if (db->first_res_count != 0 && db->second_res_count != 0) {
 		if (ctx->excess_bytes <= le16_to_cpu(db->second_req_count)) {
 			/* This descriptor isn't done yet, stop iteration. */
 			return 0;
@@ -1501,25 +1792,14 @@ static int handle_ir_dualbuffer_packet(struct context *context,
 	header_length = le16_to_cpu(db->first_req_count) -
 		le16_to_cpu(db->first_res_count);
 
-	i = ctx->header_length;
 	p = db + 1;
 	end = p + header_length;
-	while (p < end && i + ctx->base.header_size <= PAGE_SIZE) {
-		/*
-		 * The iso header is byteswapped to little endian by
-		 * the controller, but the remaining header quadlets
-		 * are big endian.  We want to present all the headers
-		 * as big endian, so we have to swap the first
-		 * quadlet.
-		 */
-		*(u32 *) (ctx->header + i) = __swab32(*(u32 *) (p + 4));
-		memcpy(ctx->header + i + 4, p + 8, ctx->base.header_size - 4);
-		i += ctx->base.header_size;
+	while (p < end) {
+		copy_iso_headers(ctx, p);
 		ctx->excess_bytes +=
-			(le32_to_cpu(*(u32 *)(p + 4)) >> 16) & 0xffff;
-		p += ctx->base.header_size + 4;
+			(le32_to_cpu(*(__le32 *)(p + 4)) >> 16) & 0xffff;
+		p += max(ctx->base.header_size, (size_t)8);
 	}
-	ctx->header_length = i;
 
 	ctx->excess_bytes -= le16_to_cpu(db->second_req_count) -
 		le16_to_cpu(db->second_res_count);
@@ -1545,7 +1825,6 @@ static int handle_ir_packet_per_buffer(struct context *context,
 	struct descriptor *pd;
 	__le32 *ir_header;
 	void *p;
-	int i;
 
 	for (pd = d; pd <= last; pd++) {
 		if (pd->transfer_status)
@@ -1555,21 +1834,8 @@ static int handle_ir_packet_per_buffer(struct context *context,
 		/* Descriptor(s) not done yet, stop iteration */
 		return 0;
 
-	i   = ctx->header_length;
-	p   = last + 1;
-
-	if (ctx->base.header_size > 0 &&
-			i + ctx->base.header_size <= PAGE_SIZE) {
-		/*
-		 * The iso header is byteswapped to little endian by
-		 * the controller, but the remaining header quadlets
-		 * are big endian.  We want to present all the headers
-		 * as big endian, so we have to swap the first quadlet.
-		 */
-		*(u32 *) (ctx->header + i) = __swab32(*(u32 *) (p + 4));
-		memcpy(ctx->header + i + 4, p + 8, ctx->base.header_size - 4);
-		ctx->header_length += ctx->base.header_size;
-	}
+	p = last + 1;
+	copy_iso_headers(ctx, p);
 
 	if (le16_to_cpu(last->control) & DESCRIPTOR_IRQ_ALWAYS) {
 		ir_header = (__le32 *) p;
@@ -1618,7 +1884,7 @@ ohci_allocate_iso_context(struct fw_card *card, int type, size_t header_size)
 	} else {
 		mask = &ohci->ir_context_mask;
 		list = ohci->ir_context_list;
-		if (ohci->version >= OHCI_VERSION_1_1)
+		if (ohci->use_dualbuffer)
 			callback = handle_ir_dualbuffer_packet;
 		else
 			callback = handle_ir_packet_per_buffer;
@@ -1682,7 +1948,7 @@ static int ohci_start_iso(struct fw_iso_context *base,
 	} else {
 		index = ctx - ohci->ir_context_list;
 		control = IR_CONTEXT_ISOCH_HEADER;
-		if (ohci->version >= OHCI_VERSION_1_1)
+		if (ohci->use_dualbuffer)
 			control |= IR_CONTEXT_DUAL_BUFFER_MODE;
 		match = (tags << 28) | (sync << 8) | ctx->base.channel;
 		if (cycle >= 0) {
@@ -1864,11 +2130,11 @@ ohci_queue_iso_receive_dualbuffer(struct fw_iso_context *base,
 	z = 2;
 
 	/*
-	 * The OHCI controller puts the status word in the header
-	 * buffer too, so we need 4 extra bytes per packet.
+	 * The OHCI controller puts the isochronous header and trailer in the
+	 * buffer, so we need at least 8 bytes.
 	 */
 	packet_count = p->header_length / ctx->base.header_size;
-	header_size = packet_count * (ctx->base.header_size + 4);
+	header_size = packet_count * max(ctx->base.header_size, (size_t)8);
 
 	/* Get header size in number of descriptors. */
 	header_z = DIV_ROUND_UP(header_size, sizeof(*d));
@@ -1886,7 +2152,8 @@ ohci_queue_iso_receive_dualbuffer(struct fw_iso_context *base,
 		db = (struct db_descriptor *) d;
 		db->control = cpu_to_le16(DESCRIPTOR_STATUS |
 					  DESCRIPTOR_BRANCH_ALWAYS);
-		db->first_size = cpu_to_le16(ctx->base.header_size + 4);
+		db->first_size =
+		    cpu_to_le16(max(ctx->base.header_size, (size_t)8));
 		if (p->skip && rest == p->payload_length) {
 			db->control |= cpu_to_le16(DESCRIPTOR_WAIT);
 			db->first_req_count = db->first_size;
@@ -1936,11 +2203,11 @@ ohci_queue_iso_receive_packet_per_buffer(struct fw_iso_context *base,
 	int page, offset, packet_count, header_size, payload_per_buffer;
 
 	/*
-	 * The OHCI controller puts the status word in the
-	 * buffer too, so we need 4 extra bytes per packet.
+	 * The OHCI controller puts the isochronous header and trailer in the
+	 * buffer, so we need at least 8 bytes.
 	 */
 	packet_count = p->header_length / ctx->base.header_size;
-	header_size  = ctx->base.header_size + 4;
+	header_size  = max(ctx->base.header_size, (size_t)8);
 
 	/* Get header size in number of descriptors. */
 	header_z = DIV_ROUND_UP(header_size, sizeof(*d));
@@ -2012,7 +2279,7 @@ ohci_queue_iso(struct fw_iso_context *base,
 	spin_lock_irqsave(&ctx->context.ohci->lock, flags);
 	if (base->type == FW_ISO_CONTEXT_TRANSMIT)
 		retval = ohci_queue_iso_transmit(base, packet, buffer, payload);
-	else if (ctx->context.ohci->version >= OHCI_VERSION_1_1)
+	else if (ctx->context.ohci->use_dualbuffer)
 		retval = ohci_queue_iso_receive_dualbuffer(base, packet,
 							 buffer, payload);
 	else
@@ -2025,7 +2292,6 @@ ohci_queue_iso(struct fw_iso_context *base,
 }
 
 static const struct fw_card_driver ohci_driver = {
-	.name			= ohci_driver_name,
 	.enable			= ohci_enable,
 	.update_phy_reg		= ohci_update_phy_reg,
 	.set_config_rom		= ohci_set_config_rom,
@@ -2042,11 +2308,40 @@ static const struct fw_card_driver ohci_driver = {
 	.stop_iso		= ohci_stop_iso,
 };
 
+#ifdef CONFIG_PPC_PMAC
+static void ohci_pmac_on(struct pci_dev *dev)
+{
+	if (machine_is(powermac)) {
+		struct device_node *ofn = pci_device_to_OF_node(dev);
+
+		if (ofn) {
+			pmac_call_feature(PMAC_FTR_1394_CABLE_POWER, ofn, 0, 1);
+			pmac_call_feature(PMAC_FTR_1394_ENABLE, ofn, 0, 1);
+		}
+	}
+}
+
+static void ohci_pmac_off(struct pci_dev *dev)
+{
+	if (machine_is(powermac)) {
+		struct device_node *ofn = pci_device_to_OF_node(dev);
+
+		if (ofn) {
+			pmac_call_feature(PMAC_FTR_1394_ENABLE, ofn, 0, 0);
+			pmac_call_feature(PMAC_FTR_1394_CABLE_POWER, ofn, 0, 0);
+		}
+	}
+}
+#else
+#define ohci_pmac_on(dev)
+#define ohci_pmac_off(dev)
+#endif /* CONFIG_PPC_PMAC */
+
 static int __devinit
 pci_probe(struct pci_dev *dev, const struct pci_device_id *ent)
 {
 	struct fw_ohci *ohci;
-	u32 bus_options, max_receive, link_speed;
+	u32 bus_options, max_receive, link_speed, version;
 	u64 guid;
 	int err;
 	size_t size;
@@ -2059,10 +2354,12 @@ pci_probe(struct pci_dev *dev, const struct pci_device_id *ent)
 
 	fw_card_initialize(&ohci->card, &ohci_driver, &dev->dev);
 
+	ohci_pmac_on(dev);
+
 	err = pci_enable_device(dev);
 	if (err) {
 		fw_error("Failed to enable OHCI hardware.\n");
-		goto fail_put_card;
+		goto fail_free;
 	}
 
 	pci_set_master(dev);
@@ -2087,6 +2384,23 @@ pci_probe(struct pci_dev *dev, const struct pci_device_id *ent)
 		goto fail_iomem;
 	}
 
+	version = reg_read(ohci, OHCI1394_Version) & 0x00ff00ff;
+	ohci->use_dualbuffer = version >= OHCI_VERSION_1_1;
+
+/* x86-32 currently doesn't use highmem for dma_alloc_coherent */
+#if !defined(CONFIG_X86_32)
+	/* dual-buffer mode is broken with descriptor addresses above 2G */
+	if (dev->vendor == PCI_VENDOR_ID_TI &&
+	    dev->device == PCI_DEVICE_ID_TI_TSB43AB22)
+		ohci->use_dualbuffer = false;
+#endif
+
+#if defined(CONFIG_PPC_PMAC) && defined(CONFIG_PPC32)
+	ohci->old_uninorth = dev->vendor == PCI_VENDOR_ID_APPLE &&
+			     dev->device == PCI_DEVICE_ID_APPLE_UNI_N_FW;
+#endif
+	ohci->bus_reset_packet_quirk = dev->vendor == PCI_VENDOR_ID_TI;
+
 	ar_context_init(&ohci->ar_request_ctx, ohci,
 			OHCI1394_AsReqRcvContextControlSet);
 
@@ -2138,9 +2452,8 @@ pci_probe(struct pci_dev *dev, const struct pci_device_id *ent)
 	if (err < 0)
 		goto fail_self_id;
 
-	ohci->version = reg_read(ohci, OHCI1394_Version) & 0x00ff00ff;
 	fw_notify("Added fw-ohci device %s, OHCI version %x.%x\n",
-		  dev->dev.bus_id, ohci->version >> 16, ohci->version & 0xff);
+		  dev->dev.bus_id, version >> 16, version & 0xff);
 	return 0;
 
  fail_self_id:
@@ -2154,8 +2467,9 @@ pci_probe(struct pci_dev *dev, const struct pci_device_id *ent)
 	pci_release_region(dev, 0);
  fail_disable:
 	pci_disable_device(dev);
- fail_put_card:
-	fw_card_put(&ohci->card);
+ fail_free:
+	kfree(&ohci->card);
+	ohci_pmac_off(dev);
 
 	return err;
 }
@@ -2183,39 +2497,42 @@ static void pci_remove(struct pci_dev *dev)
 	pci_iounmap(dev, ohci->registers);
 	pci_release_region(dev, 0);
 	pci_disable_device(dev);
-	fw_card_put(&ohci->card);
+	kfree(&ohci->card);
+	ohci_pmac_off(dev);
 
 	fw_notify("Removed fw-ohci device.\n");
 }
 
 #ifdef CONFIG_PM
-static int pci_suspend(struct pci_dev *pdev, pm_message_t state)
+static int pci_suspend(struct pci_dev *dev, pm_message_t state)
 {
-	struct fw_ohci *ohci = pci_get_drvdata(pdev);
+	struct fw_ohci *ohci = pci_get_drvdata(dev);
 	int err;
 
 	software_reset(ohci);
-	free_irq(pdev->irq, ohci);
-	err = pci_save_state(pdev);
+	free_irq(dev->irq, ohci);
+	err = pci_save_state(dev);
 	if (err) {
 		fw_error("pci_save_state failed\n");
 		return err;
 	}
-	err = pci_set_power_state(pdev, pci_choose_state(pdev, state));
+	err = pci_set_power_state(dev, pci_choose_state(dev, state));
 	if (err)
 		fw_error("pci_set_power_state failed with %d\n", err);
+	ohci_pmac_off(dev);
 
 	return 0;
 }
 
-static int pci_resume(struct pci_dev *pdev)
+static int pci_resume(struct pci_dev *dev)
 {
-	struct fw_ohci *ohci = pci_get_drvdata(pdev);
+	struct fw_ohci *ohci = pci_get_drvdata(dev);
 	int err;
 
-	pci_set_power_state(pdev, PCI_D0);
-	pci_restore_state(pdev);
-	err = pci_enable_device(pdev);
+	ohci_pmac_on(dev);
+	pci_set_power_state(dev, PCI_D0);
+	pci_restore_state(dev);
+	err = pci_enable_device(dev);
 	if (err) {
 		fw_error("pci_enable_device failed\n");
 		return err;
diff --git a/drivers/firewire/fw-ohci.h b/drivers/firewire/fw-ohci.h
index dec4f04..a2fbb62 100644
--- a/drivers/firewire/fw-ohci.h
+++ b/drivers/firewire/fw-ohci.h
@@ -30,6 +30,7 @@
 #define  OHCI1394_HCControl_softReset		0x00010000
 #define OHCI1394_SelfIDBuffer                 0x064
 #define OHCI1394_SelfIDCount                  0x068
+#define  OHCI1394_SelfIDCount_selfIDError	0x80000000
 #define OHCI1394_IRMultiChanMaskHiSet         0x070
 #define OHCI1394_IRMultiChanMaskHiClear       0x074
 #define OHCI1394_IRMultiChanMaskLoSet         0x078
@@ -124,6 +125,7 @@
 #define OHCI1394_lockRespErr		0x00000200
 #define OHCI1394_selfIDComplete		0x00010000
 #define OHCI1394_busReset		0x00020000
+#define OHCI1394_regAccessFail		0x00040000
 #define OHCI1394_phy			0x00080000
 #define OHCI1394_cycleSynch		0x00100000
 #define OHCI1394_cycle64Seconds		0x00200000
diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c
index e9d44fd..a5d0de8 100644
--- a/drivers/firewire/fw-sbp2.c
+++ b/drivers/firewire/fw-sbp2.c
@@ -1,4 +1,5 @@
 /*
+ * SBP2 driver (SCSI over IEEE1394)
  *
  * Copyright (C) 2005-2007  Kristian Hoegsberg <krh@bitplanet.net>
  *
@@ -27,27 +28,29 @@
  * and many others.
  */
 
+#include <linux/blkdev.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
-#include <linux/mod_devicetable.h>
-#include <linux/device.h>
 #include <linux/scatterlist.h>
-#include <linux/dma-mapping.h>
-#include <linux/blkdev.h>
 #include <linux/string.h>
 #include <linux/stringify.h>
 #include <linux/timer.h>
 #include <linux/workqueue.h>
+#include <asm/system.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h>
 
-#include "fw-transaction.h"
-#include "fw-topology.h"
 #include "fw-device.h"
+#include "fw-topology.h"
+#include "fw-transaction.h"
 
 /*
  * So far only bridges from Oxford Semiconductor are known to support
@@ -80,6 +83,14 @@ MODULE_PARM_DESC(exclusive_login, "Exclusive login to sbp2 device "
  *   Avoids access beyond actual disk limits on devices with an off-by-one bug.
  *   Don't use this with devices which don't have this bug.
  *
+ * - delay inquiry
+ *   Wait extra SBP2_INQUIRY_DELAY seconds after login before SCSI inquiry.
+ *
+ * - power condition
+ *   Set the power condition field in the START STOP UNIT commands sent by
+ *   sd_mod on suspend, resume, and shutdown (if manage_start_stop is on).
+ *   Some disks need this to spin down or to resume properly.
+ *
  * - override internal blacklist
  *   Instead of adding to the built-in blacklist, use only the workarounds
  *   specified in the module load parameter.
@@ -89,6 +100,9 @@ MODULE_PARM_DESC(exclusive_login, "Exclusive login to sbp2 device "
 #define SBP2_WORKAROUND_INQUIRY_36	0x2
 #define SBP2_WORKAROUND_MODE_SENSE_8	0x4
 #define SBP2_WORKAROUND_FIX_CAPACITY	0x8
+#define SBP2_WORKAROUND_DELAY_INQUIRY	0x10
+#define SBP2_INQUIRY_DELAY		12
+#define SBP2_WORKAROUND_POWER_CONDITION	0x20
 #define SBP2_WORKAROUND_OVERRIDE	0x100
 
 static int sbp2_param_workarounds;
@@ -98,9 +112,14 @@ MODULE_PARM_DESC(workarounds, "Work around device bugs (default = 0"
 	", 36 byte inquiry = "    __stringify(SBP2_WORKAROUND_INQUIRY_36)
 	", skip mode page 8 = "   __stringify(SBP2_WORKAROUND_MODE_SENSE_8)
 	", fix capacity = "       __stringify(SBP2_WORKAROUND_FIX_CAPACITY)
+	", delay inquiry = "      __stringify(SBP2_WORKAROUND_DELAY_INQUIRY)
 	", override internal blacklist = " __stringify(SBP2_WORKAROUND_OVERRIDE)
 	", or a combination)");
-
+/*
+ * No easy way to #if this
+ * ", set power condition in start stop unit = "
+ *				  __stringify(SBP2_WORKAROUND_POWER_CONDITION)
+ */
 /* I don't know why the SCSI stack doesn't define something like this... */
 typedef void (*scsi_done_fn_t)(struct scsi_cmnd *);
 
@@ -113,7 +132,6 @@ static const char sbp2_driver_name[] = "sbp2";
 struct sbp2_logical_unit {
 	struct sbp2_target *tgt;
 	struct list_head link;
-	struct scsi_device *sdev;
 	struct fw_address_handler address_handler;
 	struct list_head orb_list;
 
@@ -130,6 +148,8 @@ struct sbp2_logical_unit {
 	int generation;
 	int retries;
 	struct work_struct work;
+	bool has_sdev;
+	bool blocked;
 };
 
 /*
@@ -139,15 +159,19 @@ struct sbp2_logical_unit {
 struct sbp2_target {
 	struct kref kref;
 	struct fw_unit *unit;
+	const char *bus_id;
 	struct list_head lu_list;
 
 	u64 management_agent_address;
+	u64 guid;
 	int directory_id;
 	int node_id;
 	int address_high;
 	unsigned int workarounds;
-
 	unsigned int mgt_orb_timeout;
+
+	int dont_block;	/* counter for each logical unit */
+	int blocked;	/* ditto */
 };
 
 /*
@@ -158,11 +182,10 @@ struct sbp2_target {
 #define SBP2_MIN_LOGIN_ORB_TIMEOUT	5000U	/* Timeout in ms */
 #define SBP2_MAX_LOGIN_ORB_TIMEOUT	40000U	/* Timeout in ms */
 #define SBP2_ORB_TIMEOUT		2000U	/* Timeout in ms */
-#define SBP2_MAX_SG_ELEMENT_LENGTH	0xf000
 #define SBP2_ORB_NULL			0x80000000
-
-#define SBP2_DIRECTION_TO_MEDIA		0x0
-#define SBP2_DIRECTION_FROM_MEDIA	0x1
+#define SBP2_MAX_SG_ELEMENT_LENGTH	0xf000
+#define SBP2_RETRY_LIMIT		0xf		/* 15 retries */
+#define SBP2_CYCLE_LIMIT		(0xc8 << 12)	/* 200 125us cycles */
 
 /* Unit directory keys */
 #define SBP2_CSR_UNIT_CHARACTERISTICS	0x3a
@@ -210,8 +233,8 @@ struct sbp2_status {
 };
 
 struct sbp2_pointer {
-	u32 high;
-	u32 low;
+	__be32 high;
+	__be32 low;
 };
 
 struct sbp2_orb {
@@ -239,8 +262,8 @@ struct sbp2_management_orb {
 	struct {
 		struct sbp2_pointer password;
 		struct sbp2_pointer response;
-		u32 misc;
-		u32 length;
+		__be32 misc;
+		__be32 length;
 		struct sbp2_pointer status_fifo;
 	} request;
 	__be32 response[4];
@@ -249,21 +272,17 @@ struct sbp2_management_orb {
 	struct sbp2_status status;
 };
 
-#define LOGIN_RESPONSE_GET_LOGIN_ID(v)	((v).misc & 0xffff)
-#define LOGIN_RESPONSE_GET_LENGTH(v)	(((v).misc >> 16) & 0xffff)
-
 struct sbp2_login_response {
-	u32 misc;
+	__be32 misc;
 	struct sbp2_pointer command_block_agent;
-	u32 reconnect_hold;
+	__be32 reconnect_hold;
 };
-
 #define COMMAND_ORB_DATA_SIZE(v)	((v))
 #define COMMAND_ORB_PAGE_SIZE(v)	((v) << 16)
 #define COMMAND_ORB_PAGE_TABLE_PRESENT	((1) << 19)
 #define COMMAND_ORB_MAX_PAYLOAD(v)	((v) << 20)
 #define COMMAND_ORB_SPEED(v)		((v) << 24)
-#define COMMAND_ORB_DIRECTION(v)	((v) << 27)
+#define COMMAND_ORB_DIRECTION		((1) << 27)
 #define COMMAND_ORB_REQUEST_FORMAT(v)	((v) << 29)
 #define COMMAND_ORB_NOTIFY		((1) << 31)
 
@@ -272,7 +291,7 @@ struct sbp2_command_orb {
 	struct {
 		struct sbp2_pointer next;
 		struct sbp2_pointer data_descriptor;
-		u32 misc;
+		__be32 misc;
 		u8 command_block[12];
 	} request;
 	struct scsi_cmnd *cmd;
@@ -301,18 +320,35 @@ static const struct {
 		.firmware_revision	= 0x002800,
 		.model			= 0x001010,
 		.workarounds		= SBP2_WORKAROUND_INQUIRY_36 |
-					  SBP2_WORKAROUND_MODE_SENSE_8,
+					  SBP2_WORKAROUND_MODE_SENSE_8 |
+					  SBP2_WORKAROUND_POWER_CONDITION,
+	},
+	/* DViCO Momobay FX-3A with TSB42AA9A bridge */ {
+		.firmware_revision	= 0x002800,
+		.model			= 0x000000,
+		.workarounds		= SBP2_WORKAROUND_DELAY_INQUIRY |
+					  SBP2_WORKAROUND_POWER_CONDITION,
 	},
 	/* Initio bridges, actually only needed for some older ones */ {
 		.firmware_revision	= 0x000200,
 		.model			= ~0,
 		.workarounds		= SBP2_WORKAROUND_INQUIRY_36,
 	},
+	/* PL-3507 bridge with Prolific firmware */ {
+		.firmware_revision	= 0x012800,
+		.model			= ~0,
+		.workarounds		= SBP2_WORKAROUND_POWER_CONDITION,
+	},
 	/* Symbios bridge */ {
 		.firmware_revision	= 0xa0b800,
 		.model			= ~0,
 		.workarounds		= SBP2_WORKAROUND_128K_MAX_TRANS,
 	},
+	/* Datafab MD2-FW2 with Symbios/LSILogic SYM13FW500 bridge */ {
+		.firmware_revision	= 0x002600,
+		.model			= ~0,
+		.workarounds		= SBP2_WORKAROUND_128K_MAX_TRANS,
+	},
 
 	/*
 	 * There are iPods (2nd gen, 3rd gen) with model_id == 0, but
@@ -414,6 +450,7 @@ complete_transaction(struct fw_card *card, int rcode,
 	 * failed and we didn't already get a status write.
 	 */
 	spin_lock_irqsave(&card->lock, flags);
+
 	if (orb->rcode == -1)
 		orb->rcode = rcode;
 	if (orb->rcode != RCODE_COMPLETE) {
@@ -435,8 +472,7 @@ sbp2_send_orb(struct sbp2_orb *orb, struct sbp2_logical_unit *lu,
 	unsigned long flags;
 
 	orb->pointer.high = 0;
-	orb->pointer.low = orb->request_bus;
-	fw_memcpy_to_be32(&orb->pointer, &orb->pointer, sizeof(orb->pointer));
+	orb->pointer.low = cpu_to_be32(orb->request_bus);
 
 	spin_lock_irqsave(&device->card->lock, flags);
 	list_add_tail(&orb->link, &lu->orb_list);
@@ -498,6 +534,9 @@ sbp2_send_management_orb(struct sbp2_logical_unit *lu, int node_id,
 	unsigned int timeout;
 	int retval = -ENOMEM;
 
+	if (function == SBP2_LOGOUT_REQUEST && fw_device_is_shutdown(device))
+		return 0;
+
 	orb = kzalloc(sizeof(*orb), GFP_ATOMIC);
 	if (orb == NULL)
 		return -ENOMEM;
@@ -509,30 +548,31 @@ sbp2_send_management_orb(struct sbp2_logical_unit *lu, int node_id,
 	if (dma_mapping_error(orb->response_bus))
 		goto fail_mapping_response;
 
-	orb->request.response.high    = 0;
-	orb->request.response.low     = orb->response_bus;
+	orb->request.response.high = 0;
+	orb->request.response.low  = cpu_to_be32(orb->response_bus);
 
-	orb->request.misc =
+	orb->request.misc = cpu_to_be32(
 		MANAGEMENT_ORB_NOTIFY |
 		MANAGEMENT_ORB_FUNCTION(function) |
-		MANAGEMENT_ORB_LUN(lun_or_login_id);
-	orb->request.length =
-		MANAGEMENT_ORB_RESPONSE_LENGTH(sizeof(orb->response));
+		MANAGEMENT_ORB_LUN(lun_or_login_id));
+	orb->request.length = cpu_to_be32(
+		MANAGEMENT_ORB_RESPONSE_LENGTH(sizeof(orb->response)));
 
-	orb->request.status_fifo.high = lu->address_handler.offset >> 32;
-	orb->request.status_fifo.low  = lu->address_handler.offset;
+	orb->request.status_fifo.high =
+		cpu_to_be32(lu->address_handler.offset >> 32);
+	orb->request.status_fifo.low  =
+		cpu_to_be32(lu->address_handler.offset);
 
 	if (function == SBP2_LOGIN_REQUEST) {
-		orb->request.misc |=
-			MANAGEMENT_ORB_EXCLUSIVE(sbp2_param_exclusive_login) |
-			MANAGEMENT_ORB_RECONNECT(0);
+		/* Ask for 2^2 == 4 seconds reconnect grace period */
+		orb->request.misc |= cpu_to_be32(
+			MANAGEMENT_ORB_RECONNECT(2) |
+			MANAGEMENT_ORB_EXCLUSIVE(sbp2_param_exclusive_login));
 		timeout = lu->tgt->mgt_orb_timeout;
 	} else {
 		timeout = SBP2_ORB_TIMEOUT;
 	}
 
-	fw_memcpy_to_be32(&orb->request, &orb->request, sizeof(orb->request));
-
 	init_completion(&orb->done);
 	orb->base.callback = complete_management_orb;
 
@@ -549,32 +589,20 @@ sbp2_send_management_orb(struct sbp2_logical_unit *lu, int node_id,
 
 	retval = -EIO;
 	if (sbp2_cancel_orbs(lu) == 0) {
-		/*
-		 * Logout requests frequently get sent to devices that aren't
-		 * there any more, resulting in extraneous error messages in
-		 * the logs.  Unfortunately, this means logout requests that
-		 * actually fail don't get logged.
-		 */
-		if (function != SBP2_LOGOUT_REQUEST)
-			fw_error("orb reply timed out, rcode=0x%02x\n",
-				 orb->base.rcode);
+		fw_error("%s: orb reply timed out, rcode=0x%02x\n",
+			 lu->tgt->bus_id, orb->base.rcode);
 		goto out;
 	}
 
 	if (orb->base.rcode != RCODE_COMPLETE) {
-		/*
-                 * On device removal from the bus, sometimes the logout
-		 * request times out, sometimes it just fails.
-		 */
-		if (function != SBP2_LOGOUT_REQUEST)
-			fw_error("management write failed, rcode 0x%02x\n",
-				 orb->base.rcode);
+		fw_error("%s: management write failed, rcode 0x%02x\n",
+			 lu->tgt->bus_id, orb->base.rcode);
 		goto out;
 	}
 
 	if (STATUS_GET_RESPONSE(orb->status) != 0 ||
 	    STATUS_GET_SBP_STATUS(orb->status) != 0) {
-		fw_error("error status: %d:%d\n",
+		fw_error("%s: error status: %d:%d\n", lu->tgt->bus_id,
 			 STATUS_GET_RESPONSE(orb->status),
 			 STATUS_GET_SBP_STATUS(orb->status));
 		goto out;
@@ -589,38 +617,165 @@ sbp2_send_management_orb(struct sbp2_logical_unit *lu, int node_id,
 			 sizeof(orb->response), DMA_FROM_DEVICE);
  fail_mapping_response:
 	if (response)
-		fw_memcpy_from_be32(response,
-				    orb->response, sizeof(orb->response));
+		memcpy(response, orb->response, sizeof(orb->response));
 	kref_put(&orb->base.kref, free_orb);
 
 	return retval;
 }
 
-static void
-complete_agent_reset_write(struct fw_card *card, int rcode,
-			   void *payload, size_t length, void *data)
+static void sbp2_agent_reset(struct sbp2_logical_unit *lu)
 {
-	struct fw_transaction *t = data;
+	struct fw_device *device = fw_device(lu->tgt->unit->device.parent);
+	__be32 d = 0;
 
-	kfree(t);
+	fw_run_transaction(device->card, TCODE_WRITE_QUADLET_REQUEST,
+			   lu->tgt->node_id, lu->generation, device->max_speed,
+			   lu->command_block_agent_address + SBP2_AGENT_RESET,
+			   &d, sizeof(d));
 }
 
-static int sbp2_agent_reset(struct sbp2_logical_unit *lu)
+static void
+complete_agent_reset_write_no_wait(struct fw_card *card, int rcode,
+				   void *payload, size_t length, void *data)
+{
+	kfree(data);
+}
+
+static void sbp2_agent_reset_no_wait(struct sbp2_logical_unit *lu)
 {
 	struct fw_device *device = fw_device(lu->tgt->unit->device.parent);
 	struct fw_transaction *t;
-	static u32 zero;
+	static __be32 d;
 
-	t = kzalloc(sizeof(*t), GFP_ATOMIC);
+	t = kmalloc(sizeof(*t), GFP_ATOMIC);
 	if (t == NULL)
-		return -ENOMEM;
+		return;
 
 	fw_send_request(device->card, t, TCODE_WRITE_QUADLET_REQUEST,
 			lu->tgt->node_id, lu->generation, device->max_speed,
 			lu->command_block_agent_address + SBP2_AGENT_RESET,
-			&zero, sizeof(zero), complete_agent_reset_write, t);
+			&d, sizeof(d), complete_agent_reset_write_no_wait, t);
+}
 
-	return 0;
+static void sbp2_set_generation(struct sbp2_logical_unit *lu, int generation)
+{
+	struct fw_card *card = fw_device(lu->tgt->unit->device.parent)->card;
+	unsigned long flags;
+
+	/* serialize with comparisons of lu->generation and card->generation */
+	spin_lock_irqsave(&card->lock, flags);
+	lu->generation = generation;
+	spin_unlock_irqrestore(&card->lock, flags);
+}
+
+static inline void sbp2_allow_block(struct sbp2_logical_unit *lu)
+{
+	/*
+	 * We may access dont_block without taking card->lock here:
+	 * All callers of sbp2_allow_block() and all callers of sbp2_unblock()
+	 * are currently serialized against each other.
+	 * And a wrong result in sbp2_conditionally_block()'s access of
+	 * dont_block is rather harmless, it simply misses its first chance.
+	 */
+	--lu->tgt->dont_block;
+}
+
+/*
+ * Blocks lu->tgt if all of the following conditions are met:
+ *   - Login, INQUIRY, and high-level SCSI setup of all of the target's
+ *     logical units have been finished (indicated by dont_block == 0).
+ *   - lu->generation is stale.
+ *
+ * Note, scsi_block_requests() must be called while holding card->lock,
+ * otherwise it might foil sbp2_[conditionally_]unblock()'s attempt to
+ * unblock the target.
+ */
+static void sbp2_conditionally_block(struct sbp2_logical_unit *lu)
+{
+	struct sbp2_target *tgt = lu->tgt;
+	struct fw_card *card = fw_device(tgt->unit->device.parent)->card;
+	struct Scsi_Host *shost =
+		container_of((void *)tgt, struct Scsi_Host, hostdata[0]);
+	unsigned long flags;
+
+	spin_lock_irqsave(&card->lock, flags);
+	if (!tgt->dont_block && !lu->blocked &&
+	    lu->generation != card->generation) {
+		lu->blocked = true;
+		if (++tgt->blocked == 1)
+			scsi_block_requests(shost);
+	}
+	spin_unlock_irqrestore(&card->lock, flags);
+}
+
+/*
+ * Unblocks lu->tgt as soon as all its logical units can be unblocked.
+ * Note, it is harmless to run scsi_unblock_requests() outside the
+ * card->lock protected section.  On the other hand, running it inside
+ * the section might clash with shost->host_lock.
+ */
+static void sbp2_conditionally_unblock(struct sbp2_logical_unit *lu)
+{
+	struct sbp2_target *tgt = lu->tgt;
+	struct fw_card *card = fw_device(tgt->unit->device.parent)->card;
+	struct Scsi_Host *shost =
+		container_of((void *)tgt, struct Scsi_Host, hostdata[0]);
+	unsigned long flags;
+	bool unblock = false;
+
+	spin_lock_irqsave(&card->lock, flags);
+	if (lu->blocked && lu->generation == card->generation) {
+		lu->blocked = false;
+		unblock = --tgt->blocked == 0;
+	}
+	spin_unlock_irqrestore(&card->lock, flags);
+
+	if (unblock)
+		scsi_unblock_requests(shost);
+}
+
+/*
+ * Prevents future blocking of tgt and unblocks it.
+ * Note, it is harmless to run scsi_unblock_requests() outside the
+ * card->lock protected section.  On the other hand, running it inside
+ * the section might clash with shost->host_lock.
+ */
+static void sbp2_unblock(struct sbp2_target *tgt)
+{
+	struct fw_card *card = fw_device(tgt->unit->device.parent)->card;
+	struct Scsi_Host *shost =
+		container_of((void *)tgt, struct Scsi_Host, hostdata[0]);
+	unsigned long flags;
+
+	spin_lock_irqsave(&card->lock, flags);
+	++tgt->dont_block;
+	spin_unlock_irqrestore(&card->lock, flags);
+
+	scsi_unblock_requests(shost);
+}
+
+/* not exported in 2.6.18, so local copy */
+static int scsilun_to_int(struct scsi_lun *scsilun)
+{
+	int i;
+	unsigned int lun;
+
+	lun = 0;
+	for (i = 0; i < sizeof(lun); i += 2)
+		lun = lun | (((scsilun->scsi_lun[i] << 8) |
+			      scsilun->scsi_lun[i + 1]) << (i * 8));
+	return lun;
+}
+
+static int sbp2_lun2int(u16 lun)
+{
+	struct scsi_lun eight_bytes_lun;
+
+	memset(&eight_bytes_lun, 0, sizeof(eight_bytes_lun));
+	eight_bytes_lun.scsi_lun[0] = (lun >> 8) & 0xff;
+	eight_bytes_lun.scsi_lun[1] = lun & 0xff;
+
+	return scsilun_to_int(&eight_bytes_lun);
 }
 
 static void sbp2_release_target(struct kref *kref)
@@ -629,22 +784,31 @@ static void sbp2_release_target(struct kref *kref)
 	struct sbp2_logical_unit *lu, *next;
 	struct Scsi_Host *shost =
 		container_of((void *)tgt, struct Scsi_Host, hostdata[0]);
+	struct scsi_device *sdev;
+	struct fw_device *device = fw_device(tgt->unit->device.parent);
 
-	list_for_each_entry_safe(lu, next, &tgt->lu_list, link) {
-		if (lu->sdev)
-			scsi_remove_device(lu->sdev);
+	/* prevent deadlocks */
+	sbp2_unblock(tgt);
 
+	list_for_each_entry_safe(lu, next, &tgt->lu_list, link) {
+		sdev = scsi_device_lookup(shost, 0, 0, sbp2_lun2int(lu->lun));
+		if (sdev) {
+			scsi_remove_device(sdev);
+			scsi_device_put(sdev);
+		}
 		sbp2_send_management_orb(lu, tgt->node_id, lu->generation,
 				SBP2_LOGOUT_REQUEST, lu->login_id, NULL);
+
 		fw_core_remove_address_handler(&lu->address_handler);
 		list_del(&lu->link);
 		kfree(lu);
 	}
 	scsi_remove_host(shost);
-	fw_notify("released %s\n", tgt->unit->device.bus_id);
+	fw_notify("released %s, target %d:0:0\n", tgt->bus_id, shost->host_no);
 
-	put_device(&tgt->unit->device);
+	fw_unit_put(tgt->unit);
 	scsi_host_put(shost);
+	fw_device_put(device);
 }
 
 static struct workqueue_struct *sbp2_wq;
@@ -664,117 +828,143 @@ static void sbp2_target_put(struct sbp2_target *tgt)
 	kref_put(&tgt->kref, sbp2_release_target);
 }
 
-static void sbp2_reconnect(void *w);
-
-/* not exported in 2.6.18, so local copy */
-static int scsilun_to_int(struct scsi_lun *scsilun)
+/*
+ * Write retransmit retry values into the BUSY_TIMEOUT register.
+ * - The single-phase retry protocol is supported by all SBP-2 devices, but the
+ *   default retry_limit value is 0 (i.e. never retry transmission). We write a
+ *   saner value after logging into the device.
+ * - The dual-phase retry protocol is optional to implement, and if not
+ *   supported, writes to the dual-phase portion of the register will be
+ *   ignored. We try to write the original 1394-1995 default here.
+ * - In the case of devices that are also SBP-3-compliant, all writes are
+ *   ignored, as the register is read-only, but contains single-phase retry of
+ *   15, which is what we're trying to set for all SBP-2 devices anyway, so this
+ *   write attempt is safe and yields more consistent behavior for all devices.
+ *
+ * See section 8.3.2.3.5 of the 1394-1995 spec, section 6.2 of the SBP-2 spec,
+ * and section 6.4 of the SBP-3 spec for further details.
+ */
+static void sbp2_set_busy_timeout(struct sbp2_logical_unit *lu)
 {
-	int i;
-	unsigned int lun;
+	struct fw_device *device = fw_device(lu->tgt->unit->device.parent);
+	__be32 d = cpu_to_be32(SBP2_CYCLE_LIMIT | SBP2_RETRY_LIMIT);
 
-	lun = 0;
-	for (i = 0; i < sizeof(lun); i += 2)
-		lun = lun | (((scsilun->scsi_lun[i] << 8) |
-			      scsilun->scsi_lun[i + 1]) << (i * 8));
-	return lun;
+	fw_run_transaction(device->card, TCODE_WRITE_QUADLET_REQUEST,
+			   lu->tgt->node_id, lu->generation, device->max_speed,
+			   CSR_REGISTER_BASE + CSR_BUSY_TIMEOUT,
+			   &d, sizeof(d));
 }
 
+static void sbp2_reconnect(void *w);
+
 static void sbp2_login(void *w)
 {
 	struct work_struct *work = w;
 	struct sbp2_logical_unit *lu =
 		container_of(work, struct sbp2_logical_unit, work);
-	struct Scsi_Host *shost =
-		container_of((void *)lu->tgt, struct Scsi_Host, hostdata[0]);
+	struct sbp2_target *tgt = lu->tgt;
+	struct fw_device *device = fw_device(tgt->unit->device.parent);
+	struct Scsi_Host *shost;
 	struct scsi_device *sdev;
-	struct scsi_lun eight_bytes_lun;
-	struct fw_unit *unit = lu->tgt->unit;
-	struct fw_device *device = fw_device(unit->device.parent);
 	struct sbp2_login_response response;
 	int generation, node_id, local_node_id;
 
+	if (fw_device_is_shutdown(device))
+		goto out;
+
 	generation    = device->generation;
 	smp_rmb();    /* node_id must not be older than generation */
 	node_id       = device->node_id;
 	local_node_id = device->card->node_id;
 
+	/* If this is a re-login attempt, log out, or we might be rejected. */
+	if (lu->has_sdev)
+		sbp2_send_management_orb(lu, device->node_id, generation,
+				SBP2_LOGOUT_REQUEST, lu->login_id, NULL);
+
 	if (sbp2_send_management_orb(lu, node_id, generation,
 				SBP2_LOGIN_REQUEST, lu->lun, &response) < 0) {
-		if (lu->retries++ < 5)
+		if (lu->retries++ < 5) {
 			sbp2_queue_work(lu, DIV_ROUND_UP(HZ, 5));
-		else
-			fw_error("failed to login to %s LUN %04x\n",
-				 unit->device.bus_id, lu->lun);
+		} else {
+			fw_error("%s: failed to login to LUN %04x\n",
+				 tgt->bus_id, lu->lun);
+			/* Let any waiting I/O fail from now on. */
+			sbp2_unblock(lu->tgt);
+		}
 		goto out;
 	}
-	lu->generation        = generation;
-	lu->tgt->node_id      = node_id;
-	lu->tgt->address_high = local_node_id << 16;
 
-	/* Get command block agent offset and login id. */
+	tgt->node_id	  = node_id;
+	tgt->address_high = local_node_id << 16;
+	sbp2_set_generation(lu, generation);
+
 	lu->command_block_agent_address =
-		((u64) (response.command_block_agent.high & 0xffff) << 32) |
-		response.command_block_agent.low;
-	lu->login_id = LOGIN_RESPONSE_GET_LOGIN_ID(response);
+		((u64)(be32_to_cpu(response.command_block_agent.high) & 0xffff)
+		      << 32) | be32_to_cpu(response.command_block_agent.low);
+	lu->login_id = be32_to_cpu(response.misc) & 0xffff;
 
-	fw_notify("logged in to %s LUN %04x (%d retries)\n",
-		  unit->device.bus_id, lu->lun, lu->retries);
+	fw_notify("%s: logged in to LUN %04x (%d retries)\n",
+		  tgt->bus_id, lu->lun, lu->retries);
 
-#if 0
-	/* FIXME: The linux1394 sbp2 does this last step. */
-	sbp2_set_busy_timeout(scsi_id);
-#endif
+	/* set appropriate retry limit(s) in BUSY_TIMEOUT register */
+	sbp2_set_busy_timeout(lu);
 
-	PREPARE_WORK(&lu->work, sbp2_reconnect, &(lu->work));
+	PREPARE_WORK(&lu->work, sbp2_reconnect, &lu->work);
 	sbp2_agent_reset(lu);
 
-	memset(&eight_bytes_lun, 0, sizeof(eight_bytes_lun));
-	eight_bytes_lun.scsi_lun[0] = (lu->lun >> 8) & 0xff;
-	eight_bytes_lun.scsi_lun[1] = lu->lun & 0xff;
+	/* This was a re-login. */
+	if (lu->has_sdev) {
+		sbp2_cancel_orbs(lu);
+		sbp2_conditionally_unblock(lu);
+		goto out;
+	}
 
-	sdev = __scsi_add_device(shost, 0, 0,
-				 scsilun_to_int(&eight_bytes_lun), lu);
-	if (IS_ERR(sdev)) {
-		/*
-		 * The most frequent cause for __scsi_add_device() to fail
-		 * is a bust reset while sending the SCSI INQUIRY.  Try again.
-		 */
-		goto out_logout_login;
+	if (lu->tgt->workarounds & SBP2_WORKAROUND_DELAY_INQUIRY)
+		ssleep(SBP2_INQUIRY_DELAY);
 
-	} else if (sdev->sdev_state == SDEV_OFFLINE) {
-		/* FIXME: We are unable to perform reconnects while in
-		 * sbp2_login().  Therefore __scsi_add_device() will get
-		 * into trouble if a bus reset happens in parallel.
-		 * It will either fail (that's OK, see above) or take sdev
-		 * offline.  Here is a crude workaround for the latter.
-		 */
-		scsi_device_put(sdev);
-		scsi_remove_device(sdev);
+	shost = container_of((void *)tgt, struct Scsi_Host, hostdata[0]);
+	sdev = __scsi_add_device(shost, 0, 0, sbp2_lun2int(lu->lun), lu);
+	/*
+	 * FIXME:  We are unable to perform reconnects while in sbp2_login().
+	 * Therefore __scsi_add_device() will get into trouble if a bus reset
+	 * happens in parallel.  It will either fail or leave us with an
+	 * unusable sdev.  As a workaround we check for this and retry the
+	 * whole login and SCSI probing.
+	 */
+
+	/* Reported error during __scsi_add_device() */
+	if (IS_ERR(sdev))
 		goto out_logout_login;
 
-	} else {
-		/*
-		 * Can you believe it?  Everything went well.
-		 */
-		lu->sdev = sdev;
+	/* Unreported error during __scsi_add_device() */
+	smp_rmb(); /* get current card generation */
+	if (generation != device->card->generation) {
+		scsi_remove_device(sdev);
 		scsi_device_put(sdev);
-		goto out;
+		goto out_logout_login;
 	}
 
+	/* No error during __scsi_add_device() */
+	lu->has_sdev = true;
+	scsi_device_put(sdev);
+	sbp2_allow_block(lu);
+	goto out;
+
  out_logout_login:
 	smp_rmb(); /* generation may have changed */
 	generation = device->generation;
 	smp_rmb(); /* node_id must not be older than generation */
 
 	sbp2_send_management_orb(lu, device->node_id, generation,
-			SBP2_LOGOUT_REQUEST, lu->login_id, NULL);
+				 SBP2_LOGOUT_REQUEST, lu->login_id, NULL);
 	/*
 	 * If a bus reset happened, sbp2_update will have requeued
-	 * lu->work already. Reset the work from reconnect to login.
+	 * lu->work already.  Reset the work from reconnect to login.
 	 */
-	PREPARE_WORK(&lu->work, sbp2_login, &(lu->work));
+	PREPARE_WORK(&lu->work, sbp2_login, &lu->work);
  out:
-	sbp2_target_put(lu->tgt);
+	sbp2_target_put(tgt);
 }
 
 static int sbp2_add_logical_unit(struct sbp2_target *tgt, int lun_entry)
@@ -795,12 +985,14 @@ static int sbp2_add_logical_unit(struct sbp2_target *tgt, int lun_entry)
 		return -ENOMEM;
 	}
 
-	lu->tgt  = tgt;
-	lu->sdev = NULL;
-	lu->lun  = lun_entry & 0xffff;
-	lu->retries = 0;
+	lu->tgt      = tgt;
+	lu->lun      = lun_entry & 0xffff;
+	lu->retries  = 0;
+	lu->has_sdev = false;
+	lu->blocked  = false;
+	++tgt->dont_block;
 	INIT_LIST_HEAD(&lu->orb_list);
-	INIT_WORK(&lu->work, sbp2_login, &(lu->work));
+	INIT_WORK(&lu->work, sbp2_login, &lu->work);
 
 	list_add_tail(&lu->link, &tgt->lu_list);
 	return 0;
@@ -832,7 +1024,7 @@ static int sbp2_scan_unit_dir(struct sbp2_target *tgt, u32 *directory,
 
 		case CSR_DEPENDENT_INFO | CSR_OFFSET:
 			tgt->management_agent_address =
-				0xfffff0000000ULL + 4 * value;
+					CSR_REGISTER_BASE + 4 * value;
 			break;
 
 		case CSR_DIRECTORY_ID:
@@ -852,12 +1044,12 @@ static int sbp2_scan_unit_dir(struct sbp2_target *tgt, u32 *directory,
 			timeout = ((unsigned int) value >> 8 & 0xff) * 500;
 			timeout = max(timeout, SBP2_MIN_LOGIN_ORB_TIMEOUT);
 			tgt->mgt_orb_timeout =
-				min(timeout, SBP2_MAX_LOGIN_ORB_TIMEOUT);
+				  min(timeout, SBP2_MAX_LOGIN_ORB_TIMEOUT);
 
 			if (timeout > tgt->mgt_orb_timeout)
 				fw_notify("%s: config rom contains %ds "
 					  "management ORB timeout, limiting "
-					  "to %ds\n", tgt->unit->device.bus_id,
+					  "to %ds\n", tgt->bus_id,
 					  timeout / 1000,
 					  tgt->mgt_orb_timeout / 1000);
 			break;
@@ -868,7 +1060,8 @@ static int sbp2_scan_unit_dir(struct sbp2_target *tgt, u32 *directory,
 			break;
 
 		case SBP2_CSR_LOGICAL_UNIT_DIRECTORY:
-			if (sbp2_scan_logical_unit_dir(tgt, ci.p + value) < 0)
+			/* Adjust for the increment in the iterator */
+			if (sbp2_scan_logical_unit_dir(tgt, ci.p - 1 + value) < 0)
 				return -ENOMEM;
 			break;
 		}
@@ -885,7 +1078,7 @@ static void sbp2_init_workarounds(struct sbp2_target *tgt, u32 model,
 	if (w)
 		fw_notify("Please notify linux1394-devel@lists.sourceforge.net "
 			  "if you need the workarounds parameter for %s\n",
-			  tgt->unit->device.bus_id);
+			  tgt->bus_id);
 
 	if (w & SBP2_WORKAROUND_OVERRIDE)
 		goto out;
@@ -907,8 +1100,7 @@ static void sbp2_init_workarounds(struct sbp2_target *tgt, u32 model,
 	if (w)
 		fw_notify("Workarounds for %s: 0x%x "
 			  "(firmware_revision 0x%06x, model_id 0x%06x)\n",
-			  tgt->unit->device.bus_id,
-			  w, firmware_revision, model);
+			  tgt->bus_id, w, firmware_revision, model);
 	tgt->workarounds = w;
 }
 
@@ -932,6 +1124,8 @@ static int sbp2_probe(struct device *dev)
 	tgt->unit = unit;
 	kref_init(&tgt->kref);
 	INIT_LIST_HEAD(&tgt->lu_list);
+	tgt->bus_id = unit->device.bus_id;
+	tgt->guid = (u64)device->config_rom[3] << 32 | device->config_rom[4];
 
 	if (fw_device_enable_phys_dma(device) < 0)
 		goto fail_shost_put;
@@ -939,6 +1133,9 @@ static int sbp2_probe(struct device *dev)
 	if (scsi_add_host(shost, &unit->device) < 0)
 		goto fail_shost_put;
 
+	fw_device_get(device);
+	fw_unit_get(unit);
+
 	/* Initialize to values that won't match anything in our table. */
 	firmware_revision = 0xff000000;
 	model = 0xff000000;
@@ -953,8 +1150,6 @@ static int sbp2_probe(struct device *dev)
 
 	sbp2_init_workarounds(tgt, model, firmware_revision);
 
-	get_device(&unit->device);
-
 	/* Do the login in a workqueue so we can easily reschedule retries. */
 	list_for_each_entry(lu, &tgt->lu_list, link)
 		sbp2_queue_work(lu, 0);
@@ -983,10 +1178,13 @@ static void sbp2_reconnect(void *w)
 	struct work_struct *work = w;
 	struct sbp2_logical_unit *lu =
 		container_of(work, struct sbp2_logical_unit, work);
-	struct fw_unit *unit = lu->tgt->unit;
-	struct fw_device *device = fw_device(unit->device.parent);
+	struct sbp2_target *tgt = lu->tgt;
+	struct fw_device *device = fw_device(tgt->unit->device.parent);
 	int generation, node_id, local_node_id;
 
+	if (fw_device_is_shutdown(device))
+		goto out;
+
 	generation    = device->generation;
 	smp_rmb();    /* node_id must not be older than generation */
 	node_id       = device->node_id;
@@ -995,28 +1193,36 @@ static void sbp2_reconnect(void *w)
 	if (sbp2_send_management_orb(lu, node_id, generation,
 				     SBP2_RECONNECT_REQUEST,
 				     lu->login_id, NULL) < 0) {
-		if (lu->retries++ >= 5) {
-			fw_error("failed to reconnect to %s\n",
-				 unit->device.bus_id);
-			/* Fall back and try to log in again. */
+		/*
+		 * If reconnect was impossible even though we are in the
+		 * current generation, fall back and try to log in again.
+		 *
+		 * We could check for "Function rejected" status, but
+		 * looking at the bus generation is simpler and more general.
+		 */
+		smp_rmb(); /* get current card generation */
+		if (generation == device->card->generation ||
+		    lu->retries++ >= 5) {
+			fw_error("%s: failed to reconnect\n", tgt->bus_id);
 			lu->retries = 0;
-			PREPARE_WORK(&lu->work, sbp2_login, &(lu->work));
+			PREPARE_WORK(&lu->work, sbp2_login, &lu->work);
 		}
 		sbp2_queue_work(lu, DIV_ROUND_UP(HZ, 5));
 		goto out;
 	}
 
-	lu->generation        = generation;
-	lu->tgt->node_id      = node_id;
-	lu->tgt->address_high = local_node_id << 16;
+	tgt->node_id      = node_id;
+	tgt->address_high = local_node_id << 16;
+	sbp2_set_generation(lu, generation);
 
-	fw_notify("reconnected to %s LUN %04x (%d retries)\n",
-		  unit->device.bus_id, lu->lun, lu->retries);
+	fw_notify("%s: reconnected to LUN %04x (%d retries)\n",
+		  tgt->bus_id, lu->lun, lu->retries);
 
 	sbp2_agent_reset(lu);
 	sbp2_cancel_orbs(lu);
+	sbp2_conditionally_unblock(lu);
  out:
-	sbp2_target_put(lu->tgt);
+	sbp2_target_put(tgt);
 }
 
 static void sbp2_update(struct fw_unit *unit)
@@ -1031,6 +1237,7 @@ static void sbp2_update(struct fw_unit *unit)
 	 * Iteration over tgt->lu_list is therefore safe here.
 	 */
 	list_for_each_entry(lu, &tgt->lu_list, link) {
+		sbp2_conditionally_block(lu);
 		lu->retries = 0;
 		sbp2_queue_work(lu, 0);
 	}
@@ -1108,7 +1315,7 @@ complete_command_orb(struct sbp2_orb *base_orb, struct sbp2_status *status)
 
 	if (status != NULL) {
 		if (STATUS_GET_DEAD(*status))
-			sbp2_agent_reset(orb->lu);
+			sbp2_agent_reset_no_wait(orb->lu);
 
 		switch (STATUS_GET_RESPONSE(*status)) {
 		case SBP2_STATUS_REQUEST_COMPLETE:
@@ -1134,16 +1341,16 @@ complete_command_orb(struct sbp2_orb *base_orb, struct sbp2_status *status)
 		 * or when sending the write (less likely).
 		 */
 		result = DID_BUS_BUSY << 16;
+		sbp2_conditionally_block(orb->lu);
 	}
 
 	dma_unmap_single(device->card->device, orb->base.request_bus,
 			 sizeof(orb->request), DMA_TO_DEVICE);
 
-	if (orb->cmd->use_sg > 0)
-		dma_unmap_sg(device->card->device,
-			     (struct scatterlist *)orb->cmd->request_buffer,
-			     orb->cmd->use_sg, orb->cmd->sc_data_direction);
-
+	if (scsi_sg_count(orb->cmd) > 0)
+		dma_unmap_sg(device->card->device, scsi_sglist(orb->cmd),
+			     scsi_sg_count(orb->cmd),
+			     orb->cmd->sc_data_direction);
 
 	if (orb->page_table_bus != 0)
 		dma_unmap_single(device->card->device, orb->page_table_bus,
@@ -1161,10 +1368,9 @@ sbp2_map_scatterlist(struct sbp2_command_orb *orb, struct fw_device *device,
 	int sg_len, l, i, j, count;
 	dma_addr_t sg_addr;
 
-	sg = (struct scatterlist *)orb->cmd->request_buffer;
-	count = dma_map_sg(device->card->device, sg, orb->cmd->use_sg,
+	sg = scsi_sglist(orb->cmd);
+	count = dma_map_sg(device->card->device, sg, scsi_sg_count(orb->cmd),
 			   orb->cmd->sc_data_direction);
-
 	if (count == 0)
 		goto fail;
 
@@ -1176,9 +1382,12 @@ sbp2_map_scatterlist(struct sbp2_command_orb *orb, struct fw_device *device,
 	 * tables.
 	 */
 	if (count == 1 && sg_dma_len(sg) < SBP2_MAX_SG_ELEMENT_LENGTH) {
-		orb->request.data_descriptor.high = lu->tgt->address_high;
-		orb->request.data_descriptor.low  = sg_dma_address(sg);
-		orb->request.misc |= COMMAND_ORB_DATA_SIZE(sg_dma_len(sg));
+		orb->request.data_descriptor.high =
+			cpu_to_be32(lu->tgt->address_high);
+		orb->request.data_descriptor.low  =
+			cpu_to_be32(sg_dma_address(sg));
+		orb->request.misc |=
+			cpu_to_be32(COMMAND_ORB_DATA_SIZE(sg_dma_len(sg)));
 		return 0;
 	}
 
@@ -1199,16 +1408,14 @@ sbp2_map_scatterlist(struct sbp2_command_orb *orb, struct fw_device *device,
 				goto fail_page_table;
 			}
 			l = min(sg_len, SBP2_MAX_SG_ELEMENT_LENGTH);
-			orb->page_table[j].low = sg_addr;
-			orb->page_table[j].high = (l << 16);
+			orb->page_table[j].low = cpu_to_be32(sg_addr);
+			orb->page_table[j].high = cpu_to_be32(l << 16);
 			sg_addr += l;
 			sg_len -= l;
 			j++;
 		}
 	}
 
-	fw_memcpy_to_be32(orb->page_table, orb->page_table,
-			  sizeof(orb->page_table[0]) * j);
 	orb->page_table_bus =
 		dma_map_single(device->card->device, orb->page_table,
 			       sizeof(orb->page_table), DMA_TO_DEVICE);
@@ -1222,16 +1429,15 @@ sbp2_map_scatterlist(struct sbp2_command_orb *orb, struct fw_device *device,
 	 * initiator (i.e. us), but data_descriptor can refer to data
 	 * on other nodes so we need to put our ID in descriptor.high.
 	 */
-	orb->request.data_descriptor.high = lu->tgt->address_high;
-	orb->request.data_descriptor.low  = orb->page_table_bus;
-	orb->request.misc |=
-		COMMAND_ORB_PAGE_TABLE_PRESENT |
-		COMMAND_ORB_DATA_SIZE(j);
+	orb->request.data_descriptor.high = cpu_to_be32(lu->tgt->address_high);
+	orb->request.data_descriptor.low  = cpu_to_be32(orb->page_table_bus);
+	orb->request.misc |= cpu_to_be32(COMMAND_ORB_PAGE_TABLE_PRESENT |
+					 COMMAND_ORB_DATA_SIZE(j));
 
 	return 0;
 
  fail_page_table:
-	dma_unmap_sg(device->card->device, sg, orb->cmd->use_sg,
+	dma_unmap_sg(device->card->device, sg, scsi_sg_count(orb->cmd),
 		     orb->cmd->sc_data_direction);
  fail:
 	return -ENOMEM;
@@ -1272,8 +1478,7 @@ static int sbp2_scsi_queuecommand(struct scsi_cmnd *cmd, scsi_done_fn_t done)
 	orb->done = done;
 	orb->cmd  = cmd;
 
-	orb->request.next.high   = SBP2_ORB_NULL;
-	orb->request.next.low    = 0x0;
+	orb->request.next.high   = cpu_to_be32(SBP2_ORB_NULL);
 	/*
 	 * At speed 100 we can do 512 bytes per packet, at speed 200,
 	 * 1024 bytes per packet etc.  The SBP-2 max_payload field
@@ -1282,29 +1487,20 @@ static int sbp2_scsi_queuecommand(struct scsi_cmnd *cmd, scsi_done_fn_t done)
 	 */
 	max_payload = min(device->max_speed + 7,
 			  device->card->max_receive - 1);
-	orb->request.misc =
+	orb->request.misc = cpu_to_be32(
 		COMMAND_ORB_MAX_PAYLOAD(max_payload) |
 		COMMAND_ORB_SPEED(device->max_speed) |
-		COMMAND_ORB_NOTIFY;
+		COMMAND_ORB_NOTIFY);
 
 	if (cmd->sc_data_direction == DMA_FROM_DEVICE)
-		orb->request.misc |=
-			COMMAND_ORB_DIRECTION(SBP2_DIRECTION_FROM_MEDIA);
-	else if (cmd->sc_data_direction == DMA_TO_DEVICE)
-		orb->request.misc |=
-			COMMAND_ORB_DIRECTION(SBP2_DIRECTION_TO_MEDIA);
+		orb->request.misc |= cpu_to_be32(COMMAND_ORB_DIRECTION);
 
-	if (cmd->use_sg && sbp2_map_scatterlist(orb, device, lu) < 0)
+	if (scsi_sg_count(cmd) && sbp2_map_scatterlist(orb, device, lu) < 0)
 		goto out;
 
-	fw_memcpy_to_be32(&orb->request, &orb->request, sizeof(orb->request));
-
-	memset(orb->request.command_block,
-	       0, sizeof(orb->request.command_block));
-	memcpy(orb->request.command_block, cmd->cmnd, COMMAND_SIZE(*cmd->cmnd));
+	memcpy(orb->request.command_block, cmd->cmnd, cmd->cmd_len);
 
 	orb->base.callback = complete_command_orb;
-
 	orb->base.request_bus =
 		dma_map_single(device->card->device, &orb->request,
 			       sizeof(orb->request), DMA_TO_DEVICE);
@@ -1323,8 +1519,16 @@ static int sbp2_scsi_slave_alloc(struct scsi_device *sdev)
 {
 	struct sbp2_logical_unit *lu = sdev->hostdata;
 
+	/* (Re-)Adding logical units via the SCSI stack is not supported. */
+	if (!lu)
+		return -ENOSYS;
+
 	sdev->allow_restart = 1;
 
+	/* SBP-2 requires quadlet alignment of the data buffers. */
+	if (4 - 1 > sdev->request_queue->dma_alignment)
+		sdev->request_queue->dma_alignment = 4 - 1;
+
 	if (lu->tgt->workarounds & SBP2_WORKAROUND_INQUIRY_36)
 		sdev->inquiry_len = 36;
 
@@ -1337,6 +1541,9 @@ static int sbp2_scsi_slave_configure(struct scsi_device *sdev)
 
 	sdev->use_10_for_rw = 1;
 
+	if (sbp2_param_exclusive_login)
+		sdev->manage_start_stop = 1;
+
 	if (sdev->type == TYPE_ROM)
 		sdev->use_10_for_ms = 1;
 
@@ -1347,6 +1554,11 @@ static int sbp2_scsi_slave_configure(struct scsi_device *sdev)
 	if (lu->tgt->workarounds & SBP2_WORKAROUND_FIX_CAPACITY)
 		sdev->fix_capacity = 1;
 
+#if 0
+	if (lu->tgt->workarounds & SBP2_WORKAROUND_POWER_CONDITION)
+		sdev->start_stop_pwr_cond = 1;
+#endif
+
 	if (lu->tgt->workarounds & SBP2_WORKAROUND_128K_MAX_TRANS)
 		blk_queue_max_sectors(sdev->request_queue, 128 * 1024 / 512);
 
@@ -1361,7 +1573,7 @@ static int sbp2_scsi_abort(struct scsi_cmnd *cmd)
 {
 	struct sbp2_logical_unit *lu = cmd->device->hostdata;
 
-	fw_notify("sbp2_scsi_abort\n");
+	fw_notify("%s: sbp2_scsi_abort\n", lu->tgt->bus_id);
 	sbp2_agent_reset(lu);
 	sbp2_cancel_orbs(lu);
 
@@ -1381,17 +1593,15 @@ sbp2_sysfs_ieee1394_id_show(struct device *dev, struct device_attribute *attr,
 {
 	struct scsi_device *sdev = to_scsi_device(dev);
 	struct sbp2_logical_unit *lu;
-	struct fw_device *device;
 
 	if (!sdev)
 		return 0;
 
 	lu = sdev->hostdata;
-	device = fw_device(lu->tgt->unit->device.parent);
 
-	return sprintf(buf, "%08x%08x:%06x:%04x\n",
-		       device->config_rom[3], device->config_rom[4],
-		       lu->tgt->directory_id, lu->lun);
+	return sprintf(buf, "%016llx:%06x:%04x\n",
+			(unsigned long long)lu->tgt->guid,
+			lu->tgt->directory_id, lu->lun);
 }
 
 static DEVICE_ATTR(ieee1394_id, S_IRUGO, sbp2_sysfs_ieee1394_id_show, NULL);
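The sbp2 hunks above replace the old cmd->request_buffer/cmd->use_sg fields with the scsi_sglist()/scsi_sg_count() accessors and store ORB fields in bus order with cpu_to_be32() at assignment time, dropping the bulk fw_memcpy_to_be32() pass. A minimal sketch of that fill pattern follows; the struct and helper are hypothetical and not part of the patch:

	#include <linux/types.h>
	#include <asm/byteorder.h>

	/* Illustrative only: a two-quadlet pointer kept in bus order. */
	struct example_pointer {
		__be32 high;
		__be32 low;
	};

	static void example_fill_pointer(struct example_pointer *p,
					 u32 address_high, u32 bus_address)
	{
		/* Convert each field once, when it is written. */
		p->high = cpu_to_be32(address_high);
		p->low  = cpu_to_be32(bus_address);
	}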
diff --git a/drivers/firewire/fw-topology.c b/drivers/firewire/fw-topology.c
index 8fad17e..e41da22 100644
--- a/drivers/firewire/fw-topology.c
+++ b/drivers/firewire/fw-topology.c
@@ -1,5 +1,5 @@
 /*
- *  Incremental bus scan, based on bus topology
+ * Incremental bus scan, based on bus topology
  *
  * Copyright (C) 2004-2006 Kristian Hoegsberg <krh@bitplanet.net>
  *
@@ -21,6 +21,8 @@
 #include <linux/module.h>
 #include <linux/wait.h>
 #include <linux/errno.h>
+#include <asm/bug.h>
+#include <asm/system.h>
 #include "fw-transaction.h"
 #include "fw-topology.h"
 
@@ -106,6 +108,7 @@ static struct fw_node *fw_node_create(u32 sid, int port_count, int color)
 	node->node_id = LOCAL_BUS | SELF_ID_PHY_ID(sid);
 	node->link_on = SELF_ID_LINK_ON(sid);
 	node->phy_speed = SELF_ID_PHY_SPEED(sid);
+	node->initiated_reset = SELF_ID_PHY_INITIATOR(sid);
 	node->port_count = port_count;
 
 	atomic_set(&node->ref_count, 1);
@@ -177,7 +180,7 @@ static struct fw_node *build_tree(struct fw_card *card,
 	u32 *next_sid, *end, q;
 	int i, port_count, child_port_count, phy_id, parent_count, stack_depth;
 	int gap_count;
-	int beta_repeaters_present;
+	bool beta_repeaters_present;
 
 	local_node = NULL;
 	node = NULL;
@@ -187,7 +190,7 @@ static struct fw_node *build_tree(struct fw_card *card,
 	phy_id = 0;
 	irm_node = NULL;
 	gap_count = SELF_ID_GAP_COUNT(*sid);
-	beta_repeaters_present = 0;
+	beta_repeaters_present = false;
 
 	while (sid < end) {
 		next_sid = count_ports(sid, &port_count, &child_port_count);
@@ -282,17 +285,16 @@ static struct fw_node *build_tree(struct fw_card *card,
 		list_add_tail(&node->link, &stack);
 		stack_depth += 1 - child_port_count;
 
-	    if (node->phy_speed == SCODE_BETA &&
-		parent_count + child_port_count > 1)
-		    beta_repeaters_present = 1;
+		if (node->phy_speed == SCODE_BETA &&
+		    parent_count + child_port_count > 1)
+			beta_repeaters_present = true;
 
 		/*
-		 * If all PHYs does not report the same gap count
-		 * setting, we fall back to 63 which will force a gap
-		 * count reconfiguration and a reset.
+		 * If PHYs report different gap counts, set an invalid count
+		 * which will force a gap count reconfiguration and a reset.
 		 */
 		if (SELF_ID_GAP_COUNT(q) != gap_count)
-			gap_count = 63;
+			gap_count = 0;
 
 		update_hop_count(node);
 
@@ -382,6 +384,7 @@ void fw_destroy_nodes(struct fw_card *card)
 	card->color++;
 	if (card->local_node != NULL)
 		for_each_fw_node(card, card->local_node, report_lost_node);
+	card->local_node = NULL;
 	spin_unlock_irqrestore(&card->lock, flags);
 }
 
@@ -422,12 +425,14 @@ update_tree(struct fw_card *card, struct fw_node *root)
 	node1 = fw_node(list1.next);
 
 	while (&node0->link != &list0) {
+		WARN_ON(node0->port_count != node1->port_count);
 
-		/* assert(node0->port_count == node1->port_count); */
 		if (node0->link_on && !node1->link_on)
 			event = FW_NODE_LINK_OFF;
 		else if (!node0->link_on && node1->link_on)
 			event = FW_NODE_LINK_ON;
+		else if (node1->initiated_reset && node1->link_on)
+			event = FW_NODE_INITIATED_RESET;
 		else
 			event = FW_NODE_UPDATED;
 
@@ -505,7 +510,17 @@ fw_core_handle_bus_reset(struct fw_card *card,
 	struct fw_node *local_node;
 	unsigned long flags;
 
-	fw_flush_transactions(card);
+	/*
+	 * If the selfID buffer is not the immediate successor of the
+	 * previously processed one, we cannot reliably compare the
+	 * old and new topologies.
+	 */
+	if (!is_next_generation(generation, card->generation) &&
+	    card->local_node != NULL) {
+		fw_notify("skipped bus generations, destroying all nodes\n");
+		fw_destroy_nodes(card);
+		card->bm_retries = 0;
+	}
 
 	spin_lock_irqsave(&card->lock, flags);
 
diff --git a/drivers/firewire/fw-topology.h b/drivers/firewire/fw-topology.h
index 5b8efd3..addb9f8 100644
--- a/drivers/firewire/fw-topology.h
+++ b/drivers/firewire/fw-topology.h
@@ -20,11 +20,12 @@
 #define __fw_topology_h
 
 enum {
-	FW_NODE_CREATED =   0x00,
-	FW_NODE_UPDATED =   0x01,
-	FW_NODE_DESTROYED = 0x02,
-	FW_NODE_LINK_ON =   0x03,
-	FW_NODE_LINK_OFF =  0x04,
+	FW_NODE_CREATED,
+	FW_NODE_UPDATED,
+	FW_NODE_DESTROYED,
+	FW_NODE_LINK_ON,
+	FW_NODE_LINK_OFF,
+	FW_NODE_INITIATED_RESET,
 };
 
 struct fw_node {
@@ -35,8 +36,8 @@ struct fw_node {
 	u8 initiated_reset : 1;
 	u8 b_path : 1;
 	u8 phy_speed : 2; /* As in the self ID packet. */
-	u8 max_speed : 2; /* Minimum of all phy-speeds and port speeds on
-			   * the path from the local node to this node. */
+	u8 max_speed : 2; /* Minimum of all phy-speeds on the path from the
+			   * local node to this node. */
 	u8 max_depth : 4; /* Maximum depth to any leaf node */
 	u8 max_hops : 4;  /* Max hops in this sub tree */
 	atomic_t ref_count;
@@ -71,4 +72,5 @@ fw_destroy_nodes(struct fw_card *card);
 int
 fw_compute_block_crc(u32 *block);
 
+
 #endif /* __fw_topology_h */
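The topology changes above add an FW_NODE_INITIATED_RESET event and let the enum values be assigned implicitly. A hedged sketch of how a node-event consumer might distinguish the new case; the callback name and body are illustrative, not taken from the patch:

	static void example_node_event(struct fw_card *card,
				       struct fw_node *node, int event)
	{
		switch (event) {
		case FW_NODE_INITIATED_RESET:	/* new event added above */
		case FW_NODE_UPDATED:
			/* Node is still present; refresh any cached state. */
			break;
		case FW_NODE_LINK_OFF:
		case FW_NODE_DESTROYED:
			/* Node is unreachable or gone; drop references. */
			break;
		default:
			break;
		}
	}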
diff --git a/drivers/firewire/fw-transaction.c b/drivers/firewire/fw-transaction.c
index 7fcc59d..022ac4f 100644
--- a/drivers/firewire/fw-transaction.c
+++ b/drivers/firewire/fw-transaction.c
@@ -18,8 +18,11 @@
  * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  */
 
+#include <linux/completion.h>
 #include <linux/kernel.h>
+#include <linux/kref.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/pci.h>
@@ -28,7 +31,6 @@
 #include <linux/list.h>
 #include <linux/kthread.h>
 #include <asm/uaccess.h>
-#include <asm/semaphore.h>
 
 #include "fw-transaction.h"
 #include "fw-topology.h"
@@ -54,6 +56,9 @@
 #define HEADER_GET_DATA_LENGTH(q)	(((q) >> 16) & 0xffff)
 #define HEADER_GET_EXTENDED_TCODE(q)	(((q) >> 0) & 0xffff)
 
+#define HEADER_DESTINATION_IS_BROADCAST(q) \
+	(((q) & HEADER_DESTINATION(0x3f)) == HEADER_DESTINATION(0x3f))
+
 #define PHY_CONFIG_GAP_COUNT(gap_count)	(((gap_count) << 16) | (1 << 22))
 #define PHY_CONFIG_ROOT_ID(node_id)	((((node_id) & 0x3f) << 24) | (1 << 23))
 #define PHY_IDENTIFIER(id)		((id) << 30)
@@ -147,7 +152,7 @@ transmit_complete_callback(struct fw_packet *packet,
 
 static void
 fw_fill_request(struct fw_packet *packet, int tcode, int tlabel,
-		int node_id, int source_id, int generation, int speed,
+		int destination_id, int source_id, int generation, int speed,
 		unsigned long long offset, void *payload, size_t length)
 {
 	int ext_tcode;
@@ -162,7 +167,7 @@ fw_fill_request(struct fw_packet *packet, int tcode, int tlabel,
 		HEADER_RETRY(RETRY_X) |
 		HEADER_TLABEL(tlabel) |
 		HEADER_TCODE(tcode) |
-		HEADER_DESTINATION(node_id);
+		HEADER_DESTINATION(destination_id);
 	packet->header[1] =
 		HEADER_OFFSET_HIGH(offset >> 32) | HEADER_SOURCE(source_id);
 	packet->header[2] =
@@ -242,13 +247,13 @@ fw_fill_request(struct fw_packet *packet, int tcode, int tlabel,
  */
 void
 fw_send_request(struct fw_card *card, struct fw_transaction *t,
-		int tcode, int node_id, int generation, int speed,
+		int tcode, int destination_id, int generation, int speed,
 		unsigned long long offset,
 		void *payload, size_t length,
 		fw_transaction_callback_t callback, void *callback_data)
 {
 	unsigned long flags;
-	int tlabel, source;
+	int tlabel;
 
 	/*
 	 * Bump the flush timer up 100ms first of all so we
@@ -264,7 +269,6 @@ fw_send_request(struct fw_card *card, struct fw_transaction *t,
 
 	spin_lock_irqsave(&card->lock, flags);
 
-	source = card->node_id;
 	tlabel = card->current_tlabel;
 	if (card->tlabel_mask & (1 << tlabel)) {
 		spin_unlock_irqrestore(&card->lock, flags);
@@ -275,61 +279,98 @@ fw_send_request(struct fw_card *card, struct fw_transaction *t,
 	card->current_tlabel = (card->current_tlabel + 1) & 0x1f;
 	card->tlabel_mask |= (1 << tlabel);
 
-	list_add_tail(&t->link, &card->transaction_list);
-
-	spin_unlock_irqrestore(&card->lock, flags);
-
-	/* Initialize rest of transaction, fill out packet and send it. */
-	t->node_id = node_id;
+	t->node_id = destination_id;
 	t->tlabel = tlabel;
 	t->callback = callback;
 	t->callback_data = callback_data;
 
 	fw_fill_request(&t->packet, tcode, t->tlabel,
-			node_id, source, generation,
+			destination_id, card->node_id, generation,
 			speed, offset, payload, length);
 	t->packet.callback = transmit_complete_callback;
 
+	list_add_tail(&t->link, &card->transaction_list);
+
+	spin_unlock_irqrestore(&card->lock, flags);
+
 	card->driver->send_request(card, &t->packet);
 }
 EXPORT_SYMBOL(fw_send_request);
 
-static void
-transmit_phy_packet_callback(struct fw_packet *packet,
-			     struct fw_card *card, int status)
+struct transaction_callback_data {
+	struct completion done;
+	void *payload;
+	int rcode;
+};
+
+static void transaction_callback(struct fw_card *card, int rcode,
+				 void *payload, size_t length, void *data)
 {
-	kfree(packet);
+	struct transaction_callback_data *d = data;
+
+	if (rcode == RCODE_COMPLETE)
+		memcpy(d->payload, payload, length);
+	d->rcode = rcode;
+	complete(&d->done);
 }
 
-static void send_phy_packet(struct fw_card *card, u32 data, int generation)
+/**
+ * fw_run_transaction - send request and sleep until transaction is completed
+ *
+ * Returns the RCODE.
+ */
+int fw_run_transaction(struct fw_card *card, int tcode, int destination_id,
+		int generation, int speed, unsigned long long offset,
+		void *data, size_t length)
 {
-	struct fw_packet *packet;
+	struct transaction_callback_data d;
+	struct fw_transaction t;
 
-	packet = kzalloc(sizeof(*packet), GFP_ATOMIC);
-	if (packet == NULL)
-		return;
+	init_completion(&d.done);
+	d.payload = data;
+	fw_send_request(card, &t, tcode, destination_id, generation, speed,
+			offset, data, length, transaction_callback, &d);
+	wait_for_completion(&d.done);
 
-	packet->header[0] = data;
-	packet->header[1] = ~data;
-	packet->header_length = 8;
-	packet->payload_length = 0;
-	packet->speed = SCODE_100;
-	packet->generation = generation;
-	packet->callback = transmit_phy_packet_callback;
+	return d.rcode;
+}
+EXPORT_SYMBOL(fw_run_transaction);
+
+static DEFINE_MUTEX(phy_config_mutex);
+static DECLARE_COMPLETION(phy_config_done);
 
-	card->driver->send_request(card, packet);
+static void transmit_phy_packet_callback(struct fw_packet *packet,
+					 struct fw_card *card, int status)
+{
+	complete(&phy_config_done);
 }
 
+static struct fw_packet phy_config_packet = {
+	.header_length	= 8,
+	.payload_length	= 0,
+	.speed		= SCODE_100,
+	.callback	= transmit_phy_packet_callback,
+};
+
 void fw_send_phy_config(struct fw_card *card,
 			int node_id, int generation, int gap_count)
 {
-	u32 q;
+	long timeout = DIV_ROUND_UP(HZ, 10);
+	u32 data = PHY_IDENTIFIER(PHY_PACKET_CONFIG) |
+		   PHY_CONFIG_ROOT_ID(node_id) |
+		   PHY_CONFIG_GAP_COUNT(gap_count);
+
+	mutex_lock(&phy_config_mutex);
+
+	phy_config_packet.header[0] = data;
+	phy_config_packet.header[1] = ~data;
+	phy_config_packet.generation = generation;
+	INIT_COMPLETION(phy_config_done);
 
-	q = PHY_IDENTIFIER(PHY_PACKET_CONFIG) |
-		PHY_CONFIG_ROOT_ID(node_id) |
-		PHY_CONFIG_GAP_COUNT(gap_count);
+	card->driver->send_request(card, &phy_config_packet);
+	wait_for_completion_timeout(&phy_config_done, timeout);
 
-	send_phy_packet(card, q, generation);
+	mutex_unlock(&phy_config_mutex);
 }
 
 void fw_flush_transactions(struct fw_card *card)
@@ -389,21 +430,21 @@ lookup_enclosing_address_handler(struct list_head *list,
 static DEFINE_SPINLOCK(address_handler_lock);
 static LIST_HEAD(address_handler_list);
 
-const struct fw_address_region fw_low_memory_region =
-	{ .start = 0x000000000000ULL, .end = 0x000100000000ULL,  };
 const struct fw_address_region fw_high_memory_region =
 	{ .start = 0x000100000000ULL, .end = 0xffffe0000000ULL,  };
+EXPORT_SYMBOL(fw_high_memory_region);
+
+#if 0
+const struct fw_address_region fw_low_memory_region =
+	{ .start = 0x000000000000ULL, .end = 0x000100000000ULL,  };
 const struct fw_address_region fw_private_region =
 	{ .start = 0xffffe0000000ULL, .end = 0xfffff0000000ULL,  };
 const struct fw_address_region fw_csr_region =
-	{ .start = 0xfffff0000000ULL, .end = 0xfffff0000800ULL,  };
+	{ .start = CSR_REGISTER_BASE,
+	  .end   = CSR_REGISTER_BASE | CSR_CONFIG_ROM_END,  };
 const struct fw_address_region fw_unit_space_region =
 	{ .start = 0xfffff0000900ULL, .end = 0x1000000000000ULL, };
-EXPORT_SYMBOL(fw_low_memory_region);
-EXPORT_SYMBOL(fw_high_memory_region);
-EXPORT_SYMBOL(fw_private_region);
-EXPORT_SYMBOL(fw_csr_region);
-EXPORT_SYMBOL(fw_unit_space_region);
+#endif  /*  0  */
 
 /**
  * Allocate a range of addresses in the node space of the OHCI
@@ -574,7 +615,8 @@ allocate_request(struct fw_packet *p)
 		break;
 
 	default:
-		BUG();
+		fw_error("ERROR - corrupt request received - %08x %08x %08x\n",
+			 p->header[0], p->header[1], p->header[2]);
 		return NULL;
 	}
 
@@ -606,12 +648,9 @@ allocate_request(struct fw_packet *p)
 void
 fw_send_response(struct fw_card *card, struct fw_request *request, int rcode)
 {
-	/*
-	 * Broadcast packets are reported as ACK_COMPLETE, so this
-	 * check is sufficient to ensure we don't send response to
-	 * broadcast packets or posted writes.
-	 */
-	if (request->ack != ACK_PENDING) {
+	/* unified transaction or broadcast transaction: don't respond */
+	if (request->ack != ACK_PENDING ||
+	    HEADER_DESTINATION_IS_BROADCAST(request->request_header[0])) {
 		kfree(request);
 		return;
 	}
@@ -736,12 +775,19 @@ fw_core_handle_response(struct fw_card *card, struct fw_packet *p)
 		break;
 	}
 
+	/*
+	 * The response handler may be executed while the request handler
+	 * is still pending.  Cancel the request handler.
+	 */
+	card->driver->cancel_packet(card, &t->packet);
+
 	t->callback(card, rcode, data, data_length, t->callback_data);
 }
 EXPORT_SYMBOL(fw_core_handle_response);
 
 static const struct fw_address_region topology_map_region =
-	{ .start = 0xfffff0001000ull, .end = 0xfffff0001400ull, };
+	{ .start = CSR_REGISTER_BASE | CSR_TOPOLOGY_MAP,
+	  .end   = CSR_REGISTER_BASE | CSR_TOPOLOGY_MAP_END, };
 
 static void
 handle_topology_map(struct fw_card *card, struct fw_request *request,
@@ -751,7 +797,7 @@ handle_topology_map(struct fw_card *card, struct fw_request *request,
 		    void *payload, size_t length, void *callback_data)
 {
 	int i, start, end;
-	u32 *map;
+	__be32 *map;
 
 	if (!TCODE_IS_READ_REQUEST(tcode)) {
 		fw_send_response(card, request, RCODE_TYPE_ERROR);
@@ -779,7 +825,8 @@ static struct fw_address_handler topology_map = {
 };
 
 static const struct fw_address_region registers_region =
-	{ .start = 0xfffff0000000ull, .end = 0xfffff0000400ull, };
+	{ .start = CSR_REGISTER_BASE,
+	  .end   = CSR_REGISTER_BASE | CSR_CONFIG_ROM, };
 
 static void
 handle_registers(struct fw_card *card, struct fw_request *request,
@@ -788,15 +835,16 @@ handle_registers(struct fw_card *card, struct fw_request *request,
 		 unsigned long long offset,
 		 void *payload, size_t length, void *callback_data)
 {
-	int reg = offset - CSR_REGISTER_BASE;
+	int reg = offset & ~CSR_REGISTER_BASE;
 	unsigned long long bus_time;
 	__be32 *data = payload;
+	int rcode = RCODE_COMPLETE;
 
 	switch (reg) {
 	case CSR_CYCLE_TIME:
 	case CSR_BUS_TIME:
 		if (!TCODE_IS_READ_REQUEST(tcode) || length != 4) {
-			fw_send_response(card, request, RCODE_TYPE_ERROR);
+			rcode = RCODE_TYPE_ERROR;
 			break;
 		}
 
@@ -805,7 +853,17 @@ handle_registers(struct fw_card *card, struct fw_request *request,
 			*data = cpu_to_be32(bus_time);
 		else
 			*data = cpu_to_be32(bus_time >> 25);
-		fw_send_response(card, request, RCODE_COMPLETE);
+		break;
+
+	case CSR_BROADCAST_CHANNEL:
+		if (tcode == TCODE_READ_QUADLET_REQUEST)
+			*data = cpu_to_be32(card->broadcast_channel);
+		else if (tcode == TCODE_WRITE_QUADLET_REQUEST)
+			card->broadcast_channel =
+			    (be32_to_cpu(*data) & BROADCAST_CHANNEL_VALID) |
+			    BROADCAST_CHANNEL_INITIAL;
+		else
+			rcode = RCODE_TYPE_ERROR;
 		break;
 
 	case CSR_BUS_MANAGER_ID:
@@ -824,10 +882,13 @@ handle_registers(struct fw_card *card, struct fw_request *request,
 
 	case CSR_BUSY_TIMEOUT:
 		/* FIXME: Implement this. */
+
 	default:
-		fw_send_response(card, request, RCODE_ADDRESS_ERROR);
+		rcode = RCODE_ADDRESS_ERROR;
 		break;
 	}
+
+	fw_send_response(card, request, rcode);
 }
 
 static struct fw_address_handler registers = {
diff --git a/drivers/firewire/fw-transaction.h b/drivers/firewire/fw-transaction.h
index e3e8a9f..fcf4439 100644
--- a/drivers/firewire/fw-transaction.h
+++ b/drivers/firewire/fw-transaction.h
@@ -19,13 +19,15 @@
 #ifndef __fw_transaction_h
 #define __fw_transaction_h
 
+#include <linux/completion.h>
 #include <linux/device.h>
-#include <linux/timer.h>
-#include <linux/interrupt.h>
-#include <linux/list.h>
-#include <linux/fs.h>
 #include <linux/dma-mapping.h>
 #include <linux/firewire-constants.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/spinlock_types.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
 
 #define TCODE_IS_READ_REQUEST(tcode)	(((tcode) & ~1) == 4)
 #define TCODE_IS_BLOCK_PACKET(tcode)	(((tcode) &  1) != 0)
@@ -79,18 +81,21 @@
 #define CSR_SPEED_MAP			0x2000
 #define CSR_SPEED_MAP_END		0x3000
 
+#define BROADCAST_CHANNEL_INITIAL	(1 << 31 | 31)
+#define BROADCAST_CHANNEL_VALID		(1 << 30)
+
 #define fw_notify(s, args...) printk(KERN_NOTICE KBUILD_MODNAME ": " s, ## args)
 #define fw_error(s, args...) printk(KERN_ERR KBUILD_MODNAME ": " s, ## args)
 
 static inline void
 fw_memcpy_from_be32(void *_dst, void *_src, size_t size)
 {
-	u32 *dst = _dst;
-	u32 *src = _src;
+	u32    *dst = _dst;
+	__be32 *src = _src;
 	int i;
 
 	for (i = 0; i < size / 4; i++)
-		dst[i] = cpu_to_be32(src[i]);
+		dst[i] = be32_to_cpu(src[i]);
 }
 
 static inline void
@@ -116,7 +121,7 @@ int fw_core_add_descriptor(struct fw_descriptor *desc);
 void fw_core_remove_descriptor(struct fw_descriptor *desc);
 
 typedef void (*fw_packet_callback_t)(struct fw_packet *packet,
-				      struct fw_card *card, int status);
+				     struct fw_card *card, int status);
 
 typedef void (*fw_transaction_callback_t)(struct fw_card *card, int rcode,
 					  void *data,
@@ -200,11 +205,7 @@ struct fw_address_region {
 	u64 end;
 };
 
-extern const struct fw_address_region fw_low_memory_region;
 extern const struct fw_address_region fw_high_memory_region;
-extern const struct fw_address_region fw_private_region;
-extern const struct fw_address_region fw_csr_region;
-extern const struct fw_address_region fw_unit_space_region;
 
 int fw_core_add_address_handler(struct fw_address_handler *handler,
 				const struct fw_address_region *region);
@@ -220,11 +221,10 @@ struct fw_card {
 	const struct fw_card_driver *driver;
 	struct device *device;
 	struct kref kref;
+	struct completion done;
 
 	int node_id;
 	int generation;
-	/* This is the generation used for timestamping incoming requests. */
-	int request_generation;
 	int current_tlabel, tlabel_mask;
 	struct list_head transaction_list;
 	struct timer_list flush_timer;
@@ -241,6 +241,7 @@ struct fw_card {
 	 */
 	int self_id_count;
 	u32 topology_map[252 + 3];
+	u32 broadcast_channel;
 
 	spinlock_t lock; /* Take this lock when handling the lists in
 			  * this struct. */
@@ -261,8 +262,28 @@ struct fw_card {
 	int bm_generation;
 };
 
-struct fw_card *fw_card_get(struct fw_card *card);
-void fw_card_put(struct fw_card *card);
+static inline struct fw_card *fw_card_get(struct fw_card *card)
+{
+	kref_get(&card->kref);
+
+	return card;
+}
+
+void fw_card_release(struct kref *kref);
+
+static inline void fw_card_put(struct fw_card *card)
+{
+	kref_put(&card->kref, fw_card_release);
+}
+
+/*
+ * Check whether new_generation is the immediate successor of old_generation.
+ * Take counter roll-over at 255 (as per OHCI) into account.
+ */
+static inline bool is_next_generation(int new_generation, int old_generation)
+{
+	return (new_generation & 0xff) == ((old_generation + 1) & 0xff);
+}
 
 /*
  * The iso packet format allows for an immediate header/payload part
@@ -356,8 +377,6 @@ int
 fw_iso_context_stop(struct fw_iso_context *ctx);
 
 struct fw_card_driver {
-	const char *name;
-
 	/*
 	 * Enable the given card with the given initial config rom.
 	 * This function is expected to activate the card, and either
@@ -416,11 +435,14 @@ fw_core_initiate_bus_reset(struct fw_card *card, int short_reset);
 
 void
 fw_send_request(struct fw_card *card, struct fw_transaction *t,
-		int tcode, int node_id, int generation, int speed,
-		unsigned long long offset,
-		void *data, size_t length,
+		int tcode, int destination_id, int generation, int speed,
+		unsigned long long offset, void *data, size_t length,
 		fw_transaction_callback_t callback, void *callback_data);
 
+int fw_run_transaction(struct fw_card *card, int tcode, int destination_id,
+		       int generation, int speed, unsigned long long offset,
+		       void *data, size_t length);
+
 int fw_cancel_transaction(struct fw_card *card,
 			  struct fw_transaction *transaction);
 
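fw-transaction.h above gains the synchronous helper fw_run_transaction(), which wraps fw_send_request() around a completion and returns the RCODE. A hedged usage sketch reading one quadlet from a remote node; the function name and the -EIO mapping are illustrative:

	static int example_read_quadlet(struct fw_device *device,
					unsigned long long offset, u32 *value)
	{
		__be32 data;
		int rcode;

		rcode = fw_run_transaction(device->card,
					   TCODE_READ_QUADLET_REQUEST,
					   device->node_id, device->generation,
					   device->max_speed, offset,
					   &data, sizeof(data));
		if (rcode != RCODE_COMPLETE)
			return -EIO;

		*value = be32_to_cpu(data);
		return 0;
	}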
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index ca2680c..c79f134 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -740,6 +740,7 @@
 #define PCI_VENDOR_ID_TI		0x104c
 #define PCI_DEVICE_ID_TI_TVP4020	0x3d07
 #define PCI_DEVICE_ID_TI_4450		0x8011
+#define PCI_DEVICE_ID_TI_TSB43AB22	0x8023
 #define PCI_DEVICE_ID_TI_XX21_XX11	0x8031
 #define PCI_DEVICE_ID_TI_XX21_XX11_SD	0x8034
 #define PCI_DEVICE_ID_TI_X515		0x8036