From: Eduardo Habkost <ehabkost@redhat.com>
Date: Wed, 24 Sep 2008 12:47:44 -0300
Subject: [misc] preempt-notifier fixes
Message-id: 20080924154744.GM11604@blackpad
O-Subject: Re: [RHEL5.3 PATCH] preempt-notifiers implementation (respin)
Bugzilla: 459838
RH-Acked-by: Peter Zijlstra <pzijlstr@redhat.com>
RH-Acked-by: Glauber Costa <glommer@redhat.com>
RH-Acked-by: Mark McLoughlin <markmc@redhat.com>
RH-Acked-by: john cooper <john.cooper@redhat.com>

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=459838

There is a potential deadlock in the current preempt-notifiers code:
the preempt_notifier functions grab the notifier list spinlock without
disabling IRQs.

The deadlock would go like this: CPU 1 grabs the lock in
preempt_notifier_register(). CPU 2 tries to grab the lock in
schedule()/fire_*_preempt_notifiers() and spins on it with IRQs
disabled. CPU 1 is then interrupted while holding the lock, and the
interrupt handler sends a synchronous IPI to CPU 2. CPU 2 can never
service the IPI (its IRQs are off and the lock owner never releases
the lock), so CPU 1 never returns from the interrupt and never drops
the lock -> deadlock.
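For illustration, a minimal sketch of the two sides of the race
(hypothetical helper names, not code from this tree):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(bucket_lock);	/* stands in for b->lock */

/* CPU 1: process context, IRQs still enabled */
static void register_side(void)
{
	spin_lock(&bucket_lock);
	/*
	 * An interrupt can arrive here.  If its handler issues a
	 * synchronous IPI (e.g. smp_call_function() with wait=1),
	 * CPU 1 spins waiting for CPU 2 to acknowledge the IPI --
	 * while still holding bucket_lock.
	 */
	spin_unlock(&bucket_lock);
}

/* CPU 2: called from schedule() with IRQs disabled */
static void fire_side(void)
{
	spin_lock(&bucket_lock);	/* spins forever: the owner (CPU 1)
					 * is stuck in the IPI path, and
					 * with IRQs off this CPU can
					 * never service that IPI */
	spin_unlock(&bucket_lock);
}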

We caught a similar IPI deadlock with earlier versions of the patch,
where the spinlock was held while the preempt notifier callbacks were
being called.

The fix is a simple replacement of spin_lock() with spin_lock_irqsave()
in the preempt notifier functions.
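Note the choice of the _irqsave variant: the fire_*_preempt_notifiers()
paths are reached from schedule() with IRQs already disabled, and
spin_unlock_irq() would unconditionally re-enable them there, while
_irqsave/_irqrestore preserve the caller's IRQ state. A sketch of the
resulting pattern (illustrative only, using the notifier_hbucket type
from this series):

static void bucket_op(struct notifier_hbucket *b)
{
	unsigned long flags;

	spin_lock_irqsave(&b->lock, flags);	/* IRQs off, old state in flags */
	/* ... add/remove/scan b->notifiers ... */
	spin_unlock_irqrestore(&b->lock, flags); /* stays off if caller had IRQs off */
}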

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>

diff --git a/kernel/sched.c b/kernel/sched.c
index acccea6..c01759b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1817,15 +1817,16 @@ void preempt_notifier_register(struct preempt_notifier *notifier)
 {
 	struct task_struct *task = current;
 	struct notifier_hbucket *b;
+	unsigned long flags;
 
 	BUG_ON(task->flags & PF_PREEMPT_NOTIFIER);
 	task->flags |= PF_PREEMPT_NOTIFIER;
 	notifier->task = task;
 
 	b = task_hbucket(task);
-	spin_lock(&b->lock);
+	spin_lock_irqsave(&b->lock, flags);
 	hlist_add_head(&notifier->link, &b->notifiers);
-	spin_unlock(&b->lock);
+	spin_unlock_irqrestore(&b->lock, flags);
 }
 EXPORT_SYMBOL_GPL(preempt_notifier_register);
 
@@ -1838,10 +1839,11 @@ void preempt_notifier_unregister(struct preempt_notifier *notifier)
 {
 	struct task_struct *task = notifier->task;
 	struct notifier_hbucket *b = task_hbucket(task);
+	unsigned long flags;
 
-	spin_lock(&b->lock);
+	spin_lock_irqsave(&b->lock, flags);
 	hlist_del(&notifier->link);
-	spin_unlock(&b->lock);
+	spin_unlock_irqrestore(&b->lock, flags);
 
 	notifier->task = NULL;
 	task->flags &= ~PF_PREEMPT_NOTIFIER;
@@ -1855,18 +1857,19 @@ fire_sched_in_preempt_notifiers(struct task_struct *curr)
 	int found = 0;
 	struct hlist_node *node;
 	struct notifier_hbucket *b;
+	unsigned long flags;
 
 	if (!(curr->flags & PF_PREEMPT_NOTIFIER))
 		return;
 
 	b = task_hbucket(curr);
-	spin_lock(&b->lock);
+	spin_lock_irqsave(&b->lock, flags);
 	hlist_for_each_entry(notifier, node, &b->notifiers, link)
 		if (notifier->task == curr) {
 			found = 1;
 			break;
 		}
-	spin_unlock(&b->lock);
+	spin_unlock_irqrestore(&b->lock, flags);
 
 	if (found)
 		notifier->ops->sched_in(notifier, raw_smp_processor_id());
@@ -1880,18 +1883,19 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr,
 	int found = 0;
 	struct hlist_node *node;
 	struct notifier_hbucket *b;
+	unsigned long flags;
 
 	if (!(curr->flags & PF_PREEMPT_NOTIFIER))
 		return;
 
 	b = task_hbucket(curr);
-	spin_lock(&b->lock);
+	spin_lock_irqsave(&b->lock, flags);
 	hlist_for_each_entry(notifier, node, &b->notifiers, link)
 		if (notifier->task == curr) {
 			found = 1;
 			break;
 		}
-	spin_unlock(&b->lock);
+	spin_unlock_irqrestore(&b->lock, flags);
 
 	if (found)
 		notifier->ops->sched_out(notifier, next);