Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > fc11cd6e1c513a17304da94a5390f3cd > files > 887

kernel-2.6.18-194.11.1.el5.src.rpm

From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 16 Jul 2008 09:36:08 -0400
Subject: [fs] lockd: nlmsvc_lookup_host called with f_sema held
Message-id: 1216215368-12308-1-git-send-email-jlayton@redhat.com
O-Subject: [RHEL5.3 PATCH] BZ#453094: lockd: don't call nlmsvc_lookup_host with the f_sema held
Bugzilla: 453094
RH-Acked-by: Peter Staubach <staubach@redhat.com>

There is a possible deadlock in the NLM code where lockd can try to take
the f_sema for an nlm_file that it already holds. The call chain is
something like:

nlmsvc_lock (take the f_sema)
  nlmsvc_create_block
    nlmsvc_lookup_host
      nlm_lookup_host
        nlm_gc_hosts
          nlmsvc_traverse_files (try to take the f_sema)

...obviously, this will deadlock. This appears to be a regression caused
by the patch for BZ#196318. Prior to that patch, nlmsvc_create_block
wasn't called with the f_sema held.

The fix is to just use the host pointer that we get from
nlmsvc_retrieve_args and pass it down to nlmsvc_create_block. This is
also more efficient since we're currently doing two calls into
nlm_lookup_host for each lock and testlock.

This was originally fixed upstream in 2.6.25 by just moving the
nlmsvc_lookup_host call from nlmsvc_create_block into its callers.  The
change to just pass the host pointer instead of doing these duplicate
calls was just taken upstream into Bruce Fields' git tree and is slated
for 2.6.27.

This is a difficult problem to reproduce on a stock kernel because
nlm_gc_hosts is only called by nlm_lookup_host when a timer expires.  I
was able to easily reproduce it, however, by modifying nlm_lookup_host
to always call nlm_gc_hosts. With that change, I was able to verify that
this patch fixes the problem.

diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 2eb0a61..236b211 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -103,7 +103,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 			rpc_drop_reply :rpc_success;
 
 	/* Now check for conflicting locks */
-	resp->status = nlmsvc_testlock(rqstp, file, &argp->lock,
+	resp->status = nlmsvc_testlock(rqstp, file, host, &argp->lock,
 				       &resp->lock, &resp->cookie);
 	if (resp->status == nlm_drop_reply)
 		return rpc_drop_reply;
@@ -149,7 +149,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 #endif
 
 	/* Now try to lock the file */
-	resp->status = nlmsvc_lock(rqstp, file, &argp->lock,
+	resp->status = nlmsvc_lock(rqstp, file, host, &argp->lock,
 					argp->block, &argp->cookie);
 	if (resp->status == nlm_drop_reply)
 		return rpc_drop_reply;
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 60ca5f1..48acce0 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -171,18 +171,14 @@ nlmsvc_find_block(struct nlm_cookie *cookie,  struct sockaddr_in *sin)
  * logging industries.
  */
 static inline struct nlm_block *
-nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
-		struct nlm_lock *lock, struct nlm_cookie *cookie, int conf)
+nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host,
+		struct nlm_file *file, struct nlm_lock *lock,
+		struct nlm_cookie *cookie, int conf)
 {
 	struct nlm_block	*block;
-	struct nlm_host		*host;
 	struct nlm_rqst		*call = NULL;
 
-	/* Create host handle for callback */
-	host = nlmsvc_lookup_host(rqstp);
-	if (host == NULL)
-		return NULL;
-
+	nlm_get_host(host);
 	call = nlm_alloc_call(host);
 	if (call == NULL)
 		return NULL;
@@ -387,7 +383,8 @@ nlmsvc_defer_lock_rqst(struct svc_rqst *rqstp, struct nlm_block *block)
  */
 u32
 nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
-			struct nlm_lock *lock, int wait, struct nlm_cookie *cookie)
+		struct nlm_host *host, struct nlm_lock *lock,
+		int wait, struct nlm_cookie *cookie)
 {
 	struct nlm_block	*block = NULL;
 	int			error;
@@ -401,7 +398,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 				(long long)lock->fl.fl_end,
 				wait);
 
-
 	/* Lock file against concurrent access */
 	down(&file->f_sema);
 	/* Get existing block (in case client is busy-waiting)
@@ -409,7 +405,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 	 */
 	block = nlmsvc_lookup_block(file, lock);
 	if (block == NULL) {
-		block = nlmsvc_create_block(rqstp, file, lock, cookie, 0);
+		block = nlmsvc_create_block(rqstp, host, file, lock, cookie, 0);
 		ret = nlm_lck_denied_nolocks;
 		if (block == NULL)
 			goto out;
@@ -482,8 +478,8 @@ out:
  */
 u32
 nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
-		struct nlm_lock *lock, struct nlm_lock *conflock,
-		struct nlm_cookie *cookie)
+		struct nlm_host *host, struct nlm_lock *lock,
+		struct nlm_lock *conflock, struct nlm_cookie *cookie)
 {
 	struct nlm_block 	*block = NULL;
 	int			error;
@@ -500,7 +496,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
 	block = nlmsvc_lookup_block(file, lock);
 
 	if (block == NULL) {
-		block = nlmsvc_create_block(rqstp, file, lock, cookie, 1);
+		block = nlmsvc_create_block(rqstp, host, file, lock, cookie, 1);
 		if (block == NULL)
 			return nlm_granted;
 	}
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index dc6e5f7..46cf060 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -132,8 +132,8 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 			rpc_drop_reply :rpc_success;
 
 	/* Now check for conflicting locks */
-	resp->status = cast_status(nlmsvc_testlock(rqstp, file, &argp->lock,
-				   &resp->lock, &resp->cookie));
+	resp->status = cast_status(nlmsvc_testlock(rqstp, file, host,
+				   &argp->lock, &resp->lock, &resp->cookie));
 	if (resp->status == nlm_drop_reply)
 		return rpc_drop_reply;
 
@@ -179,7 +179,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 #endif
 
 	/* Now try to lock the file */
-	resp->status = cast_status(nlmsvc_lock(rqstp, file, &argp->lock,
+	resp->status = cast_status(nlmsvc_lock(rqstp, file, host, &argp->lock,
 					       argp->block, &argp->cookie));
 	if (resp->status == nlm_drop_reply)
 		return rpc_drop_reply;
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 0f92d53..eeaf356 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -186,11 +186,12 @@ extern struct nlm_host *nlm_find_client(void);
  * Server-side lock handling
  */
 u32		  nlmsvc_lock(struct svc_rqst *, struct nlm_file *,
-					struct nlm_lock *, int, struct nlm_cookie *);
+				struct nlm_host *, struct nlm_lock *, int,
+				struct nlm_cookie *);
 u32		  nlmsvc_unlock(struct nlm_file *, struct nlm_lock *);
 u32		  nlmsvc_testlock(struct svc_rqst *, struct nlm_file *,
-				  struct nlm_lock *, struct nlm_lock *,
-				  struct nlm_cookie *);
+				  struct nlm_host *, struct nlm_lock *,
+				  struct nlm_lock *, struct nlm_cookie *);
 u32		  nlmsvc_cancel_blocked(struct nlm_file *, struct nlm_lock *);
 unsigned long	  nlmsvc_retry_blocked(void);
 void		  nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,