Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 27922b4260f65d317aabda37e42bbbff > files > 1132

kernel-2.6.18-238.el5.src.rpm

From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 8 Jul 2010 14:30:13 -0400
Subject: [fs] nfs: fix resolution in nfs_inode_attrs_need_update
Message-id: <1278599414-23803-1-git-send-email-jlayton@redhat.com>
Patchwork-id: 26766
O-Subject: [RHEL5.6 PATCH 7/4] BZ#601800: nfs: fix resolution problem with
	nfs_inode_attrs_need_update
Bugzilla: 601800
RH-Acked-by: Steve Dickson <SteveD@redhat.com>

From: Trond Myklebust <Trond.Myklebust@netapp.com>

Jarod Wilson and Jeff Burke noticed that we were still getting
intermittent connectathon test failures even after the last patches for
this bug were added on. The test failures were always in the same spot
and it suggested that the client was occasionally ignoring metadata
updates that it shouldn't have. This patch is a backported fusion of the
following two upstream patches:

4704f0e274829e3af00737d2d9adace2d71a9605
ae05f269400533cbb32bfba131ab528d78dffd16

The main idea is to turn the last_updated timestamp in the nfs inode to
a counter. The upstream code used atomic_long_t for the counter, but
RHEL5 doesn't have that so I used atomic_t instead. The code is guarded
against overflow of the counter and atomic_long_t == atomic_t on 32 bit
arches anyway. Based on that I believe this should be sufficient.

This was tested by Jarod and Jeff with RHTS and it seems to have fixed
all of the connectathon test failures that they were seeing. I also ran
several thousand Connectathon test passes on this kernel.

----------------------[snip]-------------------

NFS: Fix the resolution problem with nfs_inode_attrs_need_update()

It appears that 'jiffies' timestamps do not have high enough resolution
for nfs_inode_attrs_need_update(). One problem is that a GETATTR can be
launched within < 1 jiffy of the last operation that updated the
attribute.  Another problem is that RPC calls can take < 1 jiffy to
execute.

We can fix this by switching the variables to use a simple global
counter that gets incremented every time we start another GETATTR call.

----------------------[snip]-------------------

NFS: Convert nfs_attr_generation_counter into an atomic_long

The most important property we need from nfs_attr_generation_counter is
monotonicity, which is not guaranteed by the current system of smp
memory barriers. We should convert it to an atomic_long_t, and drop the
memory barriers.

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e07dd1c..3d90422 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -152,8 +152,9 @@ typedef struct {
 	struct nfs_entry *entry;
 	decode_dirent_t	decode;
 	int		plus;
-	unsigned long   timestamp;
-	int             timestamp_valid;
+	unsigned long	timestamp;
+	int		gencount;
+	int		timestamp_valid;
 } nfs_readdir_descriptor_t;
 
 /* Now we cache directories properly, by stuffing the dirent
@@ -175,6 +176,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
 	struct inode	*inode = file->f_dentry->d_inode;
 	struct rpc_cred	*cred = nfs_file_cred(file);
 	unsigned long	timestamp;
+	int		gencount;
 	int		error;
 
 	dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n",
@@ -183,6 +185,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
 
  again:
 	timestamp = jiffies;
+	gencount = nfs_inc_attr_generation_counter();
 	error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->entry->cookie, page,
 					  NFS_SERVER(inode)->dtsize, desc->plus);
 	if (error < 0) {
@@ -196,6 +199,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
 		goto error;
 	}
 	desc->timestamp = timestamp;
+	desc->gencount = gencount;
 	desc->timestamp_valid = 1;
 	SetPageUptodate(page);
 	/* Ensure consistent page alignment of the data.
@@ -221,9 +225,10 @@ int dir_decode(nfs_readdir_descriptor_t *desc)
 	if (IS_ERR(p))
 		return PTR_ERR(p);
 	desc->ptr = p;
-	if (desc->timestamp_valid)
+	if (desc->timestamp_valid) {
 		desc->entry->fattr->time_start = desc->timestamp;
-	else
+		desc->entry->fattr->gencount = desc->gencount;
+	} else
 		desc->entry->fattr->valid &= ~NFS_ATTR_FATTR;
 	return 0;
 }
@@ -474,6 +479,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 	struct page	*page = NULL;
 	int		status;
 	unsigned long	timestamp;
+	int		gencount;
 
 	dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
 			(unsigned long long)*desc->dir_cookie);
@@ -484,6 +490,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 		goto out;
 	}
 	timestamp = jiffies;
+	gencount = nfs_inc_attr_generation_counter();
 	status = NFS_PROTO(inode)->readdir(file->f_dentry, cred,
 						*desc->dir_cookie, page,
 						NFS_SERVER(inode)->dtsize,
@@ -492,6 +499,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 	desc->ptr = kmap(page);		/* matching kunmap in nfs_do_filldir */
 	if (status >= 0) {
 		desc->timestamp = timestamp;
+		desc->gencount = gencount;
 		desc->timestamp_valid = 1;
 		if ((status = dir_decode(desc)) == 0)
 			desc->entry->prev_cookie = *desc->dir_cookie;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e8ef5cf..68d89c6 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -303,7 +303,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 			init_special_inode(inode, inode->i_mode, fattr->rdev);
 
 		nfsi->read_cache_jiffies = fattr->time_start;
-		nfsi->last_updated = jiffies;
+		nfsi->attr_gencount = fattr->gencount;
 		inode->i_atime = fattr->atime;
 		inode->i_mtime = fattr->mtime;
 		inode->i_ctime = fattr->ctime;
@@ -862,6 +862,25 @@ static int nfs_size_need_update(struct inode *inode, const struct nfs_fattr *fat
 	return nfs_size_to_loff_t(fattr->size) > i_size_read(inode);
 }
 
+static atomic_t nfs_attr_generation_counter;
+
+static int nfs_read_attr_generation_counter(void)
+{
+	return atomic_read(&nfs_attr_generation_counter);
+}
+
+int nfs_inc_attr_generation_counter(void)
+{
+	return atomic_inc_return(&nfs_attr_generation_counter);
+}
+
+void nfs_fattr_init(struct nfs_fattr *fattr)
+{
+	fattr->valid = 0;
+	fattr->time_start = jiffies;
+	fattr->gencount = nfs_inc_attr_generation_counter();
+}
+
 /**
  * nfs_inode_attrs_need_update - check if the inode attributes need updating
  * @inode - pointer to inode
@@ -875,8 +894,7 @@ static int nfs_size_need_update(struct inode *inode, const struct nfs_fattr *fat
  * catch the case where ctime either didn't change, or went backwards
  * (if someone reset the clock on the server) by looking at whether
  * or not this RPC call was started after the inode was last updated.
- * Note also the check for jiffy wraparound if the last_updated timestamp
- * is later than 'jiffies'.
+ * Note also the check for wraparound of 'attr_gencount'
  *
  * The function returns 'true' if it thinks the attributes in 'fattr' are
  * more recent than the ones cached in the inode.
@@ -886,10 +904,10 @@ static int nfs_inode_attrs_need_update(struct inode *inode, struct nfs_fattr *fa
 {
 	const struct nfs_inode *nfsi = NFS_I(inode);
 
-	return time_after(fattr->time_start, nfsi->last_updated) ||
+	return (fattr->gencount - nfsi->attr_gencount) > 0 ||
 		nfs_ctime_need_update(inode, fattr) ||
 		nfs_size_need_update(inode, fattr) ||
-		time_after(nfsi->last_updated, jiffies);
+		(nfsi->attr_gencount - nfs_read_attr_generation_counter() > 0);
 }
 
 static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
@@ -1033,7 +1051,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		}
 		/* If ctime has changed we should definitely clear access+acl caches */
 		if (!timespec_equal(&inode->i_ctime, &fattr->ctime))
-			invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
 	} else if (nfsi->change_attr != fattr->change_attr) {
 		dprintk("NFS: change_attr change on server for file %s/%ld\n",
 				inode->i_sb->s_id, inode->i_ino);
@@ -1085,7 +1103,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
 		nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
 		nfsi->attrtimeo_timestamp = jiffies;
-		nfsi->last_updated = jiffies;
+		nfsi->attr_gencount = nfs_inc_attr_generation_counter();
 	} else if (!time_in_range_open(jiffies, nfsi->attrtimeo_timestamp,
 				nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
 		if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode))
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 4fc3a1d..60da650 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -139,7 +139,7 @@ struct nfs_inode {
 	unsigned long		attrtimeo_timestamp;
 	__u64			change_attr;		/* v4 only */
 
-	unsigned long		last_updated;
+	int			attr_gencount;
 	/* "Generation counter" for the attribute cache. This is
 	 * bumped whenever we update the metadata on the
 	 * server.
@@ -332,15 +332,11 @@ extern struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
 					struct nfs_fh *fh,
 					struct nfs_fattr *fattr);
 extern u64 nfs_compat_user_ino64(u64 fileid);
+extern void nfs_fattr_init(struct nfs_fattr *fattr);
 
 /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
 extern u32 root_nfs_parse_addr(char *name); /*__init*/
-
-static inline void nfs_fattr_init(struct nfs_fattr *fattr)
-{
-	fattr->valid = 0;
-	fattr->time_start = jiffies;
-}
+extern int nfs_inc_attr_generation_counter(void);
 
 /*
  * linux/fs/nfs/file.c
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 317713a..f340b94 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -57,6 +57,7 @@ struct nfs_fattr {
 	__u64			change_attr;	/* NFSv4 change attribute */
 	__u64			pre_change_attr;/* pre-op NFSv4 change attribute */
 	unsigned long		time_start;
+	int			gencount;
 };
 
 #define NFS_ATTR_WCC		0x0001		/* pre-op WCC data    */