Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > fc11cd6e1c513a17304da94a5390f3cd > files > 2553

kernel-2.6.18-194.11.1.el5.src.rpm

Date: Fri, 29 Sep 2006 12:41:36 -0400
From: Steve Dickson <SteveD@redhat.com>
Subject: [RHEL5][PATCH 1/3 ] NFS is revalidating directory entries too often


NFS: Add a new ACL cache to the linux nfs client

From: Trond Myklebust <Trond.Myklebust@netapp.com>

The current ACL (access) cache only allows one entry at a time to be cached
for each inode. Add a per-inode red-black tree, keyed by RPC credential, in
order to allow more than one entry to be cached at a time.

This should significantly cut down the access time for shared directories
such as /bin, etc.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---

--- linux-2.6.18.i686/fs/nfs/dir.c.001	2006-09-29 10:12:21.000000000 -0400
+++ linux-2.6.18.i686/fs/nfs/dir.c	2006-09-29 12:13:59.000000000 -0400
@@ -1634,35 +1634,134 @@ out:
 	return error;
 }
 
-int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
+static void nfs_access_free_entry(struct nfs_access_entry *entry)
+{
+	put_rpccred(entry->cred);
+	kfree(entry);
+}
+
+static void __nfs_access_zap_cache(struct inode *inode)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs_access_entry *cache = &nfsi->cache_access;
+	struct rb_root *root_node = &nfsi->access_cache;
+	struct rb_node *n, *dispose = NULL;
+	struct nfs_access_entry *entry;
+
+	/* Unhook entries from the cache */
+	while ((n = rb_first(root_node)) != NULL) {
+		entry = rb_entry(n, struct nfs_access_entry, rb_node);
+		rb_erase(n, root_node);
+		n->rb_left = dispose;
+		dispose = n;
+	}
+	nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
+	spin_unlock(&inode->i_lock);
 
-	if (cache->cred != cred
-			|| time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))
-			|| (nfsi->cache_validity & NFS_INO_INVALID_ACCESS))
-		return -ENOENT;
-	memcpy(res, cache, sizeof(*res));
-	return 0;
+	/* Now kill them all! */
+	while (dispose != NULL) {
+		n = dispose;
+		dispose = n->rb_left;
+		nfs_access_free_entry(rb_entry(n, struct nfs_access_entry, rb_node));
+	}
 }
 
-void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
+void nfs_access_zap_cache(struct inode *inode)
 {
-	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs_access_entry *cache = &nfsi->cache_access;
+	spin_lock(&inode->i_lock);
+	/* This will release the spinlock */
+	__nfs_access_zap_cache(inode);
+}
+
+static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred)
+{
+	struct rb_node *n = NFS_I(inode)->access_cache.rb_node;
+	struct nfs_access_entry *entry;
+
+	while (n != NULL) {
+		entry = rb_entry(n, struct nfs_access_entry, rb_node);
 
-	if (cache->cred != set->cred) {
-		if (cache->cred)
-			put_rpccred(cache->cred);
-		cache->cred = get_rpccred(set->cred);
+		if (cred < entry->cred)
+			n = n->rb_left;
+		else if (cred > entry->cred)
+			n = n->rb_right;
+		else
+			return entry;
 	}
-	/* FIXME: replace current access_cache BKL reliance with inode->i_lock */
+	return NULL;
+}
+
+int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_access_entry *cache;
+	int err = -ENOENT;
+
 	spin_lock(&inode->i_lock);
-	nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
+	if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
+		goto out_zap;
+	cache = nfs_access_search_rbtree(inode, cred);
+	if (cache == NULL)
+		goto out;
+	if (time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)))
+		goto out_stale;
+	res->jiffies = cache->jiffies;
+	res->cred = cache->cred;
+	res->mask = cache->mask;
+	err = 0;
+out:
 	spin_unlock(&inode->i_lock);
+	return err;
+out_stale:
+	rb_erase(&cache->rb_node, &nfsi->access_cache);
+	spin_unlock(&inode->i_lock);
+	nfs_access_free_entry(cache);
+	return -ENOENT;
+out_zap:
+	/* This will release the spinlock */
+	__nfs_access_zap_cache(inode);
+	return -ENOENT;
+}
+
+static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
+{
+	struct rb_root *root_node = &NFS_I(inode)->access_cache;
+	struct rb_node **p = &root_node->rb_node;
+	struct rb_node *parent = NULL;
+	struct nfs_access_entry *entry;
+
+	spin_lock(&inode->i_lock);
+	while (*p != NULL) {
+		parent = *p;
+		entry = rb_entry(parent, struct nfs_access_entry, rb_node);
+
+		if (set->cred < entry->cred)
+			p = &parent->rb_left;
+		else if (set->cred > entry->cred)
+			p = &parent->rb_right;
+		else
+			goto found;
+	}
+	rb_link_node(&set->rb_node, parent, p);
+	rb_insert_color(&set->rb_node, root_node);
+	spin_unlock(&inode->i_lock);
+	return;
+found:
+	rb_replace_node(parent, &set->rb_node, root_node);
+	spin_unlock(&inode->i_lock);
+	nfs_access_free_entry(entry);
+}
+
+void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
+{
+	struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
+	if (cache == NULL)
+		return;
+	RB_CLEAR_NODE(&cache->rb_node);
 	cache->jiffies = set->jiffies;
+	cache->cred = get_rpccred(set->cred);
 	cache->mask = set->mask;
+
+	nfs_access_add_rbtree(inode, cache);
 }
 
 static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
--- linux-2.6.18.i686/fs/nfs/inode.c.001	2006-09-29 10:12:21.000000000 -0400
+++ linux-2.6.18.i686/fs/nfs/inode.c	2006-09-29 12:16:50.000000000 -0400
@@ -70,21 +70,16 @@ int nfs_write_inode(struct inode *inode,
 
 void nfs_clear_inode(struct inode *inode)
 {
-	struct nfs_inode *nfsi = NFS_I(inode);
-	struct rpc_cred *cred;
-
 	/*
 	 * The following should never happen...
 	 */
 	BUG_ON(nfs_have_writebacks(inode));
-	BUG_ON (!list_empty(&nfsi->open_files));
+	BUG_ON (!list_empty(&NFS_I(inode)->open_files));
+	BUG_ON(atomic_read(&NFS_I(inode)->data_updates) != 0);
 	nfs_zap_acl_cache(inode);
-	cred = nfsi->cache_access.cred;
-	if (cred)
-		put_rpccred(cred);
+	nfs_access_zap_cache(inode);
 
-	nfs_fscache_release_fh_cookie(NFS_SERVER(inode), nfsi);
-	BUG_ON(atomic_read(&nfsi->data_updates) != 0);
+	nfs_fscache_release_fh_cookie(NFS_SERVER(inode), NFS_I(inode));
 }
 
 /**
@@ -291,7 +286,7 @@ nfs_fhget(struct super_block *sb, struct
 		nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
 		nfsi->attrtimeo_timestamp = jiffies;
 		memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
-		nfsi->cache_access.cred = NULL;
+		nfsi->access_cache = RB_ROOT;
 
 		nfs_fscache_get_fh_cookie(sb, nfsi, maycache);
 
--- linux-2.6.18.i686/include/linux/nfs_fs.h.001	2006-09-29 10:12:21.000000000 -0400
+++ linux-2.6.18.i686/include/linux/nfs_fs.h	2006-09-29 12:14:00.000000000 -0400
@@ -42,6 +42,7 @@
 #include <linux/in.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
+#include <linux/rbtree.h>
 #include <linux/rwsem.h>
 #include <linux/wait.h>
 
@@ -70,6 +71,7 @@
  * NFSv3/v4 Access mode cache entry
  */
 struct nfs_access_entry {
+	struct rb_node		rb_node;
 	unsigned long		jiffies;
 	struct rpc_cred *	cred;
 	int			mask;
@@ -146,7 +148,7 @@ struct nfs_inode {
 	 */
 	atomic_t		data_updates;
 
-	struct nfs_access_entry	cache_access;
+	struct rb_root		access_cache;
 #ifdef CONFIG_NFS_V3_ACL
 	struct posix_acl	*acl_access;
 	struct posix_acl	*acl_default;
@@ -301,6 +303,7 @@ extern int nfs_getattr(struct vfsmount *
 extern int nfs_permission(struct inode *, int, struct nameidata *);
 extern int nfs_access_get_cached(struct inode *, struct rpc_cred *, struct nfs_access_entry *);
 extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *);
+extern void nfs_access_zap_cache(struct inode *inode);
 extern int nfs_open(struct inode *, struct file *);
 extern int nfs_release(struct inode *, struct file *);
 extern int nfs_attribute_timeout(struct inode *inode);

Date: Fri, 29 Sep 2006 12:42:27 -0400
From: Steve Dickson <SteveD@redhat.com>
Subject: [RHEL5][PATCH 2/3 ] NFS is revalidating directory entries too often


NFS: Add a global LRU list for the ACL cache

From: Trond Myklebust <Trond.Myklebust@netapp.com>

...in order to allow the addition of a memory shrinker.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---

--- linux-2.6.18.i686/fs/nfs/dir.c.002	2006-09-29 12:13:59.000000000 -0400
+++ linux-2.6.18.i686/fs/nfs/dir.c	2006-09-29 12:20:58.000000000 -0400
@@ -1634,10 +1634,17 @@ out:
 	return error;
 }
 
+static DEFINE_SPINLOCK(nfs_access_lru_lock);
+static LIST_HEAD(nfs_access_lru_list);
+static atomic_long_t nfs_access_nr_entries;
+
 static void nfs_access_free_entry(struct nfs_access_entry *entry)
 {
 	put_rpccred(entry->cred);
 	kfree(entry);
+	smp_mb__before_atomic_dec();
+	atomic_long_dec(&nfs_access_nr_entries);
+	smp_mb__after_atomic_dec();
 }
 
 static void __nfs_access_zap_cache(struct inode *inode)
@@ -1651,6 +1658,7 @@ static void __nfs_access_zap_cache(struc
 	while ((n = rb_first(root_node)) != NULL) {
 		entry = rb_entry(n, struct nfs_access_entry, rb_node);
 		rb_erase(n, root_node);
+		list_del(&entry->lru);
 		n->rb_left = dispose;
 		dispose = n;
 	}
@@ -1667,6 +1675,13 @@ static void __nfs_access_zap_cache(struc
 
 void nfs_access_zap_cache(struct inode *inode)
 {
+	/* Remove from global LRU init */
+	if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) {
+		spin_lock(&nfs_access_lru_lock);
+		list_del_init(&NFS_I(inode)->access_cache_inode_lru);
+		spin_unlock(&nfs_access_lru_lock);
+	}
+
 	spin_lock(&inode->i_lock);
 	/* This will release the spinlock */
 	__nfs_access_zap_cache(inode);
@@ -1707,12 +1722,14 @@ int nfs_access_get_cached(struct inode *
 	res->jiffies = cache->jiffies;
 	res->cred = cache->cred;
 	res->mask = cache->mask;
+	list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru);
 	err = 0;
 out:
 	spin_unlock(&inode->i_lock);
 	return err;
 out_stale:
 	rb_erase(&cache->rb_node, &nfsi->access_cache);
+	list_del(&cache->lru);
 	spin_unlock(&inode->i_lock);
 	nfs_access_free_entry(cache);
 	return -ENOENT;
@@ -1724,7 +1741,8 @@ out_zap:
 
 static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
 {
-	struct rb_root *root_node = &NFS_I(inode)->access_cache;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct rb_root *root_node = &nfsi->access_cache;
 	struct rb_node **p = &root_node->rb_node;
 	struct rb_node *parent = NULL;
 	struct nfs_access_entry *entry;
@@ -1743,10 +1761,13 @@ static void nfs_access_add_rbtree(struct
 	}
 	rb_link_node(&set->rb_node, parent, p);
 	rb_insert_color(&set->rb_node, root_node);
+	list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
 	spin_unlock(&inode->i_lock);
 	return;
 found:
 	rb_replace_node(parent, &set->rb_node, root_node);
+	list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
+	list_del(&entry->lru);
 	spin_unlock(&inode->i_lock);
 	nfs_access_free_entry(entry);
 }
@@ -1762,6 +1783,18 @@ void nfs_access_add_cache(struct inode *
 	cache->mask = set->mask;
 
 	nfs_access_add_rbtree(inode, cache);
+
+	/* Update accounting */
+	smp_mb__before_atomic_inc();
+	atomic_long_inc(&nfs_access_nr_entries);
+	smp_mb__after_atomic_inc();
+
+	/* Add inode to global LRU list */
+	if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) {
+		spin_lock(&nfs_access_lru_lock);
+		list_add_tail(&NFS_I(inode)->access_cache_inode_lru, &nfs_access_lru_list);
+		spin_unlock(&nfs_access_lru_lock);
+	}
 }
 
 static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
--- linux-2.6.18.i686/fs/nfs/inode.c.002	2006-09-29 12:16:50.000000000 -0400
+++ linux-2.6.18.i686/fs/nfs/inode.c	2006-09-29 12:20:58.000000000 -0400
@@ -1114,6 +1114,8 @@ static void init_once(void * foo, kmem_c
 		INIT_LIST_HEAD(&nfsi->dirty);
 		INIT_LIST_HEAD(&nfsi->commit);
 		INIT_LIST_HEAD(&nfsi->open_files);
+		INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
+		INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
 		INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
 		atomic_set(&nfsi->data_updates, 0);
 		nfsi->ndirty = 0;
--- linux-2.6.18.i686/include/linux/nfs_fs.h.002	2006-09-29 12:14:00.000000000 -0400
+++ linux-2.6.18.i686/include/linux/nfs_fs.h	2006-09-29 12:20:59.000000000 -0400
@@ -72,6 +72,7 @@
  */
 struct nfs_access_entry {
 	struct rb_node		rb_node;
+	struct list_head	lru;
 	unsigned long		jiffies;
 	struct rpc_cred *	cred;
 	int			mask;
@@ -149,6 +150,8 @@ struct nfs_inode {
 	atomic_t		data_updates;
 
 	struct rb_root		access_cache;
+	struct list_head	access_cache_entry_lru;
+	struct list_head	access_cache_inode_lru;
 #ifdef CONFIG_NFS_V3_ACL
 	struct posix_acl	*acl_access;
 	struct posix_acl	*acl_default;
@@ -205,6 +208,7 @@ struct nfs_inode {
 #define NFS_INO_REVALIDATING	(0)		/* revalidating attrs */
 #define NFS_INO_ADVISE_RDPLUS	(1)		/* advise readdirplus */
 #define NFS_INO_STALE		(2)		/* possible stale inode */
+#define NFS_INO_ACL_LRU_SET	(3)		/* Inode is on the LRU list */
 
 static inline struct nfs_inode *NFS_I(struct inode *inode)
 {

Date: Fri, 29 Sep 2006 12:42:56 -0400
From: Steve Dickson <SteveD@redhat.com>
Subject: [RHEL5][PATCH 3/3 ] NFS is revalidating directory entries too often


NFS: Add acl cache shrinker for the VM

From: Trond Myklebust <Trond.Myklebust@netapp.com>

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---

--- linux-2.6.18.i686/fs/nfs/dir.c.003	2006-09-29 12:20:58.000000000 -0400
+++ linux-2.6.18.i686/fs/nfs/dir.c	2006-09-29 12:22:24.000000000 -0400
@@ -1647,6 +1647,50 @@ static void nfs_access_free_entry(struct
 	smp_mb__after_atomic_dec();
 }
 
+int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
+{
+	LIST_HEAD(head);
+	struct nfs_inode *nfsi;
+	struct nfs_access_entry *cache;
+
+	spin_lock(&nfs_access_lru_lock);
+restart:
+	list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
+		struct inode *inode;
+
+		if (nr_to_scan-- == 0)
+			break;
+		inode = igrab(&nfsi->vfs_inode);
+		if (inode == NULL)
+			continue;
+		spin_lock(&inode->i_lock);
+		if (list_empty(&nfsi->access_cache_entry_lru))
+			goto remove_lru_entry;
+		cache = list_entry(nfsi->access_cache_entry_lru.next,
+				struct nfs_access_entry, lru);
+		list_move(&cache->lru, &head);
+		rb_erase(&cache->rb_node, &nfsi->access_cache);
+		if (!list_empty(&nfsi->access_cache_entry_lru))
+			list_move_tail(&nfsi->access_cache_inode_lru,
+					&nfs_access_lru_list);
+		else {
+remove_lru_entry:
+			list_del_init(&nfsi->access_cache_inode_lru);
+			clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
+		}
+		spin_unlock(&inode->i_lock);
+		iput(inode);
+		goto restart;
+	}
+	spin_unlock(&nfs_access_lru_lock);
+	while (!list_empty(&head)) {
+		cache = list_entry(head.next, struct nfs_access_entry, lru);
+		list_del(&cache->lru);
+		nfs_access_free_entry(cache);
+	}
+	return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure;
+}
+
 static void __nfs_access_zap_cache(struct inode *inode)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
--- linux-2.6.18.i686/fs/nfs/internal.h.003	2006-09-29 10:12:21.000000000 -0400
+++ linux-2.6.18.i686/fs/nfs/internal.h	2006-09-29 12:22:24.000000000 -0400
@@ -142,6 +142,9 @@ extern int nfs4_proc_fs_locations(struct
 				  struct page *page);
 #endif
 
+/* dir.c */
+extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
+
 /* inode.c */
 extern struct inode *nfs_alloc_inode(struct super_block *sb);
 extern void nfs_destroy_inode(struct inode *);
--- linux-2.6.18.i686/fs/nfs/super.c.003	2006-09-29 10:12:21.000000000 -0400
+++ linux-2.6.18.i686/fs/nfs/super.c	2006-09-29 12:22:24.000000000 -0400
@@ -137,6 +137,8 @@ static struct super_operations nfs4_sops
 };
 #endif
 
+static struct shrinker *acl_shrinker;
+
 /*
  * Register the NFS filesystems
  */
@@ -156,6 +158,7 @@ int __init register_nfs_fs(void)
 	if (ret < 0)
 		goto error_2;
 #endif
+	acl_shrinker = set_shrinker(DEFAULT_SEEKS, nfs_access_cache_shrinker);
 	return 0;
 
 #ifdef CONFIG_NFS_V4
@@ -173,6 +176,8 @@ error_0:
  */
 void __exit unregister_nfs_fs(void)
 {
+	if (acl_shrinker != NULL)
+		remove_shrinker(acl_shrinker);
 #ifdef CONFIG_NFS_V4
 	unregister_filesystem(&nfs4_fs_type);
 	nfs_unregister_sysctl();