From: Steve Dickson <SteveD@redhat.com> Date: Mon, 30 Aug 2010 17:39:39 -0400 Subject: [fs] nfsv4: ensure we track lock state in r/w requests Message-id: <1283189983-10655-2-git-send-email-steved@redhat.com> Patchwork-id: 27947 O-Subject: [RHEL5.5.z PATCH 1/5] NFSv4: Ensure that we track the NFSv4 lock state in read/write requests. Bugzilla: 620502 RH-Acked-by: Jeff Layton <jlayton@redhat.com> RH-Acked-by: J. Bruce Fields <bfields@redhat.com> From: Trond Myklebust <Trond.Myklebust@netapp.com> This patch fixes bugzilla entry 14501: https://bugzilla.kernel.org/show_bug.cgi?id=14501 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> Signed-off-by: Steve Dickson <steved@redhat.com> diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index fbcf17d..034f401 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -69,6 +69,7 @@ struct nfs_direct_req { /* I/O parameters */ struct nfs_open_context *ctx; /* file open context info */ + struct nfs_lock_context *l_ctx; /* Lock context info */ struct kiocb * iocb; /* controlling i/o request */ struct inode * inode; /* target file of i/o */ @@ -160,6 +161,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) INIT_LIST_HEAD(&dreq->rewrite_list); dreq->iocb = NULL; dreq->ctx = NULL; + dreq->l_ctx = NULL; spin_lock_init(&dreq->lock); atomic_set(&dreq->io_count, 0); dreq->count = 0; @@ -173,6 +175,8 @@ static void nfs_direct_req_free(struct kref *kref) { struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); + if (dreq->l_ctx != NULL) + nfs_put_lock_context(dreq->l_ctx); if (dreq->ctx != NULL) put_nfs_open_context(dreq->ctx); kmem_cache_free(nfs_direct_cachep, dreq); @@ -308,6 +312,7 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo data->cred = ctx->cred; data->args.fh = NFS_FH(inode); data->args.context = get_nfs_open_context(ctx); + data->args.lock_context = dreq->l_ctx; data->args.offset = pos; data->args.pgbase = pgbase; data->args.pages = data->pagevec; 
@@ -354,18 +359,21 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos) { - ssize_t result = 0; + ssize_t result = -ENOMEM; sigset_t oldset; struct inode *inode = iocb->ki_filp->f_mapping->host; struct rpc_clnt *clnt = NFS_CLIENT(inode); struct nfs_direct_req *dreq; dreq = nfs_direct_req_alloc(); - if (!dreq) - return -ENOMEM; + if (dreq == NULL) + goto out; dreq->inode = inode; dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); + dreq->l_ctx = nfs_get_lock_context(dreq->ctx); + if (dreq->l_ctx == NULL) + goto out_release; if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; @@ -375,8 +383,9 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size if (!result) result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); +out_release: nfs_direct_req_release(dreq); - +out: return result; } @@ -477,6 +486,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) data->args.offset = 0; data->args.count = 0; data->args.context = get_nfs_open_context(dreq->ctx); + data->args.lock_context = dreq->l_ctx; data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; @@ -648,6 +658,7 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l data->cred = ctx->cred; data->args.fh = NFS_FH(inode); data->args.context = get_nfs_open_context(ctx); + data->args.lock_context = dreq->l_ctx; data->args.offset = pos; data->args.pgbase = pgbase; data->args.pages = data->pagevec; @@ -696,7 +707,7 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos) { - ssize_t result = 0; + ssize_t result = -ENOMEM; sigset_t oldset; struct inode *inode = iocb->ki_filp->f_mapping->host; struct rpc_clnt *clnt = 
NFS_CLIENT(inode); @@ -706,7 +717,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz dreq = nfs_direct_req_alloc(); if (!dreq) - return -ENOMEM; + goto out; nfs_alloc_commit_data(dreq); if (dreq->commit_data == NULL || count < wsize) @@ -714,6 +725,9 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz dreq->inode = inode; dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); + dreq->l_ctx = nfs_get_lock_context(dreq->ctx); + if (dreq->l_ctx == NULL) + goto out_release; if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; @@ -724,8 +738,9 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz if (!result) result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); +out_release: nfs_direct_req_release(dreq); - +out: return result; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index cf0a826..6c27b9e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -381,6 +381,68 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) return error; } +static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) +{ + atomic_set(&l_ctx->count, 1); + l_ctx->lockowner = current->files; + l_ctx->pid = current->tgid; + INIT_LIST_HEAD(&l_ctx->list); +} + +static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx) +{ + struct nfs_lock_context *pos; + + list_for_each_entry(pos, &ctx->lock_context.list, list) { + if (pos->lockowner != current->files) + continue; + if (pos->pid != current->tgid) + continue; + atomic_inc(&pos->count); + return pos; + } + return NULL; +} + +struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx) +{ + struct nfs_lock_context *res, *new = NULL; + struct inode *inode = ctx->path.dentry->d_inode; + + spin_lock(&inode->i_lock); + res = __nfs_find_lock_context(ctx); + if (res == NULL) { + spin_unlock(&inode->i_lock); + new = kmalloc(sizeof(*new), GFP_KERNEL); + if (new == NULL) + 
return NULL; + nfs_init_lock_context(new); + spin_lock(&inode->i_lock); + res = __nfs_find_lock_context(ctx); + if (res == NULL) { + list_add_tail(&new->list, &ctx->lock_context.list); + new->open_context = ctx; + res = new; + new = NULL; + } + } + spin_unlock(&inode->i_lock); + kfree(new); + return res; +} + +void nfs_put_lock_context(struct nfs_lock_context *l_ctx) +{ + struct nfs_open_context *ctx = l_ctx->open_context; + struct inode *inode = ctx->path.dentry->d_inode; + + if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock)) + return; + list_del(&l_ctx->list); + spin_unlock(&inode->i_lock); + kfree(l_ctx); +} + /** * nfs_setattr_update_inode - Update inode metadata after a setattr call. * @inode: pointer to struct inode @@ -502,11 +564,11 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str ctx->path.mnt = mntget(mnt); ctx->cred = get_rpccred(cred); ctx->state = NULL; - ctx->lockowner = current->files; ctx->flags = 0; ctx->error = 0; ctx->dir_cookie = 0; - atomic_set(&ctx->count, 1); + nfs_init_lock_context(&ctx->lock_context); + ctx->lock_context.open_context = ctx; } return ctx; } @@ -514,7 +576,7 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) { if (ctx != NULL) - atomic_inc(&ctx->count); + atomic_inc(&ctx->lock_context.count); return ctx; } @@ -527,7 +589,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int wait) inode = ctx->path.dentry->d_inode; - if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock)) + if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock)) return; list_del(&ctx->list); spin_unlock(&inode->i_lock); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 888a5d3..5fb8788 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1039,14 +1039,14 @@ static int encode_putrootfh(struct xdr_stream *xdr) return 0; } -static void encode_stateid(struct xdr_stream *xdr, 
const struct nfs_open_context *ctx) +static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx) { nfs4_stateid stateid; uint32_t *p; RESERVE_SPACE(16); if (ctx->state != NULL) { - nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner); + nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner); WRITEMEM(stateid.data, sizeof(stateid.data)); } else WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data)); @@ -1059,7 +1059,7 @@ static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args) RESERVE_SPACE(4); WRITE32(OP_READ); - encode_stateid(xdr, args->context); + encode_stateid(xdr, args->context, args->lock_context); RESERVE_SPACE(12); WRITE64(args->offset); @@ -1267,7 +1267,7 @@ static int encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args RESERVE_SPACE(4); WRITE32(OP_WRITE); - encode_stateid(xdr, args->context); + encode_stateid(xdr, args->context, args->lock_context); RESERVE_SPACE(16); WRITE64(args->offset); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 245d8be..801347d 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -93,6 +93,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, req->wb_bytes = count; atomic_set(&req->wb_count, 1); req->wb_context = get_nfs_open_context(ctx); + req->wb_lock_context = nfs_get_lock_context(ctx); return req; } @@ -153,10 +154,16 @@ void nfs_clear_page_writeback(struct nfs_page *req) void nfs_clear_request(struct nfs_page *req) { struct page *page = req->wb_page; + struct nfs_lock_context *l_ctx = req->wb_lock_context; + if (page != NULL) { page_cache_release(page); req->wb_page = NULL; } + if (l_ctx != NULL) { + nfs_put_lock_context(l_ctx); + req->wb_lock_context = NULL; + } } @@ -227,7 +234,7 @@ nfs_coalesce_requests(struct list_head *head, struct list_head *dst, if (prev) { if (req->wb_context->cred != prev->wb_context->cred) break; - if (req->wb_context->lockowner != 
prev->wb_context->lockowner) + if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner) break; if (req->wb_context->state != prev->wb_context->state) break; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 768a20c..6cec3d8 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -292,6 +292,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, data->args.pages = data->pagevec; data->args.count = count; data->args.context = get_nfs_open_context(req->wb_context); + data->args.lock_context = req->wb_lock_context; data->res.fattr = &data->fattr; data->res.count = count; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 1de7756..c69be8c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -822,7 +822,9 @@ int nfs_flush_incompatible(struct file *file, struct page *page) */ req = nfs_find_request(inode, page->index); if (req) { - if (req->wb_page != page || ctx != req->wb_context) + if (req->wb_page != page || ctx != req->wb_context || + req->wb_lock_context->lockowner != current->files || + req->wb_lock_context->pid != current->tgid) status = nfs_wb_page(inode, page); nfs_release_request(req); } @@ -988,6 +990,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req, data->args.pages = data->pagevec; data->args.count = count; data->args.context = get_nfs_open_context(req->wb_context); + data->args.lock_context = req->wb_lock_context; data->res.fattr = &data->fattr; data->res.count = count; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 60da650..113162f 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -76,12 +76,20 @@ struct path { struct vfsmount *mnt; struct dentry *dentry; }; -struct nfs_open_context { + +struct nfs_lock_context { atomic_t count; + struct list_head list; + struct nfs_open_context *open_context; + fl_owner_t lockowner; + pid_t pid; +}; + +struct nfs_open_context { + struct nfs_lock_context lock_context; struct path path; struct rpc_cred *cred; struct nfs4_state *state; - 
fl_owner_t lockowner; int mode; unsigned long flags; @@ -327,6 +335,8 @@ extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr); extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); extern void put_nfs_open_context(struct nfs_open_context *ctx); extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode); +extern struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx); +extern void nfs_put_lock_context(struct nfs_lock_context *l_ctx); extern struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, const struct dentry *dentry, struct nfs_fh *fh, diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 1f7bd28..6591a44 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -37,6 +37,7 @@ struct nfs_page { *wb_list_head; /* read/write/commit */ struct page *wb_page; /* page to read in/write out */ struct nfs_open_context *wb_context; /* File state context info */ + struct nfs_lock_context *wb_lock_context; /* lock context info */ atomic_t wb_complete; /* i/os we're waiting for */ unsigned long wb_index; /* Offset >> PAGE_CACHE_SHIFT */ unsigned int wb_offset, /* Offset & ~PAGE_CACHE_MASK */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index f340b942..e05943f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -242,6 +242,7 @@ struct nfs4_delegreturnres { struct nfs_readargs { struct nfs_fh * fh; struct nfs_open_context *context; + struct nfs_lock_context *lock_context; __u64 offset; __u32 count; unsigned int pgbase; @@ -260,6 +261,7 @@ struct nfs_readres { struct nfs_writeargs { struct nfs_fh * fh; struct nfs_open_context *context; + struct nfs_lock_context *lock_context; __u64 offset; __u32 count; enum nfs3_stable_how stable;