From: Mikulas Patocka <mpatocka@redhat.com> Date: Mon, 1 Sep 2008 19:51:34 -0400 Subject: [md] deadlock with nested LVMs Message-id: Pine.LNX.4.64.0809011949040.30592@hs20-bc2-1.build.redhat.com O-Subject: [UPDATE RHEL 5.3 PATCH 1/2] bz460845 Deadlock with nested LVMs Bugzilla: 460845 RH-Acked-by: Alasdair G Kergon <agk@redhat.com> I was told that even internal structures are considered in the calculation of kABI hashes --- so the patch that changes only *.c breaks the hash but not functionality. So here I'm sending the updated patch that doesn't break Module.symvers hash. Mikulas diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c index ea3d77a..6ebc27c 100644 --- a/drivers/md/kcopyd.c +++ b/drivers/md/kcopyd.c @@ -25,14 +25,6 @@ #include "kcopyd.h" -static struct workqueue_struct *_kcopyd_wq; -static struct work_struct _kcopyd_work; - -static inline void wake(void) -{ - queue_work(_kcopyd_wq, &_kcopyd_work); -} - /*----------------------------------------------------------------- * Each kcopyd client has its own little pool of preallocated * pages for kcopyd io. @@ -49,9 +41,31 @@ struct kcopyd_client { atomic_t nr_jobs; #ifndef __GENKSYMS__ struct dm_io_client *io_client; + + struct workqueue_struct *kcopyd_wq; + struct work_struct kcopyd_work; + +/* + * We maintain three lists of jobs: + * + * i) jobs waiting for pages + * ii) jobs that have pages, and are waiting for the io to be issued. + * iii) jobs that have completed. + * + * All three of these are protected by job_lock. + */ + spinlock_t job_lock; + struct list_head complete_jobs; + struct list_head io_jobs; + struct list_head pages_jobs; #endif }; +static void wake(struct kcopyd_client *kc) +{ + queue_work(kc->kcopyd_wq, &kc->kcopyd_work); +} + static struct page_list *alloc_pl(void) { struct page_list *pl; @@ -209,21 +223,6 @@ struct kcopyd_job { static kmem_cache_t *_job_cache; static mempool_t *_job_pool; -/* - * We maintain three lists of jobs: - * - * i) jobs waiting for pages - * ii) jobs that have pages, and are waiting for the io to be issued. - * iii) jobs that have completed. - * - * All three of these are protected by job_lock. - */ -static DEFINE_SPINLOCK(_job_lock); - -static LIST_HEAD(_complete_jobs); -static LIST_HEAD(_io_jobs); -static LIST_HEAD(_pages_jobs); - static int jobs_init(void) { _job_cache = kmem_cache_create("kcopyd-jobs", @@ -244,10 +243,6 @@ static int jobs_init(void) static void jobs_exit(void) { - BUG_ON(!list_empty(&_complete_jobs)); - BUG_ON(!list_empty(&_io_jobs)); - BUG_ON(!list_empty(&_pages_jobs)); - mempool_destroy(_job_pool); kmem_cache_destroy(_job_cache); _job_pool = NULL; @@ -258,18 +253,19 @@ static void jobs_exit(void) * Functions to push and pop a job onto the head of a given job * list. */ -static inline struct kcopyd_job *pop(struct list_head *jobs) +static struct kcopyd_job *pop(struct list_head *jobs, + struct kcopyd_client *kc) { struct kcopyd_job *job = NULL; unsigned long flags; - spin_lock_irqsave(&_job_lock, flags); + spin_lock_irqsave(&kc->job_lock, flags); if (!list_empty(jobs)) { job = list_entry(jobs->next, struct kcopyd_job, list); list_del(&job->list); } - spin_unlock_irqrestore(&_job_lock, flags); + spin_unlock_irqrestore(&kc->job_lock, flags); return job; } @@ -277,10 +273,11 @@ static inline struct kcopyd_job *pop(struct list_head *jobs) static inline void push(struct list_head *jobs, struct kcopyd_job *job) { unsigned long flags; + struct kcopyd_client *kc = job->kc; - spin_lock_irqsave(&_job_lock, flags); + spin_lock_irqsave(&kc->job_lock, flags); list_add_tail(&job->list, jobs); - spin_unlock_irqrestore(&_job_lock, flags); + spin_unlock_irqrestore(&kc->job_lock, flags); } /* @@ -313,6 +310,7 @@ static int run_complete_job(struct kcopyd_job *job) static void complete_io(unsigned long error, void *context) { struct kcopyd_job *job = (struct kcopyd_job *) context; + struct kcopyd_client *kc = job->kc; if (error) { if (job->rw == WRITE) @@ -321,21 +319,21 @@ static void complete_io(unsigned long error, void *context) job->read_err = 1; if (!test_bit(KCOPYD_IGNORE_ERROR, &job->flags)) { - push(&_complete_jobs, job); - wake(); + push(&kc->complete_jobs, job); + wake(kc); return; } } if (job->rw == WRITE) - push(&_complete_jobs, job); + push(&kc->complete_jobs, job); else { job->rw = WRITE; - push(&_io_jobs, job); + push(&kc->io_jobs, job); } - wake(); + wake(kc); } /* @@ -372,7 +370,7 @@ static int run_pages_job(struct kcopyd_job *job) r = kcopyd_get_pages(job->kc, job->nr_pages, &job->pages); if (!r) { /* this job is ready for io */ - push(&_io_jobs, job); + push(&job->kc->io_jobs, job); return 0; } @@ -387,12 +385,13 @@ static int run_pages_job(struct kcopyd_job *job) * Run through a list for as long as possible. Returns the count * of successful jobs. */ -static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *)) +static int process_jobs(struct list_head *jobs, struct kcopyd_client *kc, + int (*fn) (struct kcopyd_job *)) { struct kcopyd_job *job; int r, count = 0; - while ((job = pop(jobs))) { + while ((job = pop(jobs, kc))) { r = fn(job); @@ -402,7 +401,7 @@ static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *)) job->write_err = (unsigned int) -1; else job->read_err = 1; - push(&_complete_jobs, job); + push(&kc->complete_jobs, job); break; } @@ -424,8 +423,10 @@ static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *)) /* * kcopyd does this every time it's woken up. */ -static void do_work(void *ignored) +static void do_work(void *data) { + struct kcopyd_client *kc = data; + /* * The order that these are called is *very* important. * complete jobs can free some pages for pages jobs. @@ -433,9 +434,9 @@ static void do_work(void *ignored) * list. io jobs call wake when they complete and it all * starts again. */ - process_jobs(&_complete_jobs, run_complete_job); - process_jobs(&_pages_jobs, run_pages_job); - process_jobs(&_io_jobs, run_io_job); + process_jobs(&kc->complete_jobs, kc, run_complete_job); + process_jobs(&kc->pages_jobs, kc, run_pages_job); + process_jobs(&kc->io_jobs, kc, run_io_job); } /* @@ -445,9 +446,10 @@ static void do_work(void *ignored) */ static void dispatch_job(struct kcopyd_job *job) { - atomic_inc(&job->kc->nr_jobs); - push(&_pages_jobs, job); - wake(); + struct kcopyd_client *kc = job->kc; + atomic_inc(&kc->nr_jobs); + push(&kc->pages_jobs, job); + wake(kc); } #define SUB_JOB_SIZE 128 @@ -627,15 +629,7 @@ static int kcopyd_init(void) return r; } - _kcopyd_wq = create_singlethread_workqueue("kcopyd"); - if (!_kcopyd_wq) { - jobs_exit(); - mutex_unlock(&kcopyd_init_lock); - return -ENOMEM; - } - kcopyd_clients++; - INIT_WORK(&_kcopyd_work, do_work, NULL); mutex_unlock(&kcopyd_init_lock); return 0; } @@ -646,8 +640,6 @@ static void kcopyd_exit(void) kcopyd_clients--; if (!kcopyd_clients) { jobs_exit(); - destroy_workqueue(_kcopyd_wq); - _kcopyd_wq = NULL; } mutex_unlock(&kcopyd_init_lock); } @@ -663,15 +655,31 @@ int kcopyd_client_create(unsigned int nr_pages, struct kcopyd_client **result) kc = kmalloc(sizeof(*kc), GFP_KERNEL); if (!kc) { + r = -ENOMEM; kcopyd_exit(); - return -ENOMEM; + return r; } spin_lock_init(&kc->lock); + spin_lock_init(&kc->job_lock); + INIT_LIST_HEAD(&kc->complete_jobs); + INIT_LIST_HEAD(&kc->io_jobs); + INIT_LIST_HEAD(&kc->pages_jobs); + + INIT_WORK(&kc->kcopyd_work, do_work, kc); + kc->kcopyd_wq = create_singlethread_workqueue("kcopyd"); + if (!kc->kcopyd_wq) { + r = -ENOMEM; + kfree(kc); + kcopyd_exit(); + return r; + } + kc->pages = NULL; kc->nr_pages = kc->nr_free_pages = 0; r = client_alloc_pages(kc, nr_pages); if (r) { + destroy_workqueue(kc->kcopyd_wq); kfree(kc); kcopyd_exit(); return r; @@ -681,6 +689,7 @@ int kcopyd_client_create(unsigned int nr_pages, struct kcopyd_client **result) if (IS_ERR(kc->io_client)) { r = PTR_ERR(kc->io_client); client_free_pages(kc); + destroy_workqueue(kc->kcopyd_wq); kfree(kc); kcopyd_exit(); return r; @@ -699,6 +708,10 @@ void kcopyd_client_destroy(struct kcopyd_client *kc) /* Wait for completion of all jobs submitted by this client. */ wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs)); + BUG_ON(!list_empty(&kc->complete_jobs)); + BUG_ON(!list_empty(&kc->io_jobs)); + BUG_ON(!list_empty(&kc->pages_jobs)); + destroy_workqueue(kc->kcopyd_wq); dm_io_client_destroy(kc->io_client); client_free_pages(kc); client_del(kc);