From: Don Zickus <dzickus@redhat.com> Date: Thu, 28 Aug 2008 21:51:47 -0400 Subject: Revert "[mm] add support for fast get user pages" This reverts commit c8ccc5eabdea44fce4969d4bc927bd8af46a8b55. Message-id: 48A44CFE.6060904@redhat.com O-Subject: [RHEL 5.3 PATCH] RHBZ 447649 fast_gup: Improve thread scalability for TPC-C Bugzilla: 447649 RH-Acked-by: Rik van Riel <riel@redhat.com> Bugzilla: 447649 https://bugzilla.redhat.com/show_bug.cgi?id=447649 Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=1428917 Description: Improve thread scalability for TPC-C benchmarking, in particular via reduction of DIO induced mmap_sem contention and lock contention in follow_hugetlb_page() This has been discussed upstream as "fast_gup" and has been accepted into the 2.6.27 tree. backported to RHEL from upstream commit #s: b379d790197cdf8a95fb67507d75a24ac0a1678d 7e675137a8e1a4d45822746456dd389b65745bf6 a0a8f5364a5ad248aec6cb705e0092ff563edc2f 21cc199baa815d7b3f1ace4be20b9558cbddc00f 8174c430e445a93016ef18f717fe570214fa38bf 9b79022ca909b66e2cd0cfd9248f832fc165f77f f5dd33c494a427b1d1a3b574de5c9e511c888864 bc40d73c950146725e9e768e856a416ec8949065 diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 19fb9ef..fe3be72 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -12,7 +12,6 @@ mainmenu "Linux Kernel Configuration" config X86_64 bool default y - select HAVE_GET_USER_PAGES_FAST help Port to the x86-64 architecture. x86-64 is a 64-bit extension to the classical 32-bit x86 architecture. For details see diff --git a/arch/x86_64/mm/Makefile b/arch/x86_64/mm/Makefile index 88d0a8a..21ff5d9 100644 --- a/arch/x86_64/mm/Makefile +++ b/arch/x86_64/mm/Makefile @@ -3,7 +3,6 @@ # obj-y := init.o fault.o ioremap.o extable.o pageattr.o mmap.o -obj-$(CONFIG_HAVE_GET_USER_PAGES_FAST) += gup.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_K8_NUMA) += k8topology.o diff --git a/arch/x86_64/mm/gup.c b/arch/x86_64/mm/gup.c deleted file mode 100644 index acb5d93..0000000 --- a/arch/x86_64/mm/gup.c +++ /dev/null @@ -1,262 +0,0 @@ -/* - * Lockless get_user_pages_fast for x86 - * - * Copyright (C) 2008 Nick Piggin - * Copyright (C) 2008 Novell Inc. - */ -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/vmstat.h> -#include <linux/highmem.h> -#include <linux/uaccess.h> - -#include <asm/pgtable.h> - -static inline pte_t gup_get_pte(pte_t *ptep) -{ -#ifndef CONFIG_X86_PAE - return *ptep; -#else - /* - * With get_user_pages_fast, we walk down the pagetables without taking - * any locks. For this we would like to load the pointers atoimcally, - * but that is not possible (without expensive cmpxchg8b) on PAE. What - * we do have is the guarantee that a pte will only either go from not - * present to present, or present to not present or both -- it will not - * switch to a completely different present page without a TLB flush in - * between; something that we are blocking by holding interrupts off. - * - * Setting ptes from not present to present goes: - * ptep->pte_high = h; - * smp_wmb(); - * ptep->pte_low = l; - * - * And present to not present goes: - * ptep->pte_low = 0; - * smp_wmb(); - * ptep->pte_high = 0; - * - * We must ensure here that the load of pte_low sees l iff pte_high - * sees h. We load pte_high *after* loading pte_low, which ensures we - * don't see an older value of pte_high. *Then* we recheck pte_low, - * which ensures that we haven't picked up a changed pte high. We might - * have got rubbish values from pte_low and pte_high, but we are - * guaranteed that pte_low will not have the present bit set *unless* - * it is 'l'. And get_user_pages_fast only operates on present ptes, so - * we're safe. - * - * gup_get_pte should not be used or copied outside gup.c without being - * very careful -- it does not atomically load the pte or anything that - * is likely to be useful for you. - */ - pte_t pte; - -retry: - pte.pte_low = ptep->pte_low; - smp_rmb(); - pte.pte_high = ptep->pte_high; - smp_rmb(); - if (unlikely(pte.pte_low != ptep->pte_low)) - goto retry; - - return pte; -#endif -} - -/* - * The performance critical leaf functions are made noinline otherwise gcc - * inlines everything into a single function which results in too much - * register pressure. - */ -static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - unsigned long mask; - pte_t *ptep; - - mask = _PAGE_PRESENT|_PAGE_USER; - if (write) - mask |= _PAGE_RW; - - ptep = pte_offset_map(&pmd, addr); - do { - pte_t pte = gup_get_pte(ptep); - struct page *page; - - if ((pte_val(pte) & (mask | _PAGE_SPECIAL)) != mask) { - pte_unmap(ptep); - return 0; - } - BUG_ON(!pfn_valid(pte_pfn(pte))); - page = pte_page(pte); - get_page(page); - pages[*nr] = page; - (*nr)++; - - } while (ptep++, addr += PAGE_SIZE, addr != end); - pte_unmap(ptep - 1); - - return 1; -} - -static inline void get_head_page_multiple(struct page *page, int nr) -{ - BUG_ON(page != compound_head(page)); - BUG_ON(page_count(page) == 0); - atomic_add(nr, &page->_count); -} - -static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - unsigned long mask; - pte_t pte = *(pte_t *)&pmd; - struct page *head, *page; - int refs; - - mask = _PAGE_PRESENT|_PAGE_USER; - if (write) - mask |= _PAGE_RW; - if ((pte_val(pte) & mask) != mask) - return 0; - /* hugepages are never "special" */ - BUG_ON(pte_val(pte) & _PAGE_SPECIAL); - BUG_ON(!pfn_valid(pte_pfn(pte))); - - refs = 0; - head = pte_page(pte); - page = head + ((addr & ~HPAGE_MASK) >> PAGE_SHIFT); - do { - BUG_ON(compound_head(page) != head); - pages[*nr] = page; - (*nr)++; - page++; - refs++; - } while (addr += PAGE_SIZE, addr != end); - get_head_page_multiple(head, refs); - - return 1; -} - -static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pmd_t *pmdp; - - pmdp = pmd_offset(&pud, addr); - do { - pmd_t pmd = *pmdp; - - next = pmd_addr_end(addr, end); - if (pmd_none(pmd)) - return 0; - if (unlikely(pmd_large(pmd))) { - if (!gup_huge_pmd(pmd, addr, next, write, pages, nr)) - return 0; - } else { - if (!gup_pte_range(pmd, addr, next, write, pages, nr)) - return 0; - } - } while (pmdp++, addr = next, addr != end); - - return 1; -} - -static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pud_t *pudp; - - pudp = pud_offset(&pgd, addr); - do { - pud_t pud = *pudp; - - next = pud_addr_end(addr, end); - if (pud_none(pud)) - return 0; - if (!gup_pmd_range(pud, addr, next, write, pages, nr)) - return 0; - } while (pudp++, addr = next, addr != end); - - return 1; -} - -int get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next; - pgd_t *pgdp; - int nr = 0; - - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - end = start + len; - if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, - start, len))) - goto slow_irqon; - - /* - * XXX: batch / limit 'nr', to avoid large irq off latency - * needs some instrumenting to determine the common sizes used by - * important workloads (eg. DB2), and whether limiting the batch size - * will decrease performance. - * - * It seems like we're in the clear for the moment. Direct-IO is - * the main guy that batches up lots of get_user_pages, and even - * they are limited to 64-at-a-time which is not so many. - */ - /* - * This doesn't prevent pagetable teardown, but does prevent - * the pagetables and pages from being freed on x86. - * - * So long as we atomically load page table pointers versus teardown - * (which we do on x86, with the above PAE exception), we can follow the - * address down to the the page and take a ref on it. - */ - local_irq_disable(); - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = *pgdp; - - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - goto slow; - if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) - goto slow; - } while (pgdp++, addr = next, addr != end); - local_irq_enable(); - - BUG_ON(nr != (end - start) >> PAGE_SHIFT); - return nr; - - { - int ret; - -slow: - local_irq_enable(); -slow_irqon: - /* Try to get the remaining pages with get_user_pages */ - start += nr << PAGE_SHIFT; - pages += nr; - - down_read(&mm->mmap_sem); - ret = get_user_pages(current, mm, start, - (end - start) >> PAGE_SHIFT, write, 0, pages, NULL); - up_read(&mm->mmap_sem); - - /* Have to be a bit careful with return values */ - if (nr > 0) { - if (ret < 0) - ret = nr; - else - ret += nr; - } - - return ret; - } -} diff --git a/fs/bio.c b/fs/bio.c index 00a0b7f..6a0b9ad 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -648,8 +648,12 @@ static struct bio *__bio_map_user_iov(request_queue_t *q, const int local_nr_pages = end - start; const int page_limit = cur_page + local_nr_pages; - ret = get_user_pages_fast(uaddr, local_nr_pages, - write_to_vm, &pages[cur_page]); + down_read(¤t->mm->mmap_sem); + ret = get_user_pages(current, current->mm, uaddr, + local_nr_pages, + write_to_vm, 0, &pages[cur_page], NULL); + up_read(¤t->mm->mmap_sem); + if (ret < local_nr_pages) { ret = -EFAULT; goto out_unmap; diff --git a/fs/direct-io.c b/fs/direct-io.c index 6046e0e..da7a1d6 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -149,11 +149,17 @@ static int dio_refill_pages(struct dio *dio) int nr_pages; nr_pages = min(dio->total_pages - dio->curr_page, DIO_PAGES); - ret = get_user_pages_fast( + down_read(¤t->mm->mmap_sem); + ret = get_user_pages( + current, /* Task for fault acounting */ + current->mm, /* whose pages? */ dio->curr_user_address, /* Where from? */ nr_pages, /* How many pages? */ dio->rw == READ, /* Write to memory? */ - &dio->pages[0]); /* Put results here */ + 0, /* force (?) */ + &dio->pages[0], + NULL); /* vmas */ + up_read(¤t->mm->mmap_sem); if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) { struct page *page = ZERO_PAGE(dio->curr_user_address); diff --git a/fs/splice.c b/fs/splice.c index 1f8c911..cee12d8 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1124,6 +1124,12 @@ static int get_iovec_page_array(const struct iovec __user *iov, { int buffers = 0, error = 0; + /* + * It's ok to take the mmap_sem for reading, even + * across a "get_user()". + */ + down_read(¤t->mm->mmap_sem); + while (nr_vecs) { unsigned long off, npages; void __user *base; @@ -1170,8 +1176,9 @@ static int get_iovec_page_array(const struct iovec __user *iov, if (npages > PIPE_BUFFERS - buffers) npages = PIPE_BUFFERS - buffers; - error = get_user_pages_fast((unsigned long)base, npages, - 0, &pages[buffers]); + error = get_user_pages(current, current->mm, + (unsigned long) base, npages, 0, 0, + &pages[buffers], NULL); if (unlikely(error <= 0)) break; @@ -1210,6 +1217,8 @@ static int get_iovec_page_array(const struct iovec __user *iov, iov++; } + up_read(¤t->mm->mmap_sem); + if (buffers) return buffers; diff --git a/include/asm-alpha/pgtable.h b/include/asm-alpha/pgtable.h index 3f69582..93eaa58 100644 --- a/include/asm-alpha/pgtable.h +++ b/include/asm-alpha/pgtable.h @@ -269,7 +269,6 @@ extern inline int pte_exec(pte_t pte) { return !(pte_val(pte) & _PAGE_FOE); } extern inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } extern inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } extern inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } -extern inline int pte_special(pte_t pte) { return 0; } extern inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_FOW; return pte; } extern inline pte_t pte_rdprotect(pte_t pte) { pte_val(pte) |= _PAGE_FOR; return pte; } @@ -281,7 +280,6 @@ extern inline pte_t pte_mkread(pte_t pte) { pte_val(pte) &= ~_PAGE_FOR; return p extern inline pte_t pte_mkexec(pte_t pte) { pte_val(pte) &= ~_PAGE_FOE; return pte; } extern inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= __DIRTY_BITS; return pte; } extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= __ACCESS_BITS; return pte; } -extern inline pte_t pte_mkspecial(pte_t pte) { return pte; } #define PAGE_DIR_OFFSET(tsk,address) pgd_offset((tsk),(address)) diff --git a/include/asm-arm/pgtable.h b/include/asm-arm/pgtable.h index cfb60f0..8d3919c 100644 --- a/include/asm-arm/pgtable.h +++ b/include/asm-arm/pgtable.h @@ -243,7 +243,6 @@ extern struct page *empty_zero_page; #define pte_exec(pte) (pte_val(pte) & L_PTE_EXEC) #define pte_dirty(pte) (pte_val(pte) & L_PTE_DIRTY) #define pte_young(pte) (pte_val(pte) & L_PTE_YOUNG) -#define pte_special(pte) (0) /* * The following only works if pte_present() is not true. @@ -268,8 +267,6 @@ PTE_BIT_FUNC(mkdirty, |= L_PTE_DIRTY); PTE_BIT_FUNC(mkold, &= ~L_PTE_YOUNG); PTE_BIT_FUNC(mkyoung, |= L_PTE_YOUNG); -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } - /* * Mark the prot value as uncacheable and unbufferable. */ diff --git a/include/asm-cris/pgtable.h b/include/asm-cris/pgtable.h index 75833eb..5d76c1c 100644 --- a/include/asm-cris/pgtable.h +++ b/include/asm-cris/pgtable.h @@ -117,7 +117,6 @@ static inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_RE static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_MODIFIED; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } -static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_wrprotect(pte_t pte) { @@ -193,7 +192,6 @@ static inline pte_t pte_mkyoung(pte_t pte) } return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } /* * Conversion functions: convert a page and protection to a page entry, diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h index 72164f8..7af7485 100644 --- a/include/asm-frv/pgtable.h +++ b/include/asm-frv/pgtable.h @@ -380,7 +380,6 @@ static inline int pte_exec(pte_t pte) { return !((pte).pte & _PAGE_SUPER); } static inline int pte_dirty(pte_t pte) { return (pte).pte & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return (pte).pte & _PAGE_ACCESSED; } static inline int pte_write(pte_t pte) { return !((pte).pte & _PAGE_WP); } -static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_rdprotect(pte_t pte) { (pte).pte |= _PAGE_SUPER; return pte; } static inline pte_t pte_exprotect(pte_t pte) { (pte).pte |= _PAGE_SUPER; return pte; } @@ -392,7 +391,6 @@ static inline pte_t pte_mkexec(pte_t pte) { (pte).pte &= ~_PAGE_SUPER; return pt static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte |= _PAGE_DIRTY; return pte; } static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte |= _PAGE_ACCESSED; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte &= ~_PAGE_WP; return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { diff --git a/include/asm-i386/mach-xen/asm/pgtable.h b/include/asm-i386/mach-xen/asm/pgtable.h index 7c3b34f..05b6e71 100644 --- a/include/asm-i386/mach-xen/asm/pgtable.h +++ b/include/asm-i386/mach-xen/asm/pgtable.h @@ -228,7 +228,6 @@ static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return (pte).pte_low & _PAGE_ACCESSED; } static inline int pte_write(pte_t pte) { return (pte).pte_low & _PAGE_RW; } static inline int pte_huge(pte_t pte) { return (pte).pte_low & _PAGE_PSE; } -static inline int pte_special(pte_t pte) { return 0; } /* * The following only works if pte_present() is not true. @@ -246,7 +245,6 @@ static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; retur static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } #ifdef CONFIG_X86_PAE # include <asm/pgtable-3level.h> diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index c55caa4..09697fe 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -222,7 +222,6 @@ static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return (pte).pte_low & _PAGE_ACCESSED; } static inline int pte_write(pte_t pte) { return (pte).pte_low & _PAGE_RW; } static inline int pte_huge(pte_t pte) { return (pte).pte_low & _PAGE_PSE; } -static inline int pte_special(pte_t pte) { return 0; } /* * The following only works if pte_present() is not true. @@ -240,7 +239,6 @@ static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; retur static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } #ifdef CONFIG_X86_PAE # include <asm/pgtable-3level.h> diff --git a/include/asm-ia64/pgtable.h b/include/asm-ia64/pgtable.h index 50d7665..228981c 100644 --- a/include/asm-ia64/pgtable.h +++ b/include/asm-ia64/pgtable.h @@ -302,8 +302,6 @@ ia64_phys_addr_valid (unsigned long addr) #define pte_dirty(pte) ((pte_val(pte) & _PAGE_D) != 0) #define pte_young(pte) ((pte_val(pte) & _PAGE_A) != 0) #define pte_file(pte) ((pte_val(pte) & _PAGE_FILE) != 0) -#define pte_special(pte) 0 - /* * Note: we convert AR_RWX to AR_RX and AR_RW to AR_R by clearing the 2nd bit in the * access rights: @@ -316,7 +314,6 @@ ia64_phys_addr_valid (unsigned long addr) #define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D)) #define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_D)) #define pte_mkhuge(pte) (__pte(pte_val(pte))) -#define pte_mkspecial(pte) (pte) /* * Make page protection values cacheable, uncacheable, or write- diff --git a/include/asm-m32r/pgtable.h b/include/asm-m32r/pgtable.h index 8b141e5..1983b7f 100644 --- a/include/asm-m32r/pgtable.h +++ b/include/asm-m32r/pgtable.h @@ -236,11 +236,6 @@ static inline pte_t pte_exprotect(pte_t pte) return pte; } -static inline int pte_special(pte_t pte) -{ - return 0; -} - static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; @@ -294,11 +289,6 @@ static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigne return test_and_clear_bit(_PAGE_BIT_DIRTY, ptep); } -static inline pte_t pte_mkspecial(pte_t pte) -{ - return pte; -} - static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { return test_and_clear_bit(_PAGE_BIT_ACCESSED, ptep); diff --git a/include/asm-m68k/motorola_pgtable.h b/include/asm-m68k/motorola_pgtable.h index ea9fd67..1ccc733 100644 --- a/include/asm-m68k/motorola_pgtable.h +++ b/include/asm-m68k/motorola_pgtable.h @@ -169,7 +169,6 @@ static inline int pte_exec(pte_t pte) { return 1; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } -static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_RONLY; return pte; } static inline pte_t pte_rdprotect(pte_t pte) { return pte; } @@ -191,7 +190,6 @@ static inline pte_t pte_mkcache(pte_t pte) pte_val(pte) = (pte_val(pte) & _CACHEMASK040) | m68k_supervisor_cachemode; return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } #define PAGE_DIR_OFFSET(tsk,address) pgd_offset((tsk),(address)) diff --git a/include/asm-m68k/sun3_pgtable.h b/include/asm-m68k/sun3_pgtable.h index 4761900..5156a28 100644 --- a/include/asm-m68k/sun3_pgtable.h +++ b/include/asm-m68k/sun3_pgtable.h @@ -171,7 +171,6 @@ static inline int pte_exec(pte_t pte) { return 1; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & SUN3_PAGE_MODIFIED; } static inline int pte_young(pte_t pte) { return pte_val(pte) & SUN3_PAGE_ACCESSED; } static inline int pte_file(pte_t pte) { return pte_val(pte) & SUN3_PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~SUN3_PAGE_WRITEABLE; return pte; } static inline pte_t pte_rdprotect(pte_t pte) { return pte; } @@ -188,7 +187,6 @@ static inline pte_t pte_mknocache(pte_t pte) { pte_val(pte) |= SUN3_PAGE_NOCACHE //static inline pte_t pte_mkcache(pte_t pte) { pte_val(pte) &= SUN3_PAGE_NOCACHE; return pte; } // until then, use: static inline pte_t pte_mkcache(pte_t pte) { return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t kernel_pg_dir[PTRS_PER_PGD]; diff --git a/include/asm-mips/pgtable.h b/include/asm-mips/pgtable.h index 365a24d..a36ca1b 100644 --- a/include/asm-mips/pgtable.h +++ b/include/asm-mips/pgtable.h @@ -325,8 +325,6 @@ static inline pte_t pte_mkyoung(pte_t pte) return pte; } #endif -static inline int pte_special(pte_t pte) { return 0; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } /* * Macro to make mark a page protection value as "uncacheable". Note diff --git a/include/asm-parisc/pgtable.h b/include/asm-parisc/pgtable.h index b0424c3..5066c54 100644 --- a/include/asm-parisc/pgtable.h +++ b/include/asm-parisc/pgtable.h @@ -341,7 +341,6 @@ extern inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; extern inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } extern inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } extern inline int pte_user(pte_t pte) { return pte_val(pte) & _PAGE_USER; } -static inline int pte_special(pte_t pte) { return 0; } extern inline pte_t pte_rdprotect(pte_t pte) { pte_val(pte) &= ~_PAGE_READ; return pte; } extern inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; } @@ -351,7 +350,6 @@ extern inline pte_t pte_mkread(pte_t pte) { pte_val(pte) |= _PAGE_READ; return p extern inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= _PAGE_DIRTY; return pte; } extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; } extern inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_WRITE; return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } /* * Conversion functions: convert a page and protection to a page entry, diff --git a/include/asm-powerpc/pgtable.h b/include/asm-powerpc/pgtable.h index b81c9e3..8dbf5ad 100644 --- a/include/asm-powerpc/pgtable.h +++ b/include/asm-powerpc/pgtable.h @@ -243,7 +243,6 @@ static inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC;} static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY;} static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED;} static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE;} -static inline int pte_special(pte_t pte) { return 0; } static inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; } static inline void pte_cache(pte_t pte) { pte_val(pte) &= ~_PAGE_NO_CACHE; } @@ -270,8 +269,6 @@ static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; } static inline pte_t pte_mkhuge(pte_t pte) { return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { - return pte; } /* Atomic PTE updates */ static inline unsigned long pte_update(pte_t *p, unsigned long clr) diff --git a/include/asm-ppc/pgtable.h b/include/asm-ppc/pgtable.h index 5d27843..51fa7c6 100644 --- a/include/asm-ppc/pgtable.h +++ b/include/asm-ppc/pgtable.h @@ -539,7 +539,6 @@ static inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } -static inline int pte_special(pte_t pte) { return 0; } static inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; } static inline void pte_cache(pte_t pte) { pte_val(pte) &= ~_PAGE_NO_CACHE; } @@ -565,8 +564,6 @@ static inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= _PAGE_DIRTY; return pte; } static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { - return pte; } static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h index f10f399..3515ed8 100644 --- a/include/asm-s390/pgtable.h +++ b/include/asm-s390/pgtable.h @@ -423,11 +423,6 @@ static inline int pte_file(pte_t pte) return (pte_val(pte) & mask) == _PAGE_TYPE_FILE; } -static inline int pte_special(pte_t pte) -{ - return 0; -} - #define pte_same(a,b) (pte_val(a) == pte_val(b)) /* @@ -560,11 +555,6 @@ static inline pte_t pte_mkyoung(pte_t pte) return pte; } -static inline pte_t pte_mkspecial(pte_t pte) -{ - return pte; -} - static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { return 0; diff --git a/include/asm-sh/pgtable.h b/include/asm-sh/pgtable.h index 34865c8..dcd23a0 100644 --- a/include/asm-sh/pgtable.h +++ b/include/asm-sh/pgtable.h @@ -184,7 +184,6 @@ static inline int pte_young(pte_t pte){ return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } static inline int pte_write(pte_t pte){ return pte_val(pte) & _PAGE_RW; } static inline int pte_not_present(pte_t pte){ return !(pte_val(pte) & _PAGE_PRESENT); } -static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_rdprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; } static inline pte_t pte_exprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; } @@ -286,8 +285,6 @@ typedef pte_t *pte_addr_t; struct mm_struct; -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } - /* * No page table caches to initialise */ diff --git a/include/asm-sh64/pgtable.h b/include/asm-sh64/pgtable.h index 641df64..54c7821 100644 --- a/include/asm-sh64/pgtable.h +++ b/include/asm-sh64/pgtable.h @@ -413,13 +413,12 @@ extern void __handle_bad_pmd_kernel(pmd_t * pmd); /* * The following have defined behavior only work if pte_present() is true. */ -static inline int pte_read(pte_t pte) { return pte_val(pte) & _PAGE_READ; } -static inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXECUTE; } -static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } -static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } -static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } -static inline int pte_special(pte_t pte){ return 0; } +static inline int pte_read(pte_t pte) { return pte_val(pte) & _PAGE_READ; } +static inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXECUTE; } +static inline int pte_dirty(pte_t pte){ return pte_val(pte) & _PAGE_DIRTY; } +static inline int pte_young(pte_t pte){ return pte_val(pte) & _PAGE_ACCESSED; } +static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } +static inline int pte_write(pte_t pte){ return pte_val(pte) & _PAGE_WRITE; } static inline pte_t pte_rdprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_READ)); return pte; } static inline pte_t pte_wrprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_WRITE)); return pte; } @@ -433,7 +432,6 @@ static inline pte_t pte_mkexec(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _ static inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; } static inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; } static inline pte_t pte_mkhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_SZHUGE)); return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } /* diff --git a/include/asm-sparc/pgtable.h b/include/asm-sparc/pgtable.h index 599a2cf..226c647 100644 --- a/include/asm-sparc/pgtable.h +++ b/include/asm-sparc/pgtable.h @@ -222,11 +222,6 @@ static inline int pte_file(pte_t pte) return pte_val(pte) & BTFIXUP_HALF(pte_filei); } -static inline int pte_special(pte_t pte) -{ - return 0; -} - /* */ BTFIXUPDEF_HALF(pte_wrprotecti) @@ -259,8 +254,6 @@ BTFIXUPDEF_CALL_CONST(pte_t, pte_mkyoung, pte_t) #define pte_mkdirty(pte) BTFIXUP_CALL(pte_mkdirty)(pte) #define pte_mkyoung(pte) BTFIXUP_CALL(pte_mkyoung)(pte) -#define pte_mkspecial(pte) (pte) - #define pfn_pte(pfn, prot) mk_pte(pfn_to_page(pfn), prot) BTFIXUPDEF_CALL(unsigned long, pte_pfn, pte_t) diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h index 97c0a9e..ebfe395 100644 --- a/include/asm-sparc64/pgtable.h +++ b/include/asm-sparc64/pgtable.h @@ -503,11 +503,6 @@ static inline pte_t pte_mkyoung(pte_t pte) return __pte(pte_val(pte) | mask); } -static inline pte_t pte_mkspecial(pte_t pte) -{ - return pte; -} - static inline unsigned long pte_young(pte_t pte) { unsigned long mask; @@ -628,11 +623,6 @@ static inline unsigned long pte_present(pte_t pte) return val; } -static inline int pte_special(pte_t pte) -{ - return 0; -} - #define pmd_set(pmdp, ptep) \ (pmd_val(*(pmdp)) = (__pa((unsigned long) (ptep)) >> 11UL)) #define pud_set(pudp, pmdp) \ diff --git a/include/asm-um/pgtable.h b/include/asm-um/pgtable.h index a4e33ed..ac64eb9 100644 --- a/include/asm-um/pgtable.h +++ b/include/asm-um/pgtable.h @@ -226,11 +226,6 @@ static inline int pte_newprot(pte_t pte) return(pte_present(pte) && (pte_get_bits(pte, _PAGE_NEWPROT))); } -static inline int pte_special(pte_t pte) -{ - return 0; -} - /* * ================================= * Flags setting section. @@ -317,11 +312,6 @@ static inline pte_t pte_mknewpage(pte_t pte) return(pte); } -static inline pte_t pte_mkspecial(pte_t pte) -{ - return(pte); -} - static inline void set_pte(pte_t *pteptr, pte_t pteval) { pte_copy(*pteptr, pteval); diff --git a/include/asm-x86_64/mach-xen/asm/pgtable.h b/include/asm-x86_64/mach-xen/asm/pgtable.h index d0b91cc..f5cfe8e 100644 --- a/include/asm-x86_64/mach-xen/asm/pgtable.h +++ b/include/asm-x86_64/mach-xen/asm/pgtable.h @@ -212,7 +212,6 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long #define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ #define _PAGE_BIT_SOFTDIRTY 9 /* save dirty state when hdw dirty bit cleared */ -#define _PAGE_BIT_SPECIAL 10 #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ #define _PAGE_PRESENT 0x001 @@ -226,8 +225,6 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long #define _PAGE_FILE 0x040 /* nonlinear file mapping, saved PTE; unset:swap */ #define _PAGE_GLOBAL 0x100 /* Global TLB entry */ #define _PAGE_SOFTDIRTY 0x200 -#define _PAGE_SPECIAL 0x400 -#define __HAVE_ARCH_PTE_SPECIAL #define _PAGE_PROTNONE 0x080 /* If not present */ #define _PAGE_NX (_AC(1,UL)<<_PAGE_BIT_NX) @@ -361,7 +358,6 @@ static inline int pte_young(pte_t pte) { return __pte_val(pte) & _PAGE_ACCESSED static inline int pte_write(pte_t pte) { return __pte_val(pte) & _PAGE_RW; } static inline int pte_file(pte_t pte) { return __pte_val(pte) & _PAGE_FILE; } static inline int pte_huge(pte_t pte) { return __pte_val(pte) & _PAGE_PSE; } -static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; } static inline pte_t pte_rdprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_USER; return pte; } static inline pte_t pte_exprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_USER; return pte; } @@ -379,7 +375,6 @@ static inline pte_t pte_mkdirty(pte_t pte) { __pte_val(pte) |= _PAGE_DIRTY; retu static inline pte_t pte_mkyoung(pte_t pte) { __pte_val(pte) |= _PAGE_ACCESSED; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { __pte_val(pte) |= _PAGE_RW; return pte; } static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= _PAGE_PSE; return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_SPECIAL)); return pte; } struct vm_area_struct; diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index 0f8ff9e..bd3d0c1 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h @@ -168,7 +168,6 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long #define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ #define _PAGE_BIT_SOFTDIRTY 9 /* save dirty state when hdw dirty bit cleared */ -#define _PAGE_BIT_SPECIAL 10 #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ #define _PAGE_PRESENT 0x001 @@ -182,8 +181,6 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long #define _PAGE_FILE 0x040 /* nonlinear file mapping, saved PTE; unset:swap */ #define _PAGE_GLOBAL 0x100 /* Global TLB entry */ #define _PAGE_SOFTDIRTY 0x200 -#define _PAGE_SPECIAL 0x400 -#define __HAVE_ARCH_PTE_SPECIAL #define _PAGE_PROTNONE 0x080 /* If not present */ #define _PAGE_NX (_AC(1,UL)<<_PAGE_BIT_NX) @@ -297,7 +294,6 @@ static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; } static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_PSE; } -static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; } static inline pte_t pte_rdprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; } static inline pte_t pte_exprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; } @@ -310,7 +306,6 @@ static inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | static inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; } static inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; } static inline pte_t pte_mkhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_PSE)); return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_SPECIAL)); return pte; } struct vm_area_struct; diff --git a/include/asm-xtensa/pgtable.h b/include/asm-xtensa/pgtable.h index 4429070..7b15afb 100644 --- a/include/asm-xtensa/pgtable.h +++ b/include/asm-xtensa/pgtable.h @@ -241,8 +241,6 @@ static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } -static inline int pte_special(pte_t pte) { return 0; } - static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~(_PAGE_RW | _PAGE_WRENABLE); return pte; } static inline pte_t pte_rdprotect(pte_t pte) { pte_val(pte) &= ~_PAGE_USER; return pte; } static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; } @@ -251,7 +249,6 @@ static inline pte_t pte_mkread(pte_t pte) { pte_val(pte) |= _PAGE_USER; return p static inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= _PAGE_DIRTY; return pte; } static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_RW; return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } /* * Conversion functions: convert a page and protection to a page entry, diff --git a/include/linux/mm.h b/include/linux/mm.h index 7a40b34..8fe1de1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -173,7 +173,6 @@ extern unsigned int kobjsize(const void *objp); #define VM_FOREIGN 0x04000000 /* Has pages belonging to another VM */ #endif #define VM_ALWAYSDUMP 0x08000000 /* Always include in core dumps */ -#define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS @@ -763,9 +762,7 @@ struct zap_details { unsigned long truncate_count; /* Compare vm_truncate_count */ }; -struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, - pte_t pte); - +struct page *vm_normal_page(struct vm_area_struct *, unsigned long, pte_t); unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *); unsigned long unmap_vmas(struct mmu_gather **tlb, @@ -836,39 +833,6 @@ extern unsigned long do_mremap(unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr); -#ifdef CONFIG_HAVE_GET_USER_PAGES_FAST -/* - * get_user_pages_fast provides equivalent functionality to get_user_pages, - * operating on current and current->mm (force=0 and doesn't return any vmas). - * - * get_user_pages_fast may take mmap_sem and page tables, so no assumptions - * can be made about locking. get_user_pages_fast is to be implemented in a - * way that is advantageous (vs get_user_pages()) when the user memory area is - * already faulted in and present in ptes. However if the pages have to be - * faulted in, it may turn out to be slightly slower). - */ -int get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages); - -#else -/* - * Should probably be moved to asm-generic, and architectures can include it if - * they don't implement their own get_user_pages_fast. - */ -#define get_user_pages_fast(start, nr_pages, write, pages) \ -({ \ - struct mm_struct *mm = current->mm; \ - int ret; \ - \ - down_read(&mm->mmap_sem); \ - ret = get_user_pages(current, mm, start, nr_pages, \ - write, 0, pages, NULL); \ - up_read(&mm->mmap_sem); \ - \ - ret; \ -}) -#endif - /* * Prototype to add a shrinker callback for ageable caches. * diff --git a/mm/Kconfig b/mm/Kconfig index fdbc3dc..59d3216 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -77,9 +77,6 @@ config FLAT_NODE_MEM_MAP def_bool y depends on !SPARSEMEM -config HAVE_GET_USER_PAGES_FAST - bool - # # Both the NUMA code and DISCONTIGMEM use arrays of pg_data_t's # to represent different areas of memory. This variable allows diff --git a/mm/memory.c b/mm/memory.c index 9de1c7d..9ea444b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -367,93 +367,56 @@ static inline int is_cow_mapping(unsigned int flags) } /* - * vm_normal_page -- This function gets the "struct page" associated with a pte. + * This function gets the "struct page" associated with a pte. * - * "Special" mappings do not wish to be associated with a "struct page" (either - * it doesn't exist, or it exists but they don't want to touch it). In this - * case, NULL is returned here. "Normal" mappings do have a struct page. + * NOTE! Some mappings do not have "struct pages". A raw PFN mapping + * will have each page table entry just pointing to a raw page frame + * number, and as far as the VM layer is concerned, those do not have + * pages associated with them - even if the PFN might point to memory + * that otherwise is perfectly fine and has a "struct page". * - * There are 2 broad cases. Firstly, an architecture may define a pte_special() - * pte bit, in which case this function is trivial. Secondly, an architecture - * may not have a spare pte bit, which requires a more complicated scheme, - * described below. - * - * A raw VM_PFNMAP mapping (ie. one that is not COWed) is always considered a - * special mapping (even if there are underlying and valid "struct pages"). - * COWed pages of a VM_PFNMAP are always normal. - * - * The way we recognize COWed pages within VM_PFNMAP mappings is through the - * rules set up by "remap_pfn_range()": the vma will have the VM_PFNMAP bit - * set, and the vm_pgoff will point to the first PFN mapped: thus every special - * mapping will always honor the rule + * The way we recognize those mappings is through the rules set up + * by "remap_pfn_range()": the vma will have the VM_PFNMAP bit set, + * and the vm_pgoff will point to the first PFN mapped: thus every + * page that is a raw mapping will always honor the rule * * pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT) * - * And for normal mappings this is false. - * - * This restricts such mappings to be a linear translation from virtual address - * to pfn. To get around this restriction, we allow arbitrary mappings so long - * as the vma is not a COW mapping; in that case, we know that all ptes are - * special (because none can have been COWed). - * - * - * In order to support COW of arbitrary special mappings, we have VM_MIXEDMAP. - * - * VM_MIXEDMAP mappings can likewise contain memory with or without "struct - * page" backing, however the difference is that _all_ pages with a struct - * page (that is, those where pfn_valid is true) are refcounted and considered - * normal pages by the VM. The disadvantage is that pages are refcounted - * (which can be slower and simply not an option for some PFNMAP users). The - * advantage is that we don't have to follow the strict linearity rule of - * PFNMAP mappings in order to support COWable mappings. - * + * and if that isn't true, the page has been COW'ed (in which case it + * _does_ have a "struct page" associated with it even if it is in a + * VM_PFNMAP range). */ -#ifdef __HAVE_ARCH_PTE_SPECIAL -# define HAVE_PTE_SPECIAL 1 -#else -# define HAVE_PTE_SPECIAL 0 -#endif -struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, - pte_t pte) +struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte) { - unsigned long pfn; + unsigned long pfn = pte_pfn(pte); - if (HAVE_PTE_SPECIAL) { - if (likely(!pte_special(pte))) { - BUG_ON(!pfn_valid(pte_pfn(pte))); - return pte_page(pte); - } - BUG_ON(!(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))); - return NULL; + if (unlikely(vma->vm_flags & VM_PFNMAP)) { + unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT; + if (pfn == vma->vm_pgoff + off) + return NULL; + if (!is_cow_mapping(vma->vm_flags)) + return NULL; } - /* !HAVE_PTE_SPECIAL case follows: */ - - pfn = pte_pfn(pte); - - if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { - if (vma->vm_flags & VM_MIXEDMAP) { - if (!pfn_valid(pfn)) - return NULL; - goto out; - } else { - unsigned long off; - off = (addr - vma->vm_start) >> PAGE_SHIFT; - if (pfn == vma->vm_pgoff + off) - return NULL; - if (!is_cow_mapping(vma->vm_flags)) - return NULL; - } + /* + * Add some anal sanity checks for now. Eventually, + * we should just do "return pfn_to_page(pfn)", but + * in the meantime we check that we get a valid pfn, + * and that the resulting page looks ok. + */ + if (unlikely(!pfn_valid(pfn))) { + if (!(vma->vm_flags & VM_RESERVED)) + print_bad_pte(vma, pte, addr); + return NULL; } - BUG_ON(!pfn_valid(pfn)); - /* - * NOTE! We still have PageReserved() pages in the page tables. + * NOTE! We still have PageReserved() pages in the page + * tables. * - * eg. VDSO mappings can cause them to exist. + * The PAGE_ZERO() pages and various VDSO mappings can + * cause them to exist. */ -out: return pfn_to_page(pfn); } @@ -1346,17 +1309,8 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, pte_t *pte, entry; spinlock_t *ptl; - /* - * Technically, architectures with pte_special can avoid all these - * restrictions (same for remap_pfn_range). However we would like - * consistency in testing and feature parity among all, so we should - * try to keep these invariants in place for everybody. - */ - BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))); - BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == - (VM_PFNMAP|VM_MIXEDMAP)); - BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); - BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn)); + BUG_ON(!(vma->vm_flags & VM_PFNMAP)); + BUG_ON(is_cow_mapping(vma->vm_flags)); retval = -ENOMEM; pte = get_locked_pte(mm, addr, &ptl); @@ -1367,7 +1321,7 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, goto out_unlock; /* Ok, finally just insert the thing.. */ - entry = pte_mkspecial(pfn_pte(pfn, vma->vm_page_prot)); + entry = pfn_pte(pfn, vma->vm_page_prot); set_pte_at(mm, addr, pte, entry); update_mmu_cache(vma, addr, entry); @@ -1397,7 +1351,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, return -ENOMEM; do { BUG_ON(!pte_none(*pte)); - set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot))); + set_pte_at(mm, addr, pte, pfn_pte(pfn, prot)); pfn++; } while (pte++, addr += PAGE_SIZE, addr != end); pte_unmap_unlock(pte - 1, ptl); @@ -2503,13 +2457,10 @@ static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma, int ret = VM_FAULT_MINOR; pte_unmap(page_table); - BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))); - BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); + BUG_ON(!(vma->vm_flags & VM_PFNMAP)); + BUG_ON(is_cow_mapping(vma->vm_flags)); pfn = vma->vm_ops->nopfn(vma, address & PAGE_MASK); - - BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn)); - if (unlikely(pfn == NOPFN_OOM)) return VM_FAULT_OOM; else if (unlikely(pfn == NOPFN_SIGBUS))