diff -urNp ref/arch/alpha/kernel/smp.c 2.4.20pre7aa1/arch/alpha/kernel/smp.c --- ref/arch/alpha/kernel/smp.c Wed Sep 18 00:46:57 2002 +++ 2.4.20pre7aa1/arch/alpha/kernel/smp.c Wed Sep 18 00:47:01 2002 @@ -121,6 +121,7 @@ smp_store_cpu_info(int cpuid) cpu_data[cpuid].asn_lock = 0; local_irq_count(cpuid) = 0; local_bh_count(cpuid) = 0; + INIT_LIST_HEAD(&cpu_data[cpuid].pte_cache); } /* diff -urNp ref/arch/alpha/mm/init.c 2.4.20pre7aa1/arch/alpha/mm/init.c --- ref/arch/alpha/mm/init.c Fri Aug 9 14:52:01 2002 +++ 2.4.20pre7aa1/arch/alpha/mm/init.c Wed Sep 18 00:47:01 2002 @@ -43,7 +43,9 @@ extern void die_if_kernel(char *,struct struct thread_struct original_pcb; #ifndef CONFIG_SMP -struct pgtable_cache_struct quicklists; +struct pgtable_cache_struct quicklists = { + pte_cache: LIST_HEAD_INIT(quicklists.pte_cache), +}; #endif pgd_t * @@ -82,8 +84,8 @@ int do_check_pgt_cache(int low, int high pmd_free_slow(pmd_alloc_one_fast(NULL, 0)); freed++; } - if(pte_quicklist) { - pte_free_slow(pte_alloc_one_fast(NULL, 0)); + if (!list_empty(&pte_quicklist)) { + pte_free_slow(pte_alloc_one_fast_lifo(NULL, 0)); freed++; } } while(pgtable_cache_size > low); diff -urNp ref/arch/i386/kernel/setup.c 2.4.20pre7aa1/arch/i386/kernel/setup.c --- ref/arch/i386/kernel/setup.c Wed Sep 18 00:47:00 2002 +++ 2.4.20pre7aa1/arch/i386/kernel/setup.c Wed Sep 18 00:47:01 2002 @@ -120,7 +120,12 @@ */ char ignore_irq13; /* set if exception 16 works */ -struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; +struct cpuinfo_x86 boot_cpu_data = { + wp_works_ok: -1, + hlt_works_ok: 1, + cpuid_level: -1, + pte_quick: LIST_HEAD_INIT(boot_cpu_data.pte_quick), +}; unsigned long mmu_cr4_features; diff -urNp ref/arch/i386/kernel/smpboot.c 2.4.20pre7aa1/arch/i386/kernel/smpboot.c --- ref/arch/i386/kernel/smpboot.c Wed Sep 18 00:46:57 2002 +++ 2.4.20pre7aa1/arch/i386/kernel/smpboot.c Wed Sep 18 00:47:01 2002 @@ -144,7 +144,7 @@ void __init smp_store_cpu_info(int id) struct cpuinfo_x86 *c = cpu_data + 
id; *c = boot_cpu_data; - c->pte_quick = 0; + INIT_LIST_HEAD(&c->pte_quick); c->pmd_quick = 0; c->pgd_quick = 0; c->pgtable_cache_sz = 0; diff -urNp ref/arch/i386/kernel/vm86.c 2.4.20pre7aa1/arch/i386/kernel/vm86.c --- ref/arch/i386/kernel/vm86.c Fri Aug 9 14:52:06 2002 +++ 2.4.20pre7aa1/arch/i386/kernel/vm86.c Wed Sep 18 00:47:01 2002 @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -121,7 +122,7 @@ static void mark_screen_rdonly(struct ta { pgd_t *pgd; pmd_t *pmd; - pte_t *pte; + pte_t *pte, *pte_orig; int i; spin_lock(&tsk->mm->page_table_lock); @@ -141,12 +142,13 @@ static void mark_screen_rdonly(struct ta pmd_clear(pmd); goto out; } - pte = pte_offset(pmd, 0xA0000); + pte_orig = pte = pte_offset_atomic(pmd, 0xA0000); for (i = 0; i < 32; i++) { if (pte_present(*pte)) set_pte(pte, pte_wrprotect(*pte)); pte++; } + pte_kunmap(pte_orig); out: spin_unlock(&tsk->mm->page_table_lock); flush_tlb(); diff -urNp ref/arch/i386/mm/fault.c 2.4.20pre7aa1/arch/i386/mm/fault.c --- ref/arch/i386/mm/fault.c Wed Sep 18 00:46:58 2002 +++ 2.4.20pre7aa1/arch/i386/mm/fault.c Wed Sep 18 00:47:01 2002 @@ -19,6 +19,7 @@ #include #include #include /* For unblank_screen() */ +#include #include #include @@ -322,12 +323,14 @@ no_context: asm("movl %%cr3,%0":"=r" (page)); page = ((unsigned long *) __va(page))[address >> 22]; printk(KERN_ALERT "*pde = %08lx\n", page); +#ifndef CONFIG_HIGHMEM if (page & 1) { page &= PAGE_MASK; address &= 0x003ff000; page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT]; printk(KERN_ALERT "*pte = %08lx\n", page); } +#endif die("Oops", regs, error_code); bust_spinlocks(0); spin_unlock(&oops_lock); @@ -381,7 +384,9 @@ vmalloc_fault: int offset = __pgd_offset(address); pgd_t *pgd, *pgd_k; pmd_t *pmd, *pmd_k; - pte_t *pte_k; + pte_t *pte_k, *pte_k_orig; + struct page * page; + int present; asm("movl %%cr3,%0":"=r" (pgd)); pgd = offset + (pgd_t *)__va(pgd); @@ -397,8 +402,14 @@ vmalloc_fault: goto no_context; set_pmd(pmd, *pmd_k); - 
pte_k = pte_offset(pmd_k, address); - if (!pte_present(*pte_k)) + local_irq_disable(); + page = __pmd_page(*pmd_k); + pte_k_orig = pte_k = (pte_t *) kmap_atomic(page, KM_BH_IRQ); + pte_k += __pte_offset(address); + present = pte_present(*pte_k); + kunmap_atomic(pte_k_orig, KM_BH_IRQ); + local_irq_enable(); + if (!present) goto no_context; return; } diff -urNp ref/arch/i386/mm/init.c 2.4.20pre7aa1/arch/i386/mm/init.c --- ref/arch/i386/mm/init.c Fri Sep 13 06:13:34 2002 +++ 2.4.20pre7aa1/arch/i386/mm/init.c Wed Sep 18 00:47:01 2002 @@ -56,8 +56,8 @@ int do_check_pgt_cache(int low, int high pmd_free_slow(pmd_alloc_one_fast(NULL, 0)); freed++; } - if (pte_quicklist) { - pte_free_slow(pte_alloc_one_fast(NULL, 0)); + if (!list_empty(&pte_quicklist)) { + pte_free_slow(pte_alloc_one_fast_lifo(NULL, 0)); freed++; } } while(pgtable_cache_size > low); @@ -76,7 +76,7 @@ pte_t *kmap_pte; pgprot_t kmap_prot; #define kmap_get_fixmap_pte(vaddr) \ - pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) + pte_offset_lowmem(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) void __init kmap_init(void) { @@ -142,7 +142,7 @@ static inline void set_pte_phys (unsigne printk("PAE BUG #01!\n"); return; } - pte = pte_offset(pmd, vaddr); + pte = pte_offset_lowmem(pmd, vaddr); /* stored as-is, to permit clearing entries */ set_pte(pte, mk_pte_phys(phys, flags)); @@ -164,42 +164,54 @@ void __set_fixmap (enum fixed_addresses set_pte_phys(address, phys, flags); } -static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t *pgd_base) +static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t *pgd_base, int contigous_pte) { pgd_t *pgd; pmd_t *pmd; pte_t *pte; int i, j; - unsigned long vaddr; + int nr_pte; + void * pte_array; - vaddr = start; - i = __pgd_offset(vaddr); - j = __pmd_offset(vaddr); + if (start & ~PAGE_MASK) + BUG(); + + start &= PMD_MASK; + + i = __pgd_offset(start); + j = __pmd_offset(start); pgd = pgd_base + i; - for ( ; (i < 
PTRS_PER_PGD) && (vaddr != end); pgd++, i++) { -#if CONFIG_X86_PAE - if (pgd_none(*pgd)) { - pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); - set_pgd(pgd, __pgd(__pa(pmd) + 0x1)); - if (pmd != pmd_offset(pgd, 0)) - printk("PAE BUG #02!\n"); - } - pmd = pmd_offset(pgd, vaddr); -#else - pmd = (pmd_t *)pgd; -#endif - for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) { + if (contigous_pte) { + if (start >= end) + BUG(); + nr_pte = (end - start + PMD_SIZE - 1) >> PMD_SHIFT; + pte_array = alloc_bootmem_low_pages(PAGE_SIZE * nr_pte); + } + for ( ; (i < PTRS_PER_PGD) && (start < end); pgd++, i++) { + pmd = pmd_offset(pgd, start); + for (; (j < PTRS_PER_PMD) && (start < end); pmd++, j++) { if (pmd_none(*pmd)) { - pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); - set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte))); - if (pte != pte_offset(pmd, 0)) + if (contigous_pte) { + pte = (pte_t *) pte_array; + pte_array += PAGE_SIZE; + nr_pte--; + } else + pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + set_pmd(pmd, mk_pmd_phys(__pa(pte), __pgprot(_KERNPG_TABLE))); + if (pte != pte_offset_lowmem(pmd, 0)) BUG(); } - vaddr += PMD_SIZE; + start += PMD_SIZE; } j = 0; } + if (contigous_pte) { + if (nr_pte < 0) + BUG(); + if (nr_pte > 0) + free_bootmem((unsigned long) pte_array, nr_pte * PAGE_SIZE); + } } static void __init pagetable_init (void) @@ -218,8 +230,24 @@ static void __init pagetable_init (void) pgd_base = swapper_pg_dir; #if CONFIG_X86_PAE - for (i = 0; i < PTRS_PER_PGD; i++) + /* + * First set all four entries of the pgd. + * Usually only one page is needed here: if PAGE_OFFSET lowered, + * maybe three pages: need not be contiguous, but might as well. 
+ */ + pmd = (pmd_t *)alloc_bootmem_low_pages(KERNEL_PGD_PTRS*PAGE_SIZE); + for (i = 1; i < USER_PGD_PTRS; i++) set_pgd(pgd_base + i, __pgd(1 + __pa(empty_zero_page))); + for (; i < PTRS_PER_PGD; i++, pmd += PTRS_PER_PMD) + set_pgd(pgd_base + i, __pgd(1 + __pa(pmd))); + /* + * Add low memory identity-mappings - SMP needs it when + * starting up on an AP from real-mode. In the non-PAE + * case we already have these mappings through head.S. + * All user-space mappings are explicitly cleared after + * SMP startup. + */ + pgd_base[0] = pgd_base[USER_PGD_PTRS]; #endif i = __pgd_offset(PAGE_OFFSET); pgd = pgd_base + i; @@ -228,30 +256,23 @@ static void __init pagetable_init (void) vaddr = i*PGDIR_SIZE; if (end && (vaddr >= end)) break; -#if CONFIG_X86_PAE - pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); - set_pgd(pgd, __pgd(__pa(pmd) + 0x1)); -#else - pmd = (pmd_t *)pgd; -#endif - if (pmd != pmd_offset(pgd, 0)) - BUG(); + pmd = pmd_offset(pgd, 0); for (j = 0; j < PTRS_PER_PMD; pmd++, j++) { vaddr = i*PGDIR_SIZE + j*PMD_SIZE; if (end && (vaddr >= end)) break; if (cpu_has_pse) { - unsigned long __pe; + unsigned long prot; set_in_cr4(X86_CR4_PSE); boot_cpu_data.wp_works_ok = 1; - __pe = _KERNPG_TABLE + _PAGE_PSE + __pa(vaddr); + prot = _KERNPG_TABLE + _PAGE_PSE; /* Make it "global" too if supported */ if (cpu_has_pge) { set_in_cr4(X86_CR4_PGE); - __pe += _PAGE_GLOBAL; + prot += _PAGE_GLOBAL; } - set_pmd(pmd, __pmd(__pe)); + set_pmd(pmd, mk_pmd_phys(__pa(vaddr), __pgprot(prot))); continue; } @@ -263,43 +284,35 @@ static void __init pagetable_init (void) break; *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL); } - set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base))); - if (pte_base != pte_offset(pmd, 0)) + set_pmd(pmd, mk_pmd_phys(__pa(pte_base), __pgprot(_KERNPG_TABLE))); + if (pte_base != pte_offset_lowmem(pmd, 0)) BUG(); } } - /* - * Fixed mappings, only the page table structure has to be - * created - mappings will be set by set_fixmap(): - */ - vaddr = 
__fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; - fixrange_init(vaddr, 0, pgd_base); - #if CONFIG_HIGHMEM /* - * Permanent kmaps: + * Permanent kmaps: initialize before the fixmap area + * because here the ptes need to be contiguous. */ vaddr = PKMAP_BASE; - fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); + fixrange_init(vaddr, vaddr + PKMAP_SIZE, pgd_base, 1); pgd = swapper_pg_dir + __pgd_offset(vaddr); pmd = pmd_offset(pgd, vaddr); - pte = pte_offset(pmd, vaddr); + pte = pte_offset_lowmem(pmd, vaddr); pkmap_page_table = pte; #endif -#if CONFIG_X86_PAE /* - * Add low memory identity-mappings - SMP needs it when - * starting up on an AP from real-mode. In the non-PAE - * case we already have these mappings through head.S. - * All user-space mappings are explicitly cleared after - * SMP startup. + * Fixed mappings, only the page table structure has to be + * created - mappings will be set by set_fixmap(). + * It is ok if we partially overlap on the PKMAP_BASE + * due to the difference between __FIXADDR_START and FIXADDR_START. 
*/ - pgd_base[0] = pgd_base[USER_PTRS_PER_PGD]; -#endif + vaddr = __FIXADDR_START; + fixrange_init(vaddr, vaddr + __FIXADDR_SIZE, pgd_base, 0); } void __init zap_low_mappings (void) @@ -397,7 +410,7 @@ void __init test_wp_bit(void) pgd = swapper_pg_dir + __pgd_offset(vaddr); pmd = pmd_offset(pgd, vaddr); - pte = pte_offset(pmd, vaddr); + pte = pte_offset_lowmem(pmd, vaddr); old_pte = *pte; *pte = mk_pte_phys(0, PAGE_READONLY); local_flush_tlb(); diff -urNp ref/arch/i386/mm/ioremap.c 2.4.20pre7aa1/arch/i386/mm/ioremap.c --- ref/arch/i386/mm/ioremap.c Fri Aug 9 14:52:06 2002 +++ 2.4.20pre7aa1/arch/i386/mm/ioremap.c Wed Sep 18 00:47:01 2002 @@ -9,6 +9,7 @@ */ #include +#include #include #include @@ -49,10 +50,11 @@ static inline int remap_area_pmd(pmd_t * if (address >= end) BUG(); do { - pte_t * pte = pte_alloc(&init_mm, pmd, address); + pte_t * pte = pte_alloc_atomic(&init_mm, pmd, address); if (!pte) return -ENOMEM; remap_area_pte(pte, address, end - address, address + phys_addr, flags); + pte_kunmap(pte); address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address && (address < end)); diff -urNp ref/arch/ia64/kernel/perfmon.c 2.4.20pre7aa1/arch/ia64/kernel/perfmon.c --- ref/arch/ia64/kernel/perfmon.c Fri Sep 13 06:13:34 2002 +++ 2.4.20pre7aa1/arch/ia64/kernel/perfmon.c Wed Sep 18 00:47:01 2002 @@ -477,34 +477,8 @@ pfm_get_stamp(void) return ia64_get_itc(); } -/* Given PGD from the address space's page table, return the kernel - * virtual mapping of the physical memory mapped at ADR. 
- */ -static inline unsigned long -uvirt_to_kva(pgd_t *pgd, unsigned long adr) -{ - unsigned long ret = 0UL; - pmd_t *pmd; - pte_t *ptep, pte; - - if (!pgd_none(*pgd)) { - pmd = pmd_offset(pgd, adr); - if (!pmd_none(*pmd)) { - ptep = pte_offset(pmd, adr); - pte = *ptep; - if (pte_present(pte)) { - ret = (unsigned long) page_address(pte_page(pte)); - ret |= (adr & (PAGE_SIZE - 1)); - } - } - } - DBprintk(("[%d] uv2kva(%lx-->%lx)\n", current->pid, adr, ret)); - return ret; -} - /* Here we want the physical address of the memory. - * This is used when initializing the contents of the - * area and marking the pages as reserved. + * This is used when initializing the contents of the area. */ static inline unsigned long pfm_kvirt_to_pa(unsigned long adr) @@ -538,13 +512,12 @@ pfm_rvmalloc(unsigned long size) static void pfm_rvfree(void *mem, unsigned long size) { - unsigned long adr, page = 0; + unsigned long adr; if (mem) { adr=(unsigned long) mem; - while (size > 0) { - page = pfm_kvirt_to_pa(adr); - mem_map_unreserve(virt_to_page(__va(page))); + while ((long) size > 0) { + mem_map_unreserve(vmalloc_to_page((void *)adr)); adr+=PAGE_SIZE; size-=PAGE_SIZE; } diff -urNp ref/arch/ia64/kernel/setup.c 2.4.20pre7aa1/arch/ia64/kernel/setup.c --- ref/arch/ia64/kernel/setup.c Fri Sep 13 06:13:34 2002 +++ 2.4.20pre7aa1/arch/ia64/kernel/setup.c Wed Sep 18 00:47:01 2002 @@ -656,6 +656,9 @@ cpu_init (void) printk ("cpu_init: PAL RSE info failed, assuming 96 physical stacked regs\n"); num_phys_stacked = 96; } + + INIT_LIST_HEAD(&local_cpu_data->pte_quick); + local_cpu_data->phys_stacked_size_p8 = num_phys_stacked*8 + 8; platform_cpu_init(); diff -urNp ref/arch/ia64/mm/init.c 2.4.20pre7aa1/arch/ia64/mm/init.c --- ref/arch/ia64/mm/init.c Fri Sep 13 06:13:34 2002 +++ 2.4.20pre7aa1/arch/ia64/mm/init.c Wed Sep 18 00:47:01 2002 @@ -56,8 +56,8 @@ do_check_pgt_cache (int low, int high) free_page((unsigned long)pgd_alloc_one_fast(0)), ++freed; if (pmd_quicklist) free_page((unsigned 
long)pmd_alloc_one_fast(0, 0)), ++freed; - if (pte_quicklist) - free_page((unsigned long)pte_alloc_one_fast(0, 0)), ++freed; + if (!list_empty(&pte_quicklist)) + pte_free_slow(pte_alloc_one_fast_lifo(0, 0)), ++freed; } while (pgtable_cache_size > low); } return freed; diff -urNp ref/drivers/char/drm/drm_proc.h 2.4.20pre7aa1/drivers/char/drm/drm_proc.h --- ref/drivers/char/drm/drm_proc.h Fri Sep 13 06:13:40 2002 +++ 2.4.20pre7aa1/drivers/char/drm/drm_proc.h Wed Sep 18 00:47:01 2002 @@ -448,7 +448,7 @@ static int DRM(_vma_info)(char *buf, cha for (i = vma->vm_start; i < vma->vm_end; i += PAGE_SIZE) { pgd = pgd_offset(vma->vm_mm, i); pmd = pmd_offset(pgd, i); - pte = pte_offset(pmd, i); + pte = pte_offset_atomic(pmd, i); if (pte_present(*pte)) { address = __pa(pte_page(*pte)) + (i & (PAGE_SIZE-1)); @@ -464,6 +464,7 @@ static int DRM(_vma_info)(char *buf, cha } else { DRM_PROC_PRINT(" 0x%08lx\n", i); } + pte_kunmap(pte); } #endif } diff -urNp ref/drivers/char/drm-4.0/proc.c 2.4.20pre7aa1/drivers/char/drm-4.0/proc.c --- ref/drivers/char/drm-4.0/proc.c Mon Feb 25 22:05:06 2002 +++ 2.4.20pre7aa1/drivers/char/drm-4.0/proc.c Wed Sep 18 00:47:01 2002 @@ -425,6 +425,7 @@ static int _drm_vma_info(char *buf, char } else { DRM_PROC_PRINT(" 0x%08lx\n", i); } + pte_kunmap(pte); } #endif } diff -urNp ref/drivers/sgi/char/graphics.c 2.4.20pre7aa1/drivers/sgi/char/graphics.c --- ref/drivers/sgi/char/graphics.c Fri Sep 13 06:13:48 2002 +++ 2.4.20pre7aa1/drivers/sgi/char/graphics.c Wed Sep 18 00:47:01 2002 @@ -221,6 +221,7 @@ sgi_graphics_nopage (struct vm_area_stru int board = GRAPHICS_CARD (vma->vm_dentry->d_inode->i_rdev); unsigned long virt_add, phys_add; + struct page * page; #ifdef DEBUG printk ("Got a page fault for board %d address=%lx guser=%lx\n", board, @@ -247,8 +248,10 @@ sgi_graphics_nopage (struct vm_area_stru pgd = pgd_offset(current->mm, address); pmd = pmd_offset(pgd, address); - pte = pte_offset(pmd, address); - return pte_page(*pte); + pte = pte_offset_atomic(pmd, 
address); + page = pte_page(*pte); + pte_kunmap(pte); + return page; } /* diff -urNp ref/fs/exec.c 2.4.20pre7aa1/fs/exec.c --- ref/fs/exec.c Fri Sep 13 06:13:49 2002 +++ 2.4.20pre7aa1/fs/exec.c Wed Sep 18 00:47:01 2002 @@ -291,11 +291,11 @@ void put_dirty_page(struct task_struct * pmd = pmd_alloc(tsk->mm, pgd, address); if (!pmd) goto out; - pte = pte_alloc(tsk->mm, pmd, address); + pte = pte_alloc_atomic(tsk->mm, pmd, address); if (!pte) goto out; if (!pte_none(*pte)) - goto out; + goto out_unmap; lru_cache_add(page); flush_dcache_page(page); flush_page_to_ram(page); @@ -303,8 +303,12 @@ void put_dirty_page(struct task_struct * tsk->mm->rss++; spin_unlock(&tsk->mm->page_table_lock); + pte_kunmap(pte); + /* no need for flush_tlb */ return; +out_unmap: + pte_kunmap(pte); out: spin_unlock(&tsk->mm->page_table_lock); __free_page(page); diff -urNp ref/fs/proc/array.c 2.4.20pre7aa1/fs/proc/array.c --- ref/fs/proc/array.c Wed Sep 18 00:46:57 2002 +++ 2.4.20pre7aa1/fs/proc/array.c Wed Sep 18 00:47:01 2002 @@ -398,7 +398,7 @@ int proc_pid_stat(struct task_struct *ta static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned long size, int * pages, int * shared, int * dirty, int * total) { - pte_t * pte; + pte_t * pte, * pte_orig; unsigned long end; if (pmd_none(*pmd)) @@ -408,7 +408,7 @@ static inline void statm_pte_range(pmd_t pmd_clear(pmd); return; } - pte = pte_offset(pmd, address); + pte_orig = pte = pte_offset(pmd, address); address &= ~PMD_MASK; end = address + size; if (end > PMD_SIZE) @@ -435,6 +435,7 @@ static inline void statm_pte_range(pmd_t if (page_count(pte_page(page)) > 1) ++*shared; } while (address < end); + pte_kunmap(pte_orig); } static inline void statm_pmd_range(pgd_t * pgd, unsigned long address, unsigned long size, diff -urNp ref/include/asm-alpha/pgalloc.h 2.4.20pre7aa1/include/asm-alpha/pgalloc.h --- ref/include/asm-alpha/pgalloc.h Fri Aug 9 14:52:20 2002 +++ 2.4.20pre7aa1/include/asm-alpha/pgalloc.h Wed Sep 18 00:47:01 2002 
@@ -2,6 +2,7 @@ #define _ALPHA_PGALLOC_H #include +#include #ifndef __EXTERN_INLINE #define __EXTERN_INLINE extern inline @@ -234,7 +235,7 @@ extern void flush_tlb_range(struct mm_st extern struct pgtable_cache_struct { unsigned long *pgd_cache; unsigned long *pmd_cache; - unsigned long *pte_cache; + struct list_head pte_cache; unsigned long pgtable_cache_sz; } quicklists; #else @@ -246,7 +247,7 @@ extern struct pgtable_cache_struct { #define pte_quicklist (quicklists.pte_cache) #define pgtable_cache_size (quicklists.pgtable_cache_sz) -#define pmd_populate(mm, pmd, pte) pmd_set(pmd, pte) +#define pmd_populate(mm, pmd, page) do { *(pmd) = mk_pmd(page, __pgprot(_PAGE_TABLE)); } while (0) #define pgd_populate(mm, pgd, pmd) pgd_set(pgd, pmd) extern pgd_t *get_pgd_slow(void); @@ -288,8 +289,8 @@ static inline pmd_t *pmd_alloc_one_fast( { unsigned long *ret; - if ((ret = (unsigned long *)pte_quicklist) != NULL) { - pte_quicklist = (unsigned long *)(*ret); + if ((ret = (unsigned long *)pmd_quicklist) != NULL) { + pmd_quicklist = (unsigned long *)(*ret); ret[0] = 0; pgtable_cache_size--; } @@ -298,8 +299,8 @@ static inline pmd_t *pmd_alloc_one_fast( static inline void pmd_free_fast(pmd_t *pmd) { - *(unsigned long *)pmd = (unsigned long) pte_quicklist; - pte_quicklist = (unsigned long *) pmd; + *(unsigned long *)pmd = (unsigned long) pmd_quicklist; + pmd_quicklist = (unsigned long *) pmd; pgtable_cache_size++; } @@ -308,36 +309,48 @@ static inline void pmd_free_slow(pmd_t * free_page((unsigned long)pmd); } -static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address) +static inline struct page * pte_alloc_one_fast(struct mm_struct *mm, + unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL); - if (pte) - clear_page(pte); - return pte; + struct list_head * entry = pte_quicklist.next; /* FIFO */ + struct page * page = NULL; + + if (entry != &pte_quicklist) { + list_del(entry); + page = list_entry(entry, struct page, list); + 
pgtable_cache_size--; + } + return page; } -static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, unsigned long address) +static inline struct page * pte_alloc_one_fast_lifo(struct mm_struct *mm, + unsigned long address) { - unsigned long *ret; + struct list_head * entry = pte_quicklist.prev; /* LIFO */ + struct page * page = NULL; - if ((ret = (unsigned long *)pte_quicklist) != NULL) { - pte_quicklist = (unsigned long *)(*ret); - ret[0] = 0; + if (entry != &pte_quicklist) { + list_del(entry); + page = list_entry(entry, struct page, list); pgtable_cache_size--; } - return (pte_t *)ret; + return page; } -static inline void pte_free_fast(pte_t *pte) +static inline void pte_free_fast(struct page * page) { - *(unsigned long *)pte = (unsigned long) pte_quicklist; - pte_quicklist = (unsigned long *) pte; + list_add(&page->list, &pte_quicklist); pgtable_cache_size++; } -static inline void pte_free_slow(pte_t *pte) +static __inline__ void pte_free_slow(struct page * page) +{ + __free_page(page); +} + +static inline void pte_free_via_pmd(pmd_t pmd) { - free_page((unsigned long)pte); + pte_free_fast(virt_to_page(pte_offset(&pmd, 0))); } #define pte_free(pte) pte_free_fast(pte) diff -urNp ref/include/asm-alpha/pgtable.h 2.4.20pre7aa1/include/asm-alpha/pgtable.h --- ref/include/asm-alpha/pgtable.h Fri Aug 9 14:52:20 2002 +++ 2.4.20pre7aa1/include/asm-alpha/pgtable.h Wed Sep 18 00:47:01 2002 @@ -221,6 +221,29 @@ extern unsigned long __zero_page(void); }) #endif +#ifndef CONFIG_DISCONTIGMEM +#define mk_pmd(page, pgprot) \ +({ \ + pmd_t pmd; \ + \ + pmd_val(pmd) = ((unsigned long)(page - mem_map) << 32) | \ + pgprot_val(pgprot); \ + pmd; \ +}) +#else +#define mk_pmd(page, pgprot) \ +({ \ + pmd_t pmd; \ + unsigned long pfn; \ + \ + pfn = ((unsigned long)((page)-page_zone(page)->zone_mem_map)) << 32; \ + pfn += page_zone(page)->zone_start_paddr << (32-PAGE_SHIFT); \ + pmd_val(pmd) = pfn | pgprot_val(pgprot); \ + \ + pmd; \ +}) +#endif + extern inline pte_t 
mk_pte_phys(unsigned long physpage, pgprot_t pgprot) { pte_t pte; pte_val(pte) = (PHYS_TWIDDLE(physpage) << (32-PAGE_SHIFT)) | pgprot_val(pgprot); return pte; } @@ -310,6 +333,15 @@ extern inline pte_t * pte_offset(pmd_t * { return (pte_t *) pmd_page(*dir) + ((address >> PAGE_SHIFT) & (PTRS_PER_PAGE - 1)); } +#define pte_offset2(dir, address) pte_offset(dir, address) +#define pte_offset_atomic(dir, address) pte_offset(dir, address) +#define pte_offset_atomic2(dir, address) pte_offset(dir, address) +#define pte_offset_under_lock(dir, address, mm) pte_offset(dir, address) +#define pte_offset2_under_lock(dir, address, mm) pte_offset(dir, address) +#define pte_kunmap(ptep) do { } while(0) +#define pte_kunmap2(ptep) do { } while(0) +#define pte_kunmap_atomic2(ptep) do { } while(0) +#define pte_alloc_atomic(mm, pmd, address) pte_alloc(mm, pmd, address) extern pgd_t swapper_pg_dir[1024]; diff -urNp ref/include/asm-alpha/prefetch.h 2.4.20pre7aa1/include/asm-alpha/prefetch.h --- ref/include/asm-alpha/prefetch.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre7aa1/include/asm-alpha/prefetch.h Wed Sep 18 00:47:01 2002 @@ -0,0 +1,23 @@ +#ifndef __ASM_ALPHA_PREFETCH_H +#define __ASM_ALPHA_PREFETCH_H + +#define ARCH_HAS_PREFETCH +#define ARCH_HAS_PREFETCHW +#define ARCH_HAS_SPINLOCK_PREFETCH + +extern inline void prefetch(const void *ptr) +{ + __asm__ ("ldl $31,%0" : : "m"(*(char *)ptr)); +} + +extern inline void prefetchw(const void *ptr) +{ + __asm__ ("ldl $31,%0" : : "m"(*(char *)ptr)); +} + +extern inline void spin_lock_prefetch(const void *ptr) +{ + __asm__ ("ldl $31,%0" : : "m"(*(char *)ptr)); +} + +#endif /* __ASM_ALPHA_PREFETCH_H */ diff -urNp ref/include/asm-alpha/processor.h 2.4.20pre7aa1/include/asm-alpha/processor.h --- ref/include/asm-alpha/processor.h Tue Jan 22 18:54:25 2002 +++ 2.4.20pre7aa1/include/asm-alpha/processor.h Wed Sep 18 00:47:01 2002 @@ -150,25 +150,4 @@ unsigned long get_wchan(struct task_stru #define cpu_relax() do { } while (0) -#define ARCH_HAS_PREFETCH 
-#define ARCH_HAS_PREFETCHW -#define ARCH_HAS_SPINLOCK_PREFETCH - -extern inline void prefetch(const void *ptr) -{ - __asm__ ("ldl $31,%0" : : "m"(*(char *)ptr)); -} - -extern inline void prefetchw(const void *ptr) -{ - __asm__ ("ldl $31,%0" : : "m"(*(char *)ptr)); -} - -extern inline void spin_lock_prefetch(const void *ptr) -{ - __asm__ ("ldl $31,%0" : : "m"(*(char *)ptr)); -} - - - #endif /* __ASM_ALPHA_PROCESSOR_H */ diff -urNp ref/include/asm-alpha/smp.h 2.4.20pre7aa1/include/asm-alpha/smp.h --- ref/include/asm-alpha/smp.h Wed Sep 18 00:46:57 2002 +++ 2.4.20pre7aa1/include/asm-alpha/smp.h Wed Sep 18 00:47:01 2002 @@ -3,6 +3,7 @@ #include #include +#include /* HACK: Cabrio WHAMI return value is bogus if more than 8 bits used.. :-( */ @@ -30,7 +31,7 @@ struct cpuinfo_alpha { int asn_lock; unsigned long *pgd_cache; unsigned long *pmd_cache; - unsigned long *pte_cache; + struct list_head pte_cache; unsigned long pgtable_cache_sz; unsigned long ipi_count; unsigned long irq_attempt[NR_IRQS]; diff -urNp ref/include/asm-i386/fixmap.h 2.4.20pre7aa1/include/asm-i386/fixmap.h --- ref/include/asm-i386/fixmap.h Fri Sep 13 06:13:52 2002 +++ 2.4.20pre7aa1/include/asm-i386/fixmap.h Wed Sep 18 00:47:01 2002 @@ -94,8 +94,10 @@ extern void __set_fixmap (enum fixed_add * at the top of mem.. 
*/ #define FIXADDR_TOP (0xffffe000UL) -#define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) +#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE + PAGE_SIZE) +#define __FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define __FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE + PAGE_SIZE) #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) diff -urNp ref/include/asm-i386/highmem.h 2.4.20pre7aa1/include/asm-i386/highmem.h --- ref/include/asm-i386/highmem.h Mon Sep 16 21:30:39 2002 +++ 2.4.20pre7aa1/include/asm-i386/highmem.h Wed Sep 18 00:47:01 2002 @@ -41,31 +41,31 @@ extern pte_t *pkmap_page_table; extern void kmap_init(void) __init; -/* - * Right now we initialize only a single pte table. It can be extended - * easily, subsequent pte tables have to be allocated in one physical - * chunk of RAM. - */ -#define PKMAP_BASE (0xfe000000UL) -#ifdef CONFIG_X86_PAE -#define LAST_PKMAP 512 -#else +enum km_serie_type { + KM_SERIE_DEFAULT, + KM_SERIE_PAGETABLE, + KM_NR_SERIES, +}; + #define LAST_PKMAP 1024 -#endif -#define LAST_PKMAP_MASK (LAST_PKMAP-1) -#define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) +#define PKMAP_SIZE ((LAST_PKMAP*KM_NR_SERIES) << PAGE_SHIFT) +#define PKMAP_BASE (FIXADDR_START - PKMAP_SIZE - PAGE_SIZE) /* left a page in between */ +#define PKMAP_NR(virt) (((virt)-PKMAP_BASE) >> PAGE_SHIFT) #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) -extern void * FASTCALL(kmap_high(struct page *page)); -extern void FASTCALL(kunmap_high(struct page *page)); +extern void * FASTCALL(kmap_high(struct page *page, int serie)); +extern void FASTCALL(kunmap_high(void * vaddr, int serie)); + +#define kmap(page) kmap_serie(page, KM_SERIE_DEFAULT) +#define kmap_pagetable(page) kmap_serie(page, KM_SERIE_PAGETABLE) -static inline void *kmap(struct page *page) +static inline void *kmap_serie(struct page 
*page, int serie) { if (in_interrupt()) out_of_line_bug(); if (page < highmem_start_page) return page_address(page); - return kmap_high(page); + return kmap_high(page, serie); } static inline void kunmap(struct page *page) @@ -74,7 +74,16 @@ static inline void kunmap(struct page *p out_of_line_bug(); if (page < highmem_start_page) return; - kunmap_high(page); + kunmap_high(page->virtual, KM_SERIE_DEFAULT); +} + +static inline void kunmap_vaddr(void *kvaddr) +{ + if (in_interrupt()) + out_of_line_bug(); + if ((unsigned long) kvaddr < PKMAP_BASE) + return; + kunmap_high(kvaddr, KM_SERIE_DEFAULT); } /* @@ -124,6 +133,22 @@ static inline void kunmap_atomic(void *k #endif } +static inline void kunmap_pagetable(void *kvaddr) +{ + /* direct map */ + if ((unsigned long) kvaddr < PKMAP_BASE) + return; + /* atomic kmap */ + if ((unsigned long) kvaddr >= FIXADDR_START) { + kvaddr = (void *) ((unsigned long) kvaddr & PAGE_MASK); + kunmap_atomic(kvaddr, KM_USER0); + return; + } + if (in_interrupt()) + out_of_line_bug(); + kunmap_high(kvaddr, KM_SERIE_PAGETABLE); +} + #endif /* __KERNEL__ */ #endif /* _ASM_HIGHMEM_H */ diff -urNp ref/include/asm-i386/page.h 2.4.20pre7aa1/include/asm-i386/page.h --- ref/include/asm-i386/page.h Wed Sep 18 00:46:55 2002 +++ 2.4.20pre7aa1/include/asm-i386/page.h Wed Sep 18 00:47:01 2002 @@ -38,20 +38,21 @@ */ #if CONFIG_X86_PAE typedef struct { unsigned long pte_low, pte_high; } pte_t; -typedef struct { unsigned long long pmd; } pmd_t; +typedef struct { unsigned long pmd_low, pmd_high; } pmd_t; typedef struct { unsigned long long pgd; } pgd_t; #define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32)) +#define pmd_val(x) ((x).pmd_low | ((unsigned long long)(x).pmd_high << 32)) #else typedef struct { unsigned long pte_low; } pte_t; -typedef struct { unsigned long pmd; } pmd_t; +typedef struct { unsigned long pmd_low; } pmd_t; typedef struct { unsigned long pgd; } pgd_t; #define pte_val(x) ((x).pte_low) +#define pmd_val(x) 
((x).pmd_low) #endif #define PTE_MASK PAGE_MASK typedef struct { unsigned long pgprot; } pgprot_t; -#define pmd_val(x) ((x).pmd) #define pgd_val(x) ((x).pgd) #define pgprot_val(x) ((x).pgprot) diff -urNp ref/include/asm-i386/pgalloc.h 2.4.20pre7aa1/include/asm-i386/pgalloc.h --- ref/include/asm-i386/pgalloc.h Wed Sep 18 00:47:00 2002 +++ 2.4.20pre7aa1/include/asm-i386/pgalloc.h Wed Sep 18 00:47:01 2002 @@ -11,8 +11,7 @@ #define pte_quicklist (current_cpu_data.pte_quick) #define pgtable_cache_size (current_cpu_data.pgtable_cache_sz) -#define pmd_populate(mm, pmd, pte) \ - set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))) +#define pmd_populate(mm, pmd, page) set_pmd(pmd, mk_pmd(page, __pgprot(_PAGE_TABLE))) /* * Allocate and free page tables. @@ -104,39 +103,48 @@ static inline void free_pgd_slow(pgd_t * #endif } -static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address) +static inline struct page * pte_alloc_one_fast(struct mm_struct *mm, + unsigned long address) { - pte_t *pte; + struct list_head * entry = pte_quicklist.next; /* FIFO */ + struct page * page = NULL; - pte = (pte_t *) __get_free_page(GFP_KERNEL); - if (pte) - clear_page(pte); - return pte; + if (entry != &pte_quicklist) { + list_del(entry); + page = list_entry(entry, struct page, list); + pgtable_cache_size--; + } + return page; } -static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, - unsigned long address) +static inline struct page * pte_alloc_one_fast_lifo(struct mm_struct *mm, + unsigned long address) { - unsigned long *ret; + struct list_head * entry = pte_quicklist.prev; /* LIFO */ + struct page * page = NULL; - if ((ret = (unsigned long *)pte_quicklist) != NULL) { - pte_quicklist = (unsigned long *)(*ret); - ret[0] = ret[1]; + if (entry != &pte_quicklist) { + list_del(entry); + page = list_entry(entry, struct page, list); pgtable_cache_size--; } - return (pte_t *)ret; + return page; } -static inline void pte_free_fast(pte_t *pte) +static inline void 
pte_free_fast(struct page * page) { - *(unsigned long *)pte = (unsigned long) pte_quicklist; - pte_quicklist = (unsigned long *) pte; + list_add(&page->list, &pte_quicklist); pgtable_cache_size++; } -static __inline__ void pte_free_slow(pte_t *pte) +static inline void pte_free_via_pmd(pmd_t pmd) +{ + pte_free_fast(__pmd_page(pmd)); +} + +static __inline__ void pte_free_slow(struct page * page) { - free_page((unsigned long)pte); + __free_page(page); } #define pte_free(pte) pte_free_fast(pte) diff -urNp ref/include/asm-i386/pgtable-2level.h 2.4.20pre7aa1/include/asm-i386/pgtable-2level.h --- ref/include/asm-i386/pgtable-2level.h Fri Sep 13 06:13:54 2002 +++ 2.4.20pre7aa1/include/asm-i386/pgtable-2level.h Wed Sep 18 00:47:01 2002 @@ -60,6 +60,9 @@ static inline pmd_t * pmd_offset(pgd_t * #define pte_same(a, b) ((a).pte_low == (b).pte_low) #define pte_page(x) (mem_map+((unsigned long)(((x).pte_low >> PAGE_SHIFT)))) #define pte_none(x) (!(x).pte_low) +#define __pmd_page(x) (mem_map + ((x).pmd_low >> PAGE_SHIFT)) +#define pmd_none(x) (!(x).pmd_low) #define __mk_pte(page_nr,pgprot) __pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) +#define __mk_pmd(page_nr,pgprot) __pmd(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) #endif /* _I386_PGTABLE_2LEVEL_H */ diff -urNp ref/include/asm-i386/pgtable-3level.h 2.4.20pre7aa1/include/asm-i386/pgtable-3level.h --- ref/include/asm-i386/pgtable-3level.h Fri Sep 13 06:13:54 2002 +++ 2.4.20pre7aa1/include/asm-i386/pgtable-3level.h Wed Sep 18 00:47:01 2002 @@ -49,8 +49,13 @@ static inline void set_pte(pte_t *ptep, smp_wmb(); ptep->pte_low = pte.pte_low; } -#define set_pmd(pmdptr,pmdval) \ - set_64bit((unsigned long long *)(pmdptr),pmd_val(pmdval)) + +static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) +{ + pmdp->pmd_high = pmd.pmd_high; + smp_wmb(); + pmdp->pmd_low = pmd.pmd_low; +} #define set_pgd(pgdptr,pgdval) \ set_64bit((unsigned long long *)(pgdptr),pgd_val(pgdval)) #define set_pte_atomic(pteptr,pteval) \ @@ -91,6 +96,8 @@ static 
inline int pte_same(pte_t a, pte_ #define pte_page(x) (mem_map+(((x).pte_low >> PAGE_SHIFT) | ((x).pte_high << (32 - PAGE_SHIFT)))) #define pte_none(x) (!(x).pte_low && !(x).pte_high) +#define __pmd_page(x) (mem_map + (((x).pmd_low >> PAGE_SHIFT) | ((x).pmd_high << (32-PAGE_SHIFT)))) +#define pmd_none(x) (!(x).pmd_low && !(x).pmd_high) static inline pte_t __mk_pte(unsigned long page_nr, pgprot_t pgprot) { @@ -101,4 +108,13 @@ static inline pte_t __mk_pte(unsigned lo return pte; } +static inline pmd_t __mk_pmd(unsigned long page_nr, pgprot_t pgprot) +{ + pmd_t pmd; + + pmd.pmd_high = page_nr >> (32 - PAGE_SHIFT); + pmd.pmd_low = (page_nr << PAGE_SHIFT) | pgprot_val(pgprot); + return pmd; +} + #endif /* _I386_PGTABLE_3LEVEL_H */ diff -urNp ref/include/asm-i386/pgtable.h 2.4.20pre7aa1/include/asm-i386/pgtable.h --- ref/include/asm-i386/pgtable.h Wed Sep 18 00:46:56 2002 +++ 2.4.20pre7aa1/include/asm-i386/pgtable.h Wed Sep 18 00:47:01 2002 @@ -269,10 +269,9 @@ extern unsigned long pg0[1024]; #define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE)) #define pte_clear(xp) do { set_pte(xp, __pte(0)); } while (0) -#define pmd_none(x) (!pmd_val(x)) -#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) +#define pmd_present(x) ((x).pmd_low & _PAGE_PRESENT) #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) -#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) +#define pmd_bad(x) (((x).pmd_low & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) @@ -309,9 +308,11 @@ static inline void ptep_mkdirty(pte_t *p */ #define mk_pte(page, pgprot) __mk_pte((page) - mem_map, (pgprot)) +#define mk_pmd(page, pgprot) __mk_pmd((page) - mem_map, (pgprot)) /* This takes a physical page address that is used by the remapping functions */ #define mk_pte_phys(physpage, pgprot) __mk_pte((physpage) >> PAGE_SHIFT, pgprot) +#define mk_pmd_phys(physpage, pgprot) __mk_pmd((physpage) >> PAGE_SHIFT, pgprot) 
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { @@ -322,8 +323,60 @@ static inline pte_t pte_modify(pte_t pte #define page_pte(page) page_pte_prot(page, __pgprot(0)) -#define pmd_page(pmd) \ -((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) +#define pmd_page(pmd) \ +({ \ + struct page * __page = __pmd_page(pmd); \ + kmap_pagetable(__page); \ +}) + +#define pmd_page_atomic(pmd) \ +({ \ + struct page * __page = __pmd_page(pmd); \ + kmap_atomic(__page, KM_USER0); \ +}) + +#define pmd_page_atomic2(pmd) \ +({ \ + struct page * __page = __pmd_page(pmd); \ + kmap_atomic(__page, KM_USER1); \ +}) + +#define pmd_page_under_lock(pmd, mm) \ +({ \ + struct page * __page = __pmd_page(pmd); \ + int page_highmem = PageHighMem(__page); \ + void *__kvaddr; \ + \ + if (page_highmem) \ + spin_unlock(&(mm)->page_table_lock); \ + __kvaddr = kmap_pagetable(__page); \ + if (page_highmem) \ + spin_lock(&(mm)->page_table_lock); \ + __kvaddr; \ +}) + +/* + * pte_offset2_under_lock, invoking pmd_page2_under_lock, + * is used by nothing except mremap's get_one_pte: it uses + * the default kmap on src pagetable, before kmap_pagetable + * is used on dst pagetable, to avoid potential deadlock. + */ +#define pmd_page2_under_lock(pmd, mm) \ +({ \ + struct page * __page = __pmd_page(pmd); \ + int page_highmem = PageHighMem(__page); \ + void *__kvaddr; \ + \ + if (page_highmem) \ + spin_unlock(&(mm)->page_table_lock); \ + __kvaddr = kmap(__page); \ + if (page_highmem) \ + spin_lock(&(mm)->page_table_lock); \ + __kvaddr; \ +}) + +#define pmd_page_lowmem(pmd) \ + (__va((pmd).pmd_low & PAGE_MASK)) /* to find an entry in a page-table-directory. 
*/ #define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) @@ -343,6 +396,22 @@ static inline pte_t pte_modify(pte_t pte ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset(dir, address) ((pte_t *) pmd_page(*(dir)) + \ __pte_offset(address)) +#define pte_offset_atomic(dir, address) ((pte_t *) pmd_page_atomic(*(dir)) + \ + __pte_offset(address)) +#define pte_offset_atomic2(dir, address) ((pte_t *) pmd_page_atomic2(*(dir)) + \ + __pte_offset(address)) +#define pte_offset_under_lock(dir, address, mm) ((pte_t *) pmd_page_under_lock(*(dir), mm) + \ + __pte_offset(address)) +#define pte_offset2_under_lock(dir, address, mm) ((pte_t *) pmd_page2_under_lock(*(dir), mm) + \ + __pte_offset(address)) +#define pte_offset_lowmem(dir, address) ((pte_t *) pmd_page_lowmem(*(dir)) + \ + __pte_offset(address)) +#define pte_kunmap(ptep) kunmap_pagetable(ptep) +#define pte_kunmap2(ptep) kunmap_vaddr(ptep) +#define pte_kunmap_atomic2(ptep) kunmap_atomic((void *) ((unsigned long) (ptep) & PAGE_MASK), KM_USER1) +#ifndef CONFIG_HIGHMEM +#define pte_alloc_atomic(mm, pmd, address) pte_alloc(mm, pmd, address) +#endif /* * The i386 doesn't have any external MMU info: the kernel page diff -urNp ref/include/asm-i386/prefetch.h 2.4.20pre7aa1/include/asm-i386/prefetch.h --- ref/include/asm-i386/prefetch.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre7aa1/include/asm-i386/prefetch.h Wed Sep 18 00:47:01 2002 @@ -0,0 +1,34 @@ +#ifndef __ASM_I386_PREFETCH_H +#define __ASM_I386_PREFETCH_H + +#include + +/* Prefetch instructions for Pentium III and AMD Athlon */ +#ifdef CONFIG_MPENTIUMIII + +#define ARCH_HAS_PREFETCH +extern inline void prefetch(const void *x) +{ + __asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x)); +} + +#elif CONFIG_X86_USE_3DNOW + +#define ARCH_HAS_PREFETCH +#define ARCH_HAS_PREFETCHW +#define ARCH_HAS_SPINLOCK_PREFETCH + +extern inline void prefetch(const void *x) +{ + __asm__ __volatile__ ("prefetch (%0)" : : "r"(x)); +} + +extern inline void 
prefetchw(const void *x) +{ + __asm__ __volatile__ ("prefetchw (%0)" : : "r"(x)); +} +#define spin_lock_prefetch(x) prefetchw(x) + +#endif + +#endif /* __ASM_I386_PREFETCH_H */ diff -urNp ref/include/asm-i386/processor.h 2.4.20pre7aa1/include/asm-i386/processor.h --- ref/include/asm-i386/processor.h Wed Sep 18 00:46:59 2002 +++ 2.4.20pre7aa1/include/asm-i386/processor.h Wed Sep 18 00:47:01 2002 @@ -17,6 +17,7 @@ #include #include #include +#include /* * Default implementation of macro that returns current @@ -51,7 +52,7 @@ struct cpuinfo_x86 { unsigned long loops_per_jiffy; unsigned long *pgd_quick; unsigned long *pmd_quick; - unsigned long *pte_quick; + struct list_head pte_quick; unsigned long pgtable_cache_sz; } __attribute__((__aligned__(SMP_CACHE_BYTES))); @@ -488,32 +489,4 @@ static inline void rep_nop(void) #define ARCH_HAS_SMP_BALANCE -/* Prefetch instructions for Pentium III and AMD Athlon */ -#ifdef CONFIG_MPENTIUMIII - -#define ARCH_HAS_PREFETCH -extern inline void prefetch(const void *x) -{ - __asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x)); -} - -#elif CONFIG_X86_USE_3DNOW - -#define ARCH_HAS_PREFETCH -#define ARCH_HAS_PREFETCHW -#define ARCH_HAS_SPINLOCK_PREFETCH - -extern inline void prefetch(const void *x) -{ - __asm__ __volatile__ ("prefetch (%0)" : : "r"(x)); -} - -extern inline void prefetchw(const void *x) -{ - __asm__ __volatile__ ("prefetchw (%0)" : : "r"(x)); -} -#define spin_lock_prefetch(x) prefetchw(x) - -#endif - #endif /* __ASM_I386_PROCESSOR_H */ diff -urNp ref/include/asm-ia64/pgalloc.h 2.4.20pre7aa1/include/asm-ia64/pgalloc.h --- ref/include/asm-ia64/pgalloc.h Fri Sep 13 06:13:54 2002 +++ 2.4.20pre7aa1/include/asm-ia64/pgalloc.h Wed Sep 18 00:47:01 2002 @@ -109,43 +109,57 @@ pmd_free (pmd_t *pmd) } static inline void -pmd_populate (struct mm_struct *mm, pmd_t *pmd_entry, pte_t *pte) +pmd_populate (struct mm_struct *mm, pmd_t *pmd_entry, struct page *page) { - pmd_val(*pmd_entry) = __pa(pte); + *pmd_entry = mk_pmd(page, 
__pgprot(0)); } -static inline pte_t* +static inline struct page * pte_alloc_one_fast (struct mm_struct *mm, unsigned long addr) { - unsigned long *ret = (unsigned long *)pte_quicklist; + struct list_head *entry = pte_quicklist.next; /* head: most recently freed page, since pte_free() adds at the head */ + struct page *page = NULL; - if (__builtin_expect(ret != NULL, 1)) { - pte_quicklist = (unsigned long *)(*ret); - ret[0] = 0; + if (entry != &pte_quicklist) { + list_del(entry); + page = list_entry(entry, struct page, list); --pgtable_cache_size; } - return (pte_t *)ret; + return page; } - -static inline pte_t* -pte_alloc_one (struct mm_struct *mm, unsigned long addr) +static inline struct page * +pte_alloc_one_fast_lifo (struct mm_struct *mm, unsigned long addr) { - pte_t *pte = (pte_t *) __get_free_page(GFP_KERNEL); + struct list_head *entry = pte_quicklist.prev; /* tail: least recently freed page, since pte_free() adds at the head */ + struct page *page = NULL; - if (__builtin_expect(pte != NULL, 1)) - clear_page(pte); - return pte; + if (entry != &pte_quicklist) { + list_del(entry); + page = list_entry(entry, struct page, list); + --pgtable_cache_size; + } + return page; } static inline void -pte_free (pte_t *pte) +pte_free (struct page *page) { - *(unsigned long *)pte = (unsigned long) pte_quicklist; - pte_quicklist = (unsigned long *) pte; + list_add(&page->list, &pte_quicklist); ++pgtable_cache_size; } +static inline void +pte_free_slow(struct page * page) +{ + __free_page(page); +} + +static inline void pte_free_via_pmd(pmd_t pmd) +{ + pte_free(virt_to_page(pte_offset(&pmd, 0))); +} + extern int do_check_pgt_cache (int, int); /* diff -urNp ref/include/asm-ia64/pgtable.h 2.4.20pre7aa1/include/asm-ia64/pgtable.h --- ref/include/asm-ia64/pgtable.h Fri Sep 13 06:13:54 2002 +++ 2.4.20pre7aa1/include/asm-ia64/pgtable.h Wed Sep 18 00:47:01 2002 @@ -217,6 +217,13 @@ ia64_phys_addr_valid (unsigned long addr pte_val(__pte) = ((page - mem_map) << PAGE_SHIFT) | pgprot_val(pgprot); \ __pte; \ }) +#define mk_pmd(page,pgprot) \ +({ \ + pmd_t __pmd; \ + \ + pmd_val(__pmd) = ((page - mem_map) 
<< PAGE_SHIFT) | pgprot_val(pgprot); \ + __pmd; \ +}) /* This takes a physical page address that is used by the remapping functions */ #define mk_pte_phys(physpage, pgprot) \ @@ -343,6 +350,15 @@ pgd_offset (struct mm_struct *mm, unsign /* Find an entry in the third-level page table.. */ #define pte_offset(dir,addr) \ ((pte_t *) pmd_page(*(dir)) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) +#define pte_offset2(dir, address) pte_offset(dir, address) +#define pte_offset_atomic(dir, address) pte_offset(dir, address) +#define pte_offset_atomic2(dir, address) pte_offset(dir, address) +#define pte_offset_under_lock(dir, address, mm) pte_offset(dir, address) +#define pte_offset2_under_lock(dir, address, mm) pte_offset(dir, address) +#define pte_kunmap(ptep) do { } while(0) +#define pte_kunmap2(ptep) do { } while(0) +#define pte_kunmap_atomic2(ptep) do { } while(0) +#define pte_alloc_atomic(mm, pmd, address) pte_alloc(mm, pmd, address) /* atomic versions of the some PTE manipulations: */ diff -urNp ref/include/asm-ia64/processor.h 2.4.20pre7aa1/include/asm-ia64/processor.h --- ref/include/asm-ia64/processor.h Fri Sep 13 06:13:54 2002 +++ 2.4.20pre7aa1/include/asm-ia64/processor.h Wed Sep 18 00:47:49 2002 @@ -80,6 +80,7 @@ #include #include +#include #include #include @@ -146,7 +147,7 @@ struct cpuinfo_ia64 { __u64 itm_next; /* interval timer mask value to use for next clock tick */ __u64 *pgd_quick; __u64 *pmd_quick; - __u64 *pte_quick; + struct list_head pte_quick; __u64 pgtable_cache_sz; /* CPUID-derived information: */ __u64 ppn; diff -urNp ref/include/linux/highmem.h 2.4.20pre7aa1/include/linux/highmem.h --- ref/include/linux/highmem.h Fri Sep 13 06:13:57 2002 +++ 2.4.20pre7aa1/include/linux/highmem.h Wed Sep 18 00:47:01 2002 @@ -12,6 +12,7 @@ extern struct page *highmem_start_page; /* declarations for linux/mm/highmem.c */ unsigned int nr_free_highpages(void); +extern void init_kmap(void); extern struct buffer_head *create_bounce(int rw, struct buffer_head * 
bh_orig); @@ -64,13 +65,16 @@ static inline void bh_kunmap_irq(char *b #else /* CONFIG_HIGHMEM */ static inline unsigned int nr_free_highpages(void) { return 0; } +#define init_kmap() do { } while(0) static inline void *kmap(struct page *page) { return page_address(page); } -#define kunmap(page) do { } while (0) - +#define kunmap(page) do { } while (0) +#define kunmap_vaddr(vaddr) do { } while (0) +#define kmap_pagetable(page) kmap(page) +#define kunmap_pagetable(vaddr) do { } while (0) #define kmap_atomic(page,idx) kmap(page) -#define kunmap_atomic(page,idx) kunmap(page) +#define kunmap_atomic(vaddr,idx) do { } while (0) #define bh_kmap(bh) ((bh)->b_data) #define bh_kunmap(bh) do { } while (0) @@ -93,6 +97,13 @@ static inline void clear_highpage(struct kunmap(page); } +static inline void clear_pagetable(struct page *page) +{ + void * vaddr = kmap_pagetable(page); + clear_page(vaddr); + kunmap_pagetable(vaddr); +} + /* * Same but also flushes aliased cache contents to RAM. */ diff -urNp ref/include/linux/list.h 2.4.20pre7aa1/include/linux/list.h --- ref/include/linux/list.h Fri Sep 13 06:13:57 2002 +++ 2.4.20pre7aa1/include/linux/list.h Wed Sep 18 00:47:01 2002 @@ -3,8 +3,6 @@ #if defined(__KERNEL__) || defined(_LVM_H_INCLUDE) -#include - /* * Simple doubly linked list implementation. * @@ -184,6 +182,8 @@ static inline void list_splice_init(list #define list_entry(ptr, type, member) \ ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) +#include + /** * list_for_each - iterate over a list * @pos: the &list_t to use as a loop counter. 
diff -urNp ref/include/linux/mm.h 2.4.20pre7aa1/include/linux/mm.h --- ref/include/linux/mm.h Wed Sep 18 00:47:00 2002 +++ 2.4.20pre7aa1/include/linux/mm.h Wed Sep 18 00:47:01 2002 @@ -481,6 +481,7 @@ extern int zeromap_page_range(unsigned l extern int vmtruncate(struct inode * inode, loff_t offset); extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)); extern pte_t *FASTCALL(pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)); +extern pte_t *FASTCALL(pte_alloc_atomic(struct mm_struct *mm, pmd_t *pmd, unsigned long address)); extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access); extern int make_pages_present(unsigned long addr, unsigned long end); extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); diff -urNp ref/include/linux/prefetch.h 2.4.20pre7aa1/include/linux/prefetch.h --- ref/include/linux/prefetch.h Tue Sep 17 01:41:47 2002 +++ 2.4.20pre7aa1/include/linux/prefetch.h Wed Sep 18 00:47:01 2002 @@ -10,7 +10,7 @@ #ifndef _LINUX_PREFETCH_H #define _LINUX_PREFETCH_H -#include +#include #include /* diff -urNp ref/init/main.c 2.4.20pre7aa1/init/main.c --- ref/init/main.c Wed Sep 18 00:47:00 2002 +++ 2.4.20pre7aa1/init/main.c Wed Sep 18 00:47:01 2002 @@ -385,6 +385,7 @@ asmlinkage void __init start_kernel(void mem_init(); kmem_cache_sizes_init(); pgtable_cache_init(); + init_kmap(); /* * For architectures that have highmem, num_mappedpages represents diff -urNp ref/mm/filemap.c 2.4.20pre7aa1/mm/filemap.c --- ref/mm/filemap.c Wed Sep 18 00:46:59 2002 +++ 2.4.20pre7aa1/mm/filemap.c Wed Sep 18 00:47:01 2002 @@ -2060,7 +2060,7 @@ static inline int filemap_sync_pte_range unsigned long address, unsigned long size, struct vm_area_struct *vma, unsigned long offset, unsigned int flags) { - pte_t * pte; + pte_t * pte, * pte_orig; unsigned long end; int error; @@ -2071,7 +2071,7 @@ static inline int 
filemap_sync_pte_range pmd_clear(pmd); return 0; } - pte = pte_offset(pmd, address); + pte_orig = pte = pte_offset_atomic(pmd, address); offset += address & PMD_MASK; address &= ~PMD_MASK; end = address + size; @@ -2083,6 +2083,7 @@ static inline int filemap_sync_pte_range address += PAGE_SIZE; pte++; } while (address && (address < end)); + pte_kunmap(pte_orig); return error; } diff -urNp ref/mm/highmem.c 2.4.20pre7aa1/mm/highmem.c --- ref/mm/highmem.c Fri Sep 13 06:13:57 2002 +++ 2.4.20pre7aa1/mm/highmem.c Wed Sep 18 00:47:01 2002 @@ -21,6 +21,8 @@ #include #include #include +#include +#include /* * Virtual_count is not a pure "count". @@ -30,14 +32,29 @@ * since the last TLB flush - so we can't use it. * n means that there are (n-1) current users of it. */ -static int pkmap_count[LAST_PKMAP]; +static int pkmap_count[LAST_PKMAP*KM_NR_SERIES]; +static int pkmap_holds[KM_NR_SERIES]; static unsigned int last_pkmap_nr; static spinlock_cacheline_t kmap_lock_cacheline = {SPIN_LOCK_UNLOCKED}; #define kmap_lock kmap_lock_cacheline.lock +#if HIGHMEM_DEBUG +static int kmap_ready; +#endif pte_t * pkmap_page_table; -static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait); +static wait_queue_head_t pkmap_map_wait[KM_NR_SERIES]; + +void __init init_kmap(void) +{ + int i; + + for (i = 0; i < KM_NR_SERIES; i++) + init_waitqueue_head(pkmap_map_wait + i); +#if HIGHMEM_DEBUG + kmap_ready = 1; +#endif +} static void flush_all_zero_pkmaps(void) { @@ -45,7 +62,7 @@ static void flush_all_zero_pkmaps(void) flush_cache_all(); - for (i = 0; i < LAST_PKMAP; i++) { + for (i = 0; i < LAST_PKMAP*KM_NR_SERIES; i++) { struct page *page; /* @@ -62,6 +79,8 @@ static void flush_all_zero_pkmaps(void) if (pte_none(pkmap_page_table[i])) BUG(); + page = pte_page(pkmap_page_table[i]); + page->virtual = NULL; /* * Don't need an atomic fetch-and-clear op here; * no-one has the page mapped, and cannot get at @@ -69,10 +88,8 @@ static void flush_all_zero_pkmaps(void) * getting the kmap_lock (which is held here). 
* So no dangers, even with speculative execution. */ - page = pte_page(pkmap_page_table[i]); pte_clear(&pkmap_page_table[i]); - page->virtual = NULL; } flush_tlb_all(); } @@ -80,43 +97,14 @@ static void flush_all_zero_pkmaps(void) static inline unsigned long map_new_virtual(struct page *page) { unsigned long vaddr; - int count; -start: - count = LAST_PKMAP; /* Find an empty entry */ - for (;;) { - last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK; - if (!last_pkmap_nr) { + do { + if (++last_pkmap_nr >= LAST_PKMAP*KM_NR_SERIES) { + last_pkmap_nr = 0; flush_all_zero_pkmaps(); - count = LAST_PKMAP; } - if (!pkmap_count[last_pkmap_nr]) - break; /* Found a usable entry */ - if (--count) - continue; - - /* - * Sleep for somebody else to unmap their entries - */ - { - DECLARE_WAITQUEUE(wait, current); - - current->state = TASK_UNINTERRUPTIBLE; - add_wait_queue(&pkmap_map_wait, &wait); - spin_unlock(&kmap_lock); - schedule(); - remove_wait_queue(&pkmap_map_wait, &wait); - spin_lock(&kmap_lock); - - /* Somebody else might have mapped it while we slept */ - if (page->virtual) - return (unsigned long) page->virtual; - - /* Re-start */ - goto start; - } - } + } while (pkmap_count[last_pkmap_nr]); vaddr = PKMAP_ADDR(last_pkmap_nr); set_pte(&(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot)); @@ -126,17 +114,37 @@ start: return vaddr; } -void *kmap_high(struct page *page) +static inline void wait_for_map(int serie) +{ + DECLARE_WAITQUEUE(wait, current); + + current->state = TASK_UNINTERRUPTIBLE; + add_wait_queue(&pkmap_map_wait[serie], &wait); + spin_unlock(&kmap_lock); + schedule(); + remove_wait_queue(&pkmap_map_wait[serie], &wait); + spin_lock(&kmap_lock); +} + +void *kmap_high(struct page *page, int serie) { unsigned long vaddr; +#if HIGHMEM_DEBUG + if (!kmap_ready) + BUG(); +#endif + /* * For highmem pages, we can't trust "virtual" until - * after we have the lock. 
- * - * We cannot call this from interrupts, as it may block + * after we have the lock; and even if virtual is already + * set, we cannot let a serie exceed its quota of maps + * concurrently in use, or else we _might_ deadlock. */ spin_lock(&kmap_lock); + while (pkmap_holds[serie] >= LAST_PKMAP) + wait_for_map(serie); + pkmap_holds[serie]++; vaddr = (unsigned long) page->virtual; if (!vaddr) vaddr = map_new_virtual(page); @@ -147,44 +155,41 @@ void *kmap_high(struct page *page) return (void*) vaddr; } -void kunmap_high(struct page *page) +void kunmap_high(void * vaddr, int serie) { - unsigned long vaddr; unsigned long nr; int need_wakeup; - spin_lock(&kmap_lock); - vaddr = (unsigned long) page->virtual; - if (!vaddr) + if (in_interrupt()) + BUG(); + if ((unsigned long) vaddr < PKMAP_BASE) BUG(); - nr = PKMAP_NR(vaddr); + nr = PKMAP_NR((unsigned long) vaddr); /* * A count must never go down to zero * without a TLB flush! */ need_wakeup = 0; - switch (--pkmap_count[nr]) { - case 0: + spin_lock(&kmap_lock); + if (--pkmap_count[nr] <= 0) BUG(); - case 1: - /* - * Avoid an unnecessary wake_up() function call. - * The common case is pkmap_count[] == 1, but - * no waiters. - * The tasks queued in the wait-queue are guarded - * by both the lock in the wait-queue-head and by - * the kmap_lock. As the kmap_lock is held here, - * no need for the wait-queue-head's lock. Simply - * test if the queue is empty. - */ - need_wakeup = waitqueue_active(&pkmap_map_wait); - } + if (--pkmap_holds[serie] < 0) + BUG(); + /* + * Avoid an unnecessary wake_up() function call. + * The tasks queued in the wait-queue are guarded + * by both the lock in the wait-queue-head and by + * the kmap_lock. As the kmap_lock is held here, + * no need for the wait-queue-head's lock. Simply + * test if the queue is empty. 
+ */ + need_wakeup = waitqueue_active(&pkmap_map_wait[serie]); spin_unlock(&kmap_lock); /* do wake-up, if needed, race-free outside of the spin lock */ if (need_wakeup) - wake_up(&pkmap_map_wait); + wake_up(&pkmap_map_wait[serie]); } #define POOL_SIZE 32 diff -urNp ref/mm/memory.c 2.4.20pre7aa1/mm/memory.c --- ref/mm/memory.c Wed Sep 18 00:46:58 2002 +++ 2.4.20pre7aa1/mm/memory.c Wed Sep 18 00:47:01 2002 @@ -92,7 +92,7 @@ void __free_pte(pte_t pte) */ static inline void free_one_pmd(pmd_t * dir) { - pte_t * pte; + pmd_t pmd; if (pmd_none(*dir)) return; @@ -101,9 +101,9 @@ static inline void free_one_pmd(pmd_t * pmd_clear(dir); return; } - pte = pte_offset(dir, 0); + pmd = *dir; pmd_clear(dir); - pte_free(pte); + pte_free_via_pmd(pmd); } static inline void free_one_pgd(pgd_t * dir) @@ -239,10 +239,10 @@ skip_copy_pte_range: address = (address goto cont_copy_pmd_range; } - src_pte = pte_offset(src_pmd, address); - dst_pte = pte_alloc(dst, dst_pmd, address); + dst_pte = pte_alloc_atomic(dst, dst_pmd, address); if (!dst_pte) goto nomem; + src_pte = pte_offset_atomic2(src_pmd, address); spin_lock(&src->page_table_lock); do { @@ -277,13 +277,19 @@ skip_copy_pte_range: address = (address cont_copy_pte_range: set_pte(dst_pte, pte); cont_copy_pte_range_noset: address += PAGE_SIZE; - if (address >= end) + if (address >= end) { + pte_kunmap_atomic2(src_pte); + pte_kunmap(dst_pte); goto out_unlock; + } src_pte++; dst_pte++; } while ((unsigned long)src_pte & PTE_TABLE_MASK); spin_unlock(&src->page_table_lock); - + + pte_kunmap_atomic2((src_pte - 1)); + pte_kunmap((dst_pte - 1)); + cont_copy_pmd_range: src_pmd++; dst_pmd++; } while ((unsigned long)src_pmd & PMD_TABLE_MASK); @@ -310,7 +316,7 @@ static inline void forget_pte(pte_t page static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size) { unsigned long offset; - pte_t * ptep; + pte_t * ptep, * ptep_orig; int freed = 0; if (pmd_none(*pmd)) @@ -320,7 +326,7 @@ static inline int 
zap_pte_range(mmu_gath pmd_clear(pmd); return 0; } - ptep = pte_offset(pmd, address); + ptep_orig = ptep = pte_offset_atomic(pmd, address); offset = address & ~PMD_MASK; if (offset + size > PMD_SIZE) size = PMD_SIZE - offset; @@ -340,6 +346,7 @@ static inline int zap_pte_range(mmu_gath pte_clear(ptep); } } + pte_kunmap(ptep_orig); return freed; } @@ -432,11 +439,10 @@ static struct page * follow_page(struct if (pmd_none(*pmd) || pmd_bad(*pmd)) goto out; - ptep = pte_offset(pmd, address); - if (!ptep) - goto out; + ptep = pte_offset_atomic(pmd, address); pte = *ptep; + pte_kunmap(ptep); if (pte_present(pte)) { if (!write || (pte_write(pte) && pte_dirty(pte))) @@ -791,10 +797,11 @@ static inline int zeromap_pmd_range(stru if (end > PGDIR_SIZE) end = PGDIR_SIZE; do { - pte_t * pte = pte_alloc(mm, pmd, address); + pte_t * pte = pte_alloc_atomic(mm, pmd, address); if (!pte) return -ENOMEM; zeromap_pte_range(pte, address, end - address, prot); + pte_kunmap(pte); address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address && (address < end)); @@ -871,10 +878,11 @@ static inline int remap_pmd_range(struct end = PGDIR_SIZE; phys_addr -= address; do { - pte_t * pte = pte_alloc(mm, pmd, address); + pte_t * pte = pte_alloc_atomic(mm, pmd, address); if (!pte) return -ENOMEM; remap_pte_range(pte, address, end - address, address + phys_addr, prot); + pte_kunmap(pte); address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address && (address < end)); @@ -960,7 +968,7 @@ static inline void break_cow(struct vm_a * with the page_table_lock released. 
*/ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, - unsigned long address, pte_t *page_table, pte_t pte) + unsigned long address, pte_t *page_table, pmd_t * pmd, pte_t pte) { struct page *old_page, *new_page; @@ -972,6 +980,7 @@ static int do_wp_page(struct mm_struct * flush_cache_page(vma, address); establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte)))); spin_unlock(&mm->page_table_lock); + pte_kunmap(page_table); return 1; /* Minor fault */ } @@ -980,6 +989,7 @@ static int do_wp_page(struct mm_struct * */ page_cache_get(old_page); spin_unlock(&mm->page_table_lock); + pte_kunmap(page_table); new_page = alloc_page(GFP_HIGHUSER); if (!new_page) @@ -989,6 +999,7 @@ static int do_wp_page(struct mm_struct * /* * Re-check the pte - we dropped the lock */ + page_table = pte_offset_atomic(pmd, address); spin_lock(&mm->page_table_lock); /* * keep the page pinned until we return runnable @@ -1013,11 +1024,13 @@ static int do_wp_page(struct mm_struct * new_page = old_page; } spin_unlock(&mm->page_table_lock); + pte_kunmap(page_table); page_cache_release(new_page); return 1; /* Minor fault */ bad_wp_page: spin_unlock(&mm->page_table_lock); + pte_kunmap(page_table); printk("do_wp_page: bogus page at address %08lx (page 0x%lx)\n",address,(unsigned long)old_page); return -1; no_mem: @@ -1138,7 +1151,7 @@ void swapin_readahead(swp_entry_t entry) */ static int do_swap_page(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, - pte_t * page_table, pte_t orig_pte, int write_access) + pte_t * page_table, pmd_t * pmd, pte_t orig_pte, int write_access) { struct page *page; swp_entry_t entry = pte_to_swp_entry(orig_pte); @@ -1146,6 +1159,7 @@ static int do_swap_page(struct mm_struct int ret = 1; spin_unlock(&mm->page_table_lock); + pte_kunmap(page_table); page = lookup_swap_cache(entry); if (!page) { swapin_readahead(entry); @@ -1156,9 +1170,11 @@ static int do_swap_page(struct mm_struct * we released the page 
table lock. */ int retval; + page_table = pte_offset_atomic(pmd, address); spin_lock(&mm->page_table_lock); retval = pte_same(*page_table, orig_pte) ? -1 : 1; spin_unlock(&mm->page_table_lock); + pte_kunmap(page_table); return retval; } @@ -1173,9 +1189,11 @@ static int do_swap_page(struct mm_struct * Back out if somebody else faulted in this pte while we * released the page table lock. */ + page_table = pte_offset_atomic(pmd, address); spin_lock(&mm->page_table_lock); if (!pte_same(*page_table, orig_pte)) { spin_unlock(&mm->page_table_lock); + pte_kunmap(page_table); page_cache_release(page); return 1; } @@ -1200,6 +1218,7 @@ static int do_swap_page(struct mm_struct /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, address, pte); spin_unlock(&mm->page_table_lock); + pte_kunmap(page_table); return ret; } @@ -1208,7 +1227,7 @@ static int do_swap_page(struct mm_struct * spinlock held to protect against concurrent faults in * multithreaded programs. */ -static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr) +static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, pmd_t * pmd, int write_access, unsigned long addr) { pte_t entry; @@ -1221,15 +1240,18 @@ static int do_anonymous_page(struct mm_s /* Allocate our own private page. 
*/ spin_unlock(&mm->page_table_lock); + pte_kunmap(page_table); page = alloc_page(GFP_HIGHUSER); if (!page) goto no_mem; clear_user_highpage(page, addr); + page_table = pte_offset_atomic(pmd, addr); spin_lock(&mm->page_table_lock); if (!pte_none(*page_table)) { spin_unlock(&mm->page_table_lock); + pte_kunmap(page_table); page_cache_release(page); return 1; } @@ -1244,6 +1266,7 @@ static int do_anonymous_page(struct mm_s /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, addr, entry); spin_unlock(&mm->page_table_lock); + pte_kunmap(page_table); return 1; /* Minor fault */ no_mem: @@ -1263,14 +1286,15 @@ no_mem: * spinlock held. Exit with the spinlock released. */ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma, - unsigned long address, int write_access, pte_t *page_table) + unsigned long address, int write_access, pte_t *page_table, pmd_t * pmd) { struct page * new_page; pte_t entry; if (!vma->vm_ops || !vma->vm_ops->nopage) - return do_anonymous_page(mm, vma, page_table, write_access, address); + return do_anonymous_page(mm, vma, page_table, pmd, write_access, address); spin_unlock(&mm->page_table_lock); + pte_kunmap(page_table); new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, 0); @@ -1294,7 +1318,9 @@ static int do_no_page(struct mm_struct * new_page = page; } + page_table = pte_offset_atomic(pmd, address); spin_lock(&mm->page_table_lock); + /* * This silly early PAGE_DIRTY setting removes a race * due to the bad i386 page protection. But it's valid @@ -1316,6 +1342,7 @@ static int do_no_page(struct mm_struct * set_pte(page_table, entry); } else { spin_unlock(&mm->page_table_lock); + pte_kunmap(page_table); /* One of our sibling threads was faster, back out. 
*/ page_cache_release(new_page); return 1; @@ -1324,6 +1351,7 @@ static int do_no_page(struct mm_struct * /* no need to invalidate: a not-present page shouldn't be cached */ update_mmu_cache(vma, address, entry); spin_unlock(&mm->page_table_lock); + pte_kunmap(page_table); return 2; /* Major fault */ } @@ -1350,7 +1378,7 @@ static int do_no_page(struct mm_struct * */ static inline int handle_pte_fault(struct mm_struct *mm, struct vm_area_struct * vma, unsigned long address, - int write_access, pte_t * pte) + int write_access, pte_t *pte, pmd_t * pmd) { pte_t entry; @@ -1362,19 +1390,20 @@ static inline int handle_pte_fault(struc * drop the lock. */ if (pte_none(entry)) - return do_no_page(mm, vma, address, write_access, pte); - return do_swap_page(mm, vma, address, pte, entry, write_access); + return do_no_page(mm, vma, address, write_access, pte, pmd); + return do_swap_page(mm, vma, address, pte, pmd, entry, write_access); } if (write_access) { if (!pte_write(entry)) - return do_wp_page(mm, vma, address, pte, entry); + return do_wp_page(mm, vma, address, pte, pmd, entry); entry = pte_mkdirty(entry); } entry = pte_mkyoung(entry); establish_pte(vma, address, pte, entry); spin_unlock(&mm->page_table_lock); + pte_kunmap(pte); return 1; } @@ -1398,9 +1427,9 @@ int handle_mm_fault(struct mm_struct *mm pmd = pmd_alloc(mm, pgd, address); if (pmd) { - pte_t * pte = pte_alloc(mm, pmd, address); + pte_t * pte = pte_alloc_atomic(mm, pmd, address); if (pte) - return handle_pte_fault(mm, vma, address, write_access, pte); + return handle_pte_fault(mm, vma, address, write_access, pte, pmd); } spin_unlock(&mm->page_table_lock); return -1; @@ -1443,24 +1472,36 @@ out: return pmd_offset(pgd, address); } +static inline struct page * pte_alloc_one(struct mm_struct *mm, unsigned long address) +{ + struct page * page; + + page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + if (page) + clear_pagetable(page); + return page; +} + /* * Allocate the page table directory. 
* * We've already handled the fast-path in-line, and we own the * page table lock. */ -pte_t *pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address) +static inline pte_t * __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address, int atomic) { + pte_t * pte; + if (pmd_none(*pmd)) { - pte_t *new; + struct page * page; /* "fast" allocation can happen without dropping the lock.. */ - new = pte_alloc_one_fast(mm, address); - if (!new) { + page = pte_alloc_one_fast(mm, address); + if (!page) { spin_unlock(&mm->page_table_lock); - new = pte_alloc_one(mm, address); + page = pte_alloc_one(mm, address); spin_lock(&mm->page_table_lock); - if (!new) + if (unlikely(!page)) return NULL; /* @@ -1468,16 +1509,32 @@ pte_t *pte_alloc(struct mm_struct *mm, p * entry, as somebody else could have populated it.. */ if (!pmd_none(*pmd)) { - pte_free(new); + pte_free(page); check_pgt_cache(); goto out; } } - pmd_populate(mm, pmd, new); + pmd_populate(mm, pmd, page); } out: - return pte_offset(pmd, address); + if (atomic) + pte = pte_offset_atomic(pmd, address); + else + pte = pte_offset_under_lock(pmd, address, mm); + return pte; +} + +pte_t * pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address) +{ + return __pte_alloc(mm, pmd, address, 0); +} + +#ifdef CONFIG_HIGHMEM +pte_t * pte_alloc_atomic(struct mm_struct *mm, pmd_t *pmd, unsigned long address) +{ + return __pte_alloc(mm, pmd, address, 1); } +#endif int make_pages_present(unsigned long addr, unsigned long end) { @@ -1501,16 +1558,18 @@ struct page * vmalloc_to_page(void * vma unsigned long addr = (unsigned long) vmalloc_addr; struct page *page = NULL; pmd_t *pmd; - pte_t *pte; + pte_t *ptep, pte; pgd_t *pgd; pgd = pgd_offset_k(addr); if (!pgd_none(*pgd)) { pmd = pmd_offset(pgd, addr); if (!pmd_none(*pmd)) { - pte = pte_offset(pmd, addr); - if (pte_present(*pte)) { - page = pte_page(*pte); + ptep = pte_offset_atomic(pmd, addr); + pte = *ptep; + pte_kunmap(ptep); + if (pte_present(pte)) { + page = 
pte_page(pte); } } } diff -urNp ref/mm/mprotect.c 2.4.20pre7aa1/mm/mprotect.c --- ref/mm/mprotect.c Fri Aug 9 14:52:29 2002 +++ 2.4.20pre7aa1/mm/mprotect.c Wed Sep 18 00:47:01 2002 @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -15,7 +16,7 @@ static inline void change_pte_range(pmd_t * pmd, unsigned long address, unsigned long size, pgprot_t newprot) { - pte_t * pte; + pte_t * pte, * pte_orig; unsigned long end; if (pmd_none(*pmd)) @@ -25,7 +26,7 @@ static inline void change_pte_range(pmd_ pmd_clear(pmd); return; } - pte = pte_offset(pmd, address); + pte_orig = pte = pte_offset_atomic(pmd, address); address &= ~PMD_MASK; end = address + size; if (end > PMD_SIZE) @@ -44,6 +45,7 @@ static inline void change_pte_range(pmd_ address += PAGE_SIZE; pte++; } while (address && (address < end)); + pte_kunmap(pte_orig); } static inline void change_pmd_range(pgd_t * pgd, unsigned long address, diff -urNp ref/mm/mremap.c 2.4.20pre7aa1/mm/mremap.c --- ref/mm/mremap.c Tue Jan 22 18:53:56 2002 +++ 2.4.20pre7aa1/mm/mremap.c Wed Sep 18 00:47:01 2002 @@ -9,13 +9,14 @@ #include #include #include +#include #include #include extern int vm_enough_memory(long pages); -static inline pte_t *get_one_pte(struct mm_struct *mm, unsigned long addr) +static pte_t *get_one_pte(struct mm_struct *mm, unsigned long addr) { pgd_t * pgd; pmd_t * pmd; @@ -39,9 +40,11 @@ static inline pte_t *get_one_pte(struct goto end; } - pte = pte_offset(pmd, addr); - if (pte_none(*pte)) + pte = pte_offset2_under_lock(pmd, addr, mm); + if (pte_none(*pte)) { + pte_kunmap2(pte); pte = NULL; + } end: return pte; } @@ -57,32 +60,32 @@ static inline pte_t *alloc_one_pte(struc return pte; } -static inline int copy_one_pte(struct mm_struct *mm, pte_t * src, pte_t * dst) +static inline void copy_one_pte(pte_t * src, pte_t * dst) { - int error = 0; pte_t pte; if (!pte_none(*src)) { pte = ptep_get_and_clear(src); - if (!dst) { - /* No dest? We must put it back. 
*/ - dst = src; - error++; - } set_pte(dst, pte); } - return error; } static int move_one_page(struct mm_struct *mm, unsigned long old_addr, unsigned long new_addr) { int error = 0; - pte_t * src; + pte_t * src, * dst; spin_lock(&mm->page_table_lock); src = get_one_pte(mm, old_addr); - if (src) - error = copy_one_pte(mm, src, alloc_one_pte(mm, new_addr)); + if (src) { + dst = alloc_one_pte(mm, new_addr); + if (dst) { + copy_one_pte(src, dst); + pte_kunmap(dst); + } else + error = 1; + pte_kunmap2(src); + } spin_unlock(&mm->page_table_lock); return error; } diff -urNp ref/mm/swapfile.c 2.4.20pre7aa1/mm/swapfile.c --- ref/mm/swapfile.c Wed Sep 18 00:46:58 2002 +++ 2.4.20pre7aa1/mm/swapfile.c Wed Sep 18 00:47:01 2002 @@ -401,7 +401,7 @@ static inline void unuse_pmd(struct vm_a unsigned long address, unsigned long size, unsigned long offset, swp_entry_t entry, struct page* page) { - pte_t * pte; + pte_t * pte, * pte_orig; unsigned long end; if (pmd_none(*dir)) @@ -411,7 +411,7 @@ static inline void unuse_pmd(struct vm_a pmd_clear(dir); return; } - pte = pte_offset(dir, address); + pte_orig = pte = pte_offset_atomic(dir, address); offset += address & PMD_MASK; address &= ~PMD_MASK; end = address + size; @@ -422,6 +422,7 @@ static inline void unuse_pmd(struct vm_a address += PAGE_SIZE; pte++; } while (address && (address < end)); + pte_kunmap(pte_orig); } /* mmlist_lock and vma->vm_mm->page_table_lock are held */ diff -urNp ref/mm/vmalloc.c 2.4.20pre7aa1/mm/vmalloc.c --- ref/mm/vmalloc.c Wed Sep 18 00:46:58 2002 +++ 2.4.20pre7aa1/mm/vmalloc.c Wed Sep 18 00:47:01 2002 @@ -21,7 +21,7 @@ struct vm_struct * vmlist; static inline void free_area_pte(pmd_t * pmd, unsigned long address, unsigned long size) { - pte_t * pte; + pte_t * pte, * pte_orig; unsigned long end; if (pmd_none(*pmd)) @@ -31,7 +31,7 @@ static inline void free_area_pte(pmd_t * pmd_clear(pmd); return; } - pte = pte_offset(pmd, address); + pte_orig = pte = pte_offset_atomic(pmd, address); address &= ~PMD_MASK; 
end = address + size; if (end > PMD_SIZE) @@ -51,6 +51,7 @@ static inline void free_area_pte(pmd_t * } printk(KERN_CRIT "Whee.. Swapped out page in kernel page table\n"); } while (address < end); + pte_kunmap(pte_orig); } static inline void free_area_pmd(pgd_t * dir, unsigned long address, unsigned long size) @@ -126,10 +127,13 @@ static inline int alloc_area_pmd(pmd_t * if (end > PGDIR_SIZE) end = PGDIR_SIZE; do { + int err; pte_t * pte = pte_alloc(&init_mm, pmd, address); if (!pte) return -ENOMEM; - if (alloc_area_pte(pte, address, end - address, gfp_mask, prot)) + err = alloc_area_pte(pte, address, end - address, gfp_mask, prot); + pte_kunmap(pte); + if (err) return -ENOMEM; address = (address + PMD_SIZE) & PMD_MASK; pmd++; diff -urNp ref/mm/vmscan.c 2.4.20pre7aa1/mm/vmscan.c --- ref/mm/vmscan.c Wed Sep 18 00:46:58 2002 +++ 2.4.20pre7aa1/mm/vmscan.c Wed Sep 18 00:47:01 2002 @@ -183,7 +183,7 @@ preserve: /* mm->page_table_lock is held. mmap_sem is not held */ static inline int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int count, zone_t * classzone) { - pte_t * pte; + pte_t * pte, * pte_orig; unsigned long pmd_end; if (pmd_none(*dir)) @@ -194,7 +194,7 @@ static inline int swap_out_pmd(struct mm return count; } - pte = pte_offset(dir, address); + pte_orig = pte = pte_offset_atomic(dir, address); pmd_end = (address + PMD_SIZE) & PMD_MASK; if (end > pmd_end) @@ -215,6 +215,7 @@ static inline int swap_out_pmd(struct mm address += PAGE_SIZE; pte++; } while (address && (address < end)); + pte_kunmap(pte_orig); mm->swap_address = address; return count; }