--- linux/fs/inode.c.orig	Wed Sep 26 19:30:11 2001
+++ linux/fs/inode.c	Wed Sep 26 19:31:22 2001
@@ -109,6 +109,7 @@
 		INIT_LIST_HEAD(&inode->i_devices);
 		sema_init(&inode->i_sem, 1);
 		sema_init(&inode->i_zombie, 1);
+		spin_lock_init(&inode->i_data.page_lock);
 		spin_lock_init(&inode->i_data.i_shared_lock);
 	}
 }
--- linux/mm/vmscan.c.orig	Wed Sep 26 19:30:12 2001
+++ linux/mm/vmscan.c	Wed Sep 26 20:06:37 2001
@@ -328,6 +328,8 @@
 static int shrink_cache(int nr_pages, int max_scan, zone_t * classzone, unsigned int gfp_mask)
 {
 	struct list_head * entry;
+	spinlock_t *pg_lock;
+	struct address_space * mapping;
 
 	spin_lock(&pagemap_lru_lock);
 	while (max_scan && (entry = inactive_list.prev) != &inactive_list) {
@@ -448,16 +450,19 @@
 			}
 		}
 
-		if (unlikely(!page->mapping))
+		mapping = page->mapping;
+		if (unlikely(!mapping))
 			BUG();
 
-		if (unlikely(!spin_trylock(&pagecache_lock))) {
+		pg_lock = PAGECACHE_LOCK(page);
+		if (unlikely(!spin_trylock(pg_lock))) {
 			/* we hold the page lock so the page cannot go away from under us */
 			spin_unlock(&pagemap_lru_lock);
 
-			spin_lock(&pagecache_lock);
+			spin_lock(pg_lock);
 			spin_lock(&pagemap_lru_lock);
 		}
+		spin_lock(&mapping->page_lock);
 
 		/*
 		 * this is the non-racy check, it is critical to check
@@ -465,7 +470,8 @@
 		 * so not in use by anybody.
 		 */
 		if (!is_page_cache_freeable(page) || PageDirty(page)) {
-			spin_unlock(&pagecache_lock);
+			spin_unlock(pg_lock);
+			spin_unlock(&mapping->page_lock);
 			UnlockPage(page);
 			continue;
 		}
@@ -478,7 +484,8 @@
 			swap.val = page->index;
 			__delete_from_swap_cache(page);
 		}
-		spin_unlock(&pagecache_lock);
+		spin_unlock(pg_lock);
+		spin_unlock(&mapping->page_lock);
 
 		__lru_cache_del(page);
 
--- linux/mm/filemap.c.orig	Wed Sep 26 19:30:12 2001
+++ linux/mm/filemap.c	Wed Sep 26 19:45:53 2001
@@ -43,15 +43,34 @@
  */
 
 atomic_t page_cache_size = ATOMIC_INIT(0);
+
 unsigned int page_hash_bits;
-struct page **page_hash_table;
+struct page_cache_bucket *page_hash_table;
 
-spinlock_t pagecache_lock ____cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
-/*
- * NOTE: to avoid deadlocking you must never acquire the pagecache_lock with
- * the pagemap_lru_lock held.
+/* Page-cache SMP locking rules:
+ *
+ * 1) The identity of a page (mapping, index) is only changed
+ *    under PAGECACHE_LOCK.
+ *
+ * 2) The deadlock-free ordering of lock acquisition is
+ *    PAGECACHE_LOCK ==> pagemap_lru_lock ==> mapping->page_lock.
+ *    There are cases where two of these locks need to be held
+ *    simultaneously but cannot be obtained in the correct order.
+ *    The way to handle this situation is as follows:
+ *
+ *    repeat:
+ *        spin_lock(&pagemap_lru_lock);
+ *        some_loop_over_lru_pages() {
+ *            ...
+ *            if (!spin_trylock(PAGECACHE_LOCK(page))) {
+ *                spin_unlock(&pagemap_lru_lock);
+ *                goto repeat;
+ *            }
+ *            ...
+ *        }
  */
-spinlock_t pagemap_lru_lock ____cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
+
+spinlock_t pagemap_lru_lock = SPIN_LOCK_UNLOCKED;
 
 #define CLUSTER_PAGES		(1 << page_cluster)
 #define CLUSTER_OFFSET(x)	(((x) >> page_cluster) << page_cluster)
@@ -74,15 +93,24 @@
 {
 	struct list_head *head = &mapping->clean_pages;
 
+	spin_lock(&mapping->page_lock);
 	mapping->nrpages++;
 	list_add(&page->list, head);
 	page->mapping = mapping;
+	spin_unlock(&mapping->page_lock);
 }
 
 static inline void remove_page_from_inode_queue(struct page * page)
 {
 	struct address_space * mapping = page->mapping;
 
+#if CONFIG_SMP
+	if (!spin_is_locked(PAGECACHE_LOCK(page)))
+		BUG();
+	if (!spin_is_locked(&mapping->page_lock))
+		BUG();
+#endif
+
 	mapping->nrpages--;
 	list_del(&page->list);
 	page->mapping = NULL;
@@ -114,12 +142,81 @@
 void remove_inode_page(struct page *page)
 {
+	struct address_space * mapping;
+	spinlock_t *pg_lock;
+
 	if (!PageLocked(page))
 		PAGE_BUG(page);
 
-	spin_lock(&pagecache_lock);
+	pg_lock = PAGECACHE_LOCK(page);
+
+	spin_lock(pg_lock);
+	mapping = page->mapping;
+	spin_lock(&mapping->page_lock);
 	__remove_inode_page(page);
-	spin_unlock(&pagecache_lock);
+	spin_unlock(&mapping->page_lock);
+	spin_unlock(pg_lock);
+}
+
+/*
+ * Flush clean pages from the pagecache.
+ */
+void flush_inode_pages(struct inode * inode)
+{
+	unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+	struct address_space * mapping = inode->i_mapping;
+	struct list_head *head, *curr;
+	struct page * page;
+
+retry:
+	head = &inode->i_mapping->clean_pages;
+	spin_lock(&pagemap_lru_lock);
+	spin_lock(&mapping->page_lock);
+	curr = head->next;
+
+	while (curr != head) {
+		spinlock_t *pg_lock;
+
+		page = list_entry(curr, struct page, list);
+		curr = curr->next;
+
+		if (page->index == end_index)
+			continue;
+		pg_lock = PAGECACHE_LOCK(page);
+		if (!spin_trylock(pg_lock)) {
+			spin_unlock(&mapping->page_lock);
+			spin_unlock(&pagemap_lru_lock);
+			goto retry;
+		}
+
+		/* We cannot invalidate a locked page */
+		if (TryLockPage(page)) {
+			spin_unlock(pg_lock);
+			continue;
+		}
+
+		/*
+		 * We cannot flush a page if buffers are still active.
+		 */
+		if (page->buffers) {
+			spin_unlock(pg_lock);
+			spin_unlock(&mapping->page_lock);
+			spin_unlock(&pagemap_lru_lock);
+			try_to_free_buffers(page, GFP_KERNEL);
+			UnlockPage(page);
+			goto retry;
+		}
+
+		__lru_cache_del(page);
+		__remove_inode_page(page);
+		spin_unlock(pg_lock);
+
+		UnlockPage(page);
+		page_cache_release(page);
+	}
+
+	spin_unlock(&mapping->page_lock);
+	spin_unlock(&pagemap_lru_lock);
 }
 
 static inline int sync_page(struct page *page)
@@ -136,12 +233,20 @@
  */
 void __set_page_dirty(struct page *page)
 {
-	struct address_space *mapping = page->mapping;
+	struct address_space *mapping;
+	spinlock_t *pg_lock;
+
+	pg_lock = PAGECACHE_LOCK(page);
+	spin_lock(pg_lock);
+
+	mapping = page->mapping;
+	spin_lock(&mapping->page_lock);
 
-	spin_lock(&pagecache_lock);
 	list_del(&page->list);
 	list_add(&page->list, &mapping->dirty_pages);
-	spin_unlock(&pagecache_lock);
+
+	spin_unlock(&mapping->page_lock);
+	spin_unlock(pg_lock);
 
 	if (mapping->host)
 		mark_inode_dirty_pages(mapping->host);
@@ -157,39 +262,58 @@
  */
 void invalidate_inode_pages(struct inode * inode)
 {
+	struct address_space * mapping = inode->i_mapping;
 	struct list_head *head, *curr;
 	struct page * page;
 
 	head = &inode->i_mapping->clean_pages;
 
-	spin_lock(&pagecache_lock);
-	spin_lock(&pagemap_lru_lock);
+retry:
+	spin_lock(&pagemap_lru_lock);
+	spin_lock(&mapping->page_lock);
 	curr = head->next;
 
 	while (curr != head) {
+		spinlock_t *pg_lock;
+
 		page = list_entry(curr, struct page, list);
+		pg_lock = PAGECACHE_LOCK(page);
+		if (!spin_trylock(pg_lock)) {
+			spin_unlock(&mapping->page_lock);
+			spin_unlock(&pagemap_lru_lock);
+			goto retry;
+		}
 		curr = curr->next;
 
 		/* We cannot invalidate something in use.. */
-		if (page_count(page) != 1)
+		if (page_count(page) != 1) {
+			spin_unlock(pg_lock);
 			continue;
+		}
 
 		/* ..or dirty.. */
-		if (PageDirty(page))
+		if (PageDirty(page)) {
+			spin_unlock(pg_lock);
 			continue;
+		}
 
 		/* ..or locked */
-		if (TryLockPage(page))
+		if (TryLockPage(page)) {
+			spin_unlock(pg_lock);
 			continue;
+		}
 
 		__lru_cache_del(page);
 		__remove_inode_page(page);
+		spin_unlock(pg_lock);
+
 		UnlockPage(page);
 		page_cache_release(page);
+
 	}
 
+	spin_unlock(&mapping->page_lock);
 	spin_unlock(&pagemap_lru_lock);
-	spin_unlock(&pagecache_lock);
 }
 
 static inline void truncate_partial_page(struct page *page, unsigned partial)
@@ -215,13 +339,12 @@
 	 * all sorts of fun problems ...
 	 */
 	ClearPageDirty(page);
-	ClearPageUptodate(page);
 	remove_inode_page(page);
 	page_cache_release(page);
 }
 
-static int FASTCALL(truncate_list_pages(struct list_head *, unsigned long, unsigned *));
-static int truncate_list_pages(struct list_head *head, unsigned long start, unsigned *partial)
+static int FASTCALL(truncate_list_pages(struct address_space * mapping, struct list_head *, unsigned long, unsigned *));
+static int truncate_list_pages(struct address_space * mapping, struct list_head *head, unsigned long start, unsigned *partial)
 {
 	struct list_head *curr;
 	struct page * page;
@@ -238,6 +361,14 @@
 		/* Is one of the pages to truncate? */
 		if ((offset >= start) || (*partial && (offset + 1) == start)) {
 			int failed;
+			spinlock_t *pg_lock = PAGECACHE_LOCK(page);
+
+			if (!spin_trylock(pg_lock)) {
+				/* let a bucket-lock holder take page_lock */
+				spin_unlock(&mapping->page_lock);
+				spin_lock(&mapping->page_lock);
+				return 1;
+			}
 
 			page_cache_get(page);
 			failed = TryLockPage(page);
@@ -250,7 +381,8 @@
 
 			/* Restart on this page */
 			list_add(head, curr);
-			spin_unlock(&pagecache_lock);
+			spin_unlock(pg_lock);
+			spin_unlock(&mapping->page_lock);
 			unlocked = 1;
 
 			if (!failed) {
@@ -271,7 +403,7 @@
 				schedule();
 			}
 
-			spin_lock(&pagecache_lock);
+			spin_lock(&mapping->page_lock);
 			goto restart;
 		}
 		curr = curr->prev;
@@ -295,24 +427,25 @@
 	unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
 	int unlocked;
 
-	spin_lock(&pagecache_lock);
+	spin_lock(&mapping->page_lock);
 	do {
-		unlocked = truncate_list_pages(&mapping->clean_pages, start, &partial);
-		unlocked |= truncate_list_pages(&mapping->dirty_pages, start, &partial);
-		unlocked |= truncate_list_pages(&mapping->locked_pages, start, &partial);
+		unlocked = truncate_list_pages(mapping, &mapping->clean_pages, start, &partial);
+		unlocked |= truncate_list_pages(mapping, &mapping->dirty_pages, start, &partial);
+		unlocked |= truncate_list_pages(mapping, &mapping->locked_pages, start, &partial);
 	} while (unlocked);
 	/* Traversed all three lists without dropping the lock */
-	spin_unlock(&pagecache_lock);
+	spin_unlock(&mapping->page_lock);
 }
 
 static inline int invalidate_this_page2(struct page * page,
 					struct list_head * curr,
-					struct list_head * head)
+					struct list_head * head,
+					struct address_space * mapping)
 {
 	int unlocked = 1;
 
 	/*
-	 * The page is locked and we hold the pagecache_lock as well
+	 * The page is locked and we hold the mapping->page_lock as well
 	 * so both page_count(page) and page->buffers stays constant here.
 	 */
 	if (page_count(page) == 1 + !!page->buffers) {
@@ -321,7 +454,7 @@
 		list_add_tail(head, curr);
 		page_cache_get(page);
-		spin_unlock(&pagecache_lock);
+		spin_unlock(&mapping->page_lock);
 
 		truncate_complete_page(page);
 	} else {
 		if (page->buffers) {
@@ -330,7 +463,7 @@
 			list_add_tail(head, curr);
 			page_cache_get(page);
-			spin_unlock(&pagecache_lock);
+			spin_unlock(&mapping->page_lock);
 
 			block_invalidate_page(page);
 		} else
 			unlocked = 0;
@@ -342,8 +475,8 @@
 	return unlocked;
 }
 
-static int FASTCALL(invalidate_list_pages2(struct list_head *));
-static int invalidate_list_pages2(struct list_head *head)
+static int FASTCALL(invalidate_list_pages2(struct list_head *, struct address_space * mapping));
+static int invalidate_list_pages2(struct list_head *head, struct address_space * mapping)
 {
 	struct list_head *curr;
 	struct page * page;
@@ -357,7 +490,7 @@
 		if (!TryLockPage(page)) {
 			int __unlocked;
 
-			__unlocked = invalidate_this_page2(page, curr, head);
+			__unlocked = invalidate_this_page2(page, curr, head, mapping);
 			UnlockPage(page);
 			unlocked |= __unlocked;
 			if (!__unlocked) {
@@ -370,7 +503,7 @@
 			list_add(head, curr);
 			page_cache_get(page);
-			spin_unlock(&pagecache_lock);
+			spin_unlock(&mapping->page_lock);
 			unlocked = 1;
 			wait_on_page(page);
 		}
@@ -381,7 +514,7 @@
 			schedule();
 		}
 
-		spin_lock(&pagecache_lock);
+		spin_lock(&mapping->page_lock);
 		goto restart;
 	}
 	return unlocked;
@@ -396,13 +529,13 @@
 {
 	int unlocked;
 
-	spin_lock(&pagecache_lock);
 	do {
-		unlocked = invalidate_list_pages2(&mapping->clean_pages);
-		unlocked |= invalidate_list_pages2(&mapping->dirty_pages);
-		unlocked |= invalidate_list_pages2(&mapping->locked_pages);
+		spin_lock(&mapping->page_lock);
+		unlocked = invalidate_list_pages2(&mapping->clean_pages, mapping);
+		unlocked |= invalidate_list_pages2(&mapping->dirty_pages, mapping);
+		unlocked |= invalidate_list_pages2(&mapping->locked_pages, mapping);
+		spin_unlock(&mapping->page_lock);
 	} while (unlocked);
-	spin_unlock(&pagecache_lock);
 }
 
 static inline struct page * __find_page_nolock(struct address_space *mapping, unsigned long offset, struct page *page)
@@ -459,13 +592,13 @@
 	return error;
 }
 
-static int do_buffer_fdatasync(struct list_head *head, unsigned long start, unsigned long end, int (*fn)(struct page *))
+static int do_buffer_fdatasync(struct address_space * mapping, struct list_head *head, unsigned long start, unsigned long end, int (*fn)(struct page *))
 {
 	struct list_head *curr;
 	struct page *page;
 	int retval = 0;
 
-	spin_lock(&pagecache_lock);
+	spin_lock(&mapping->page_lock);
 	curr = head->next;
 	while (curr != head) {
 		page = list_entry(curr, struct page, list);
@@ -478,7 +611,7 @@
 			continue;
 
 		page_cache_get(page);
-		spin_unlock(&pagecache_lock);
+		spin_unlock(&mapping->page_lock);
 		lock_page(page);
 
 		/* The buffers could have been free'd while we waited for the page lock */
@@ -486,11 +619,11 @@
 			retval |= fn(page);
 		UnlockPage(page);
 
-		spin_lock(&pagecache_lock);
+		spin_lock(&mapping->page_lock);
 		curr = page->list.next;
 		page_cache_release(page);
 	}
-	spin_unlock(&pagecache_lock);
+	spin_unlock(&mapping->page_lock);
 
 	return retval;
 }
@@ -501,17 +634,18 @@
  */
 int generic_buffer_fdatasync(struct inode *inode, unsigned long start_idx, unsigned long end_idx)
 {
+	struct address_space * mapping = inode->i_mapping;
 	int retval;
 
 	/* writeout dirty buffers on pages from both clean and dirty lists */
-	retval = do_buffer_fdatasync(&inode->i_mapping->dirty_pages, start_idx, end_idx, writeout_one_page);
-	retval |= do_buffer_fdatasync(&inode->i_mapping->clean_pages, start_idx, end_idx, writeout_one_page);
-	retval |= do_buffer_fdatasync(&inode->i_mapping->locked_pages, start_idx, end_idx, writeout_one_page);
+	retval = do_buffer_fdatasync(mapping, &mapping->dirty_pages, start_idx, end_idx, writeout_one_page);
+	retval |= do_buffer_fdatasync(mapping, &mapping->clean_pages, start_idx, end_idx, writeout_one_page);
+	retval |= do_buffer_fdatasync(mapping, &mapping->locked_pages, start_idx, end_idx, writeout_one_page);
 
 	/* now wait for locked buffers on pages from both clean and dirty lists */
-	retval |= do_buffer_fdatasync(&inode->i_mapping->dirty_pages, start_idx, end_idx, waitfor_one_page);
-	retval |= do_buffer_fdatasync(&inode->i_mapping->clean_pages, start_idx, end_idx, waitfor_one_page);
-	retval |= do_buffer_fdatasync(&inode->i_mapping->locked_pages, start_idx, end_idx, waitfor_one_page);
+	retval |= do_buffer_fdatasync(mapping, &mapping->dirty_pages, start_idx, end_idx, waitfor_one_page);
+	retval |= do_buffer_fdatasync(mapping, &mapping->clean_pages, start_idx, end_idx, waitfor_one_page);
+	retval |= do_buffer_fdatasync(mapping, &mapping->locked_pages, start_idx, end_idx, waitfor_one_page);
 
 	return retval;
 }
@@ -527,19 +661,29 @@
 {
 	int (*writepage)(struct page *) = mapping->a_ops->writepage;
 
-	spin_lock(&pagecache_lock);
+repeat:
+	spin_lock(&mapping->page_lock);
 
 	while (!list_empty(&mapping->dirty_pages)) {
 		struct page *page = list_entry(mapping->dirty_pages.next, struct page, list);
+		spinlock_t *pg_lock;
 
+		pg_lock = PAGECACHE_LOCK(page);
+		if (!spin_trylock(pg_lock)) {
+			spin_unlock(&mapping->page_lock);
+			goto repeat;
+		}
 		list_del(&page->list);
 		list_add(&page->list, &mapping->locked_pages);
 
-		if (!PageDirty(page))
+		if (!PageDirty(page)) {
+			spin_unlock(pg_lock);
 			continue;
+		}
 
 		page_cache_get(page);
-		spin_unlock(&pagecache_lock);
+		spin_unlock(pg_lock);
+		spin_unlock(&mapping->page_lock);
 
 		lock_page(page);
 
@@ -550,9 +694,9 @@
 		UnlockPage(page);
 		page_cache_release(page);
 
-		spin_lock(&pagecache_lock);
+		spin_lock(&mapping->page_lock);
 	}
-	spin_unlock(&pagecache_lock);
+	spin_unlock(&mapping->page_lock);
 }
 
 /**
@@ -564,26 +708,36 @@
  */
 void filemap_fdatawait(struct address_space * mapping)
 {
-	spin_lock(&pagecache_lock);
+repeat:
+	spin_lock(&mapping->page_lock);
 
 	while (!list_empty(&mapping->locked_pages)) {
 		struct page *page = list_entry(mapping->locked_pages.next, struct page, list);
+		spinlock_t *pg_lock;
 
+		pg_lock = PAGECACHE_LOCK(page);
+		if (!spin_trylock(pg_lock)) {
+			spin_unlock(&mapping->page_lock);
+			goto repeat;
+		}
 		list_del(&page->list);
 		list_add(&page->list, &mapping->clean_pages);
 
-		if (!PageLocked(page))
+		if (!PageLocked(page)) {
+			spin_unlock(pg_lock);
 			continue;
+		}
 
 		page_cache_get(page);
-		spin_unlock(&pagecache_lock);
+		spin_unlock(pg_lock);
+		spin_unlock(&mapping->page_lock);
 
 		___wait_on_page(page);
 
 		page_cache_release(page);
-		spin_lock(&pagecache_lock);
+		spin_lock(&mapping->page_lock);
 	}
-	spin_unlock(&pagecache_lock);
+	spin_unlock(&mapping->page_lock);
 }
 
 /*
@@ -594,16 +748,18 @@
 */
 void add_to_page_cache_locked(struct page * page, struct address_space *mapping, unsigned long index)
 {
+	spinlock_t *pg_lock = __PAGECACHE_LOCK(mapping, index);
+
 	if (!PageLocked(page))
 		BUG();
 
 	page->index = index;
 	page_cache_get(page);
-	spin_lock(&pagecache_lock);
+	spin_lock(pg_lock);
 	add_page_to_inode_queue(mapping, page);
 	add_page_to_hash_queue(page, page_hash(mapping, index));
 	lru_cache_add(page);
-	spin_unlock(&pagecache_lock);
+	spin_unlock(pg_lock);
 }
 
 /*
@@ -630,9 +786,11 @@
 void add_to_page_cache(struct page * page, struct address_space * mapping, unsigned long offset)
 {
-	spin_lock(&pagecache_lock);
+	spinlock_t *pg_lock = __PAGECACHE_LOCK(mapping, offset);
+
+	spin_lock(pg_lock);
 	__add_to_page_cache(page, mapping, offset, page_hash(mapping, offset));
-	spin_unlock(&pagecache_lock);
+	spin_unlock(pg_lock);
 }
 
 static int add_to_page_cache_unique(struct page * page,
@@ -641,8 +799,11 @@
 {
 	int err;
 	struct page *alias;
+	spinlock_t *pg_lock;
+
+	pg_lock = __PAGECACHE_LOCK(mapping, offset);
+	spin_lock(pg_lock);
 
-	spin_lock(&pagecache_lock);
 	alias = __find_page_nolock(mapping, offset, *hash);
 
 	err = 1;
@@ -651,7 +812,8 @@
 		err = 0;
 	}
 
-	spin_unlock(&pagecache_lock);
+	spin_unlock(pg_lock);
+
 	return err;
 }
 
@@ -667,9 +829,9 @@
 	struct page **hash = page_hash(mapping, offset);
 	struct page *page;
 
-	spin_lock(&pagecache_lock);
+	spin_lock(__PAGECACHE_LOCK(mapping, offset));
 	page = __find_page_nolock(mapping, offset, *hash);
-	spin_unlock(&pagecache_lock);
+	spin_unlock(__PAGECACHE_LOCK(mapping, offset));
 	if (page)
 		return 0;
 
@@ -783,11 +945,11 @@
 	 * We scan the hash list read-only. Addition to and removal from
 	 * the hash-list needs a held write-lock.
 	 */
-	spin_lock(&pagecache_lock);
+	spin_lock(__PAGECACHE_LOCK(mapping, offset));
 	page = __find_page_nolock(mapping, offset, *hash);
 	if (page)
 		page_cache_get(page);
-	spin_unlock(&pagecache_lock);
+	spin_unlock(__PAGECACHE_LOCK(mapping, offset));
 	return page;
 }
 
@@ -805,11 +967,11 @@
 	 * the hash-list needs a held write-lock.
 	 */
 repeat:
-	spin_lock(&pagecache_lock);
+	spin_lock(__PAGECACHE_LOCK(mapping, offset));
 	page = __find_page_nolock(mapping, offset, *hash);
 	if (page) {
 		page_cache_get(page);
-		spin_unlock(&pagecache_lock);
+		spin_unlock(__PAGECACHE_LOCK(mapping, offset));
 
 		lock_page(page);
 
@@ -822,7 +984,7 @@
 		page_cache_release(page);
 		goto repeat;
 	}
-	spin_unlock(&pagecache_lock);
+	spin_unlock(__PAGECACHE_LOCK(mapping, offset));
 	return NULL;
 }
 
@@ -1112,7 +1274,7 @@
 * This is really ugly. But the goto's actually try to clarify some
 * of the logic when it comes to error handling etc.
 */
-void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc, read_actor_t actor)
+void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc, read_actor_t actor, int nonblock)
 {
 	struct inode *inode = filp->f_dentry->d_inode;
 	struct address_space *mapping = inode->i_mapping;
@@ -1187,17 +1349,24 @@
 		 */
 		hash = page_hash(mapping, index);
 
-		spin_lock(&pagecache_lock);
+		spin_lock(__PAGECACHE_LOCK(mapping, index));
 		page = __find_page_nolock(mapping, index, *hash);
 		if (!page)
 			goto no_cached_page;
 found_page:
 		page_cache_get(page);
-		spin_unlock(&pagecache_lock);
+		spin_unlock(__PAGECACHE_LOCK(mapping, index));
 
-		if (!Page_Uptodate(page))
+		if (!Page_Uptodate(page)) {
+			if (nonblock) {
+				page_cache_release(page);
+				desc->error = -EWOULDBLOCKIO;
+				break;
+			}
 			goto page_not_up_to_date;
-		generic_file_readahead(reada_ok, filp, inode, page);
+		}
+		if (!nonblock)
+			generic_file_readahead(reada_ok, filp, inode, page);
page_ok:
 		/* If users can be writing to this page using arbitrary
 		 * virtual addresses, take care about potential aliasing
@@ -1274,6 +1443,11 @@
 		break;
 
no_cached_page:
+		if (nonblock) {
+			spin_unlock(__PAGECACHE_LOCK(mapping, index));
+			desc->error = -EWOULDBLOCKIO;
+			break;
+		}
 		/*
 		 * Ok, it wasn't cached, so we need to create a new
 		 * page..
@@ -1281,7 +1455,7 @@
 		 * We get here with the page cache lock held.
 		 */
 		if (!cached_page) {
-			spin_unlock(&pagecache_lock);
+			spin_unlock(__PAGECACHE_LOCK(mapping, index));
 			cached_page = page_cache_alloc(mapping);
 			if (!cached_page) {
 				desc->error = -ENOMEM;
@@ -1292,7 +1466,7 @@
 			 * Somebody may have added the page while we
 			 * dropped the page cache lock. Check for that.
 			 */
-			spin_lock(&pagecache_lock);
+			spin_lock(__PAGECACHE_LOCK(mapping, index));
 			page = __find_page_nolock(mapping, index, *hash);
 			if (page)
 				goto found_page;
@@ -1303,7 +1477,7 @@
 		 */
 		page = cached_page;
 		__add_to_page_cache(page, mapping, index, hash);
-		spin_unlock(&pagecache_lock);
+		spin_unlock(__PAGECACHE_LOCK(mapping, index));
 		cached_page = NULL;
 
 		goto readpage;
@@ -1449,7 +1623,7 @@
 	desc.count = count;
 	desc.buf = buf;
 	desc.error = 0;
-	do_generic_file_read(filp, ppos, &desc, file_read_actor);
+	do_generic_file_read(filp, ppos, &desc, file_read_actor, 0);
 
 	retval = desc.written;
 	if (!retval)
@@ -1480,7 +1654,7 @@
 	}
 }
 
-static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset , unsigned long size)
+int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset , unsigned long size)
 {
 	ssize_t written;
 	unsigned long count = desc->count;
@@ -1573,7 +1747,7 @@
 		desc.count = count;
 		desc.buf = (char *) out_file;
 		desc.error = 0;
-		do_generic_file_read(in_file, ppos, &desc, file_send_actor);
+		do_generic_file_read(in_file, ppos, &desc, file_send_actor, 0);
 
 		retval = desc.written;
 		if (!retval)
@@ -2384,11 +2558,11 @@
 	struct address_space * as = &vma->vm_file->f_dentry->d_inode->i_data;
 	struct page * page, ** hash = page_hash(as, pgoff);
 
-	spin_lock(&pagecache_lock);
+	spin_lock(__PAGECACHE_LOCK(as, pgoff));
 	page = __find_page_nolock(as, pgoff, *hash);
 	if ((page) && (Page_Uptodate(page)))
 		present = 1;
-	spin_unlock(&pagecache_lock);
+	spin_unlock(__PAGECACHE_LOCK(as, pgoff));
 
 	return present;
 }
@@ -2703,7 +2877,7 @@
 	 * Check whether we've reached the file size limit.
 	 */
 	err = -EFBIG;
-	
+
 	if (limit != RLIM_INFINITY) {
 		if (pos >= limit) {
 			send_sig(SIGXFSZ, current, 0);
@@ -2887,21 +3061,21 @@
 
 void __init page_cache_init(unsigned long mempages)
 {
-	unsigned long htable_size, order;
+	unsigned long htable_size, order, i;
 
 	htable_size = mempages;
-	htable_size *= sizeof(struct page *);
+	htable_size *= sizeof(struct page_cache_bucket);
 	for(order = 0; (PAGE_SIZE << order) < htable_size; order++)
 		;
 
 	do {
-		unsigned long tmp = (PAGE_SIZE << order) / sizeof(struct page *);
+		unsigned long tmp = (PAGE_SIZE << order) / sizeof(struct page_cache_bucket);
 
 		page_hash_bits = 0;
 		while((tmp >>= 1UL) != 0UL)
 			page_hash_bits++;
 
-		page_hash_table = (struct page **)
+		page_hash_table = (struct page_cache_bucket *)
 			__get_free_pages(GFP_ATOMIC, order);
 	} while(page_hash_table == NULL && --order > 0);
 
@@ -2909,5 +3083,9 @@
 		(1 << page_hash_bits), order, (PAGE_SIZE << order));
 	if (!page_hash_table)
 		panic("Failed to allocate page hash table\n");
-	memset((void *)page_hash_table, 0, PAGE_HASH_SIZE * sizeof(struct page *));
+
+	for (i = 0; i < PAGE_HASH_SIZE; i++) {
+		spin_lock_init(&page_hash_table[i].lock);
+		page_hash_table[i].chain = NULL;
+	}
 }
--- linux/mm/swap_state.c.orig	Wed Sep 26 19:30:12 2001
+++ linux/mm/swap_state.c	Wed Sep 26 20:11:06 2001
@@ -42,6 +42,7 @@
 	LIST_HEAD_INIT(swapper_space.dirty_pages),
 	LIST_HEAD_INIT(swapper_space.locked_pages),
 	0,				/* nrpages	*/
+	SPIN_LOCK_UNLOCKED,		/* page_lock	*/
 	&swap_aops,
 };
 
@@ -114,9 +115,12 @@
 
 	entry.val = page->index;
 
-	spin_lock(&pagecache_lock);
+	spin_lock(PAGECACHE_LOCK(page));
+	spin_lock(&swapper_space.page_lock);
 	__delete_from_swap_cache(page);
-	spin_unlock(&pagecache_lock);
+	/* page->mapping is cleared by the delete, unlock via swapper_space */
+	spin_unlock(&swapper_space.page_lock);
+	spin_unlock(__PAGECACHE_LOCK(&swapper_space, entry.val));
 
 	swap_free(entry);
 	page_cache_release(page);
--- linux/mm/swap.c.orig	Wed Sep 26 19:30:12 2001
+++ linux/mm/swap.c	Wed Sep 26 19:31:22 2001
@@ -63,6 +63,7 @@
 	spin_unlock(&pagemap_lru_lock);
 }
 
+
 /*
  * Move an inactive page to the active list.
  */
@@ -103,6 +104,10 @@
 */
 void __lru_cache_del(struct page * page)
 {
+#if CONFIG_SMP
+	if (!spin_is_locked(&pagemap_lru_lock))
+		BUG();
+#endif
 	if (PageActive(page)) {
 		del_page_from_active_list(page);
 	} else if (PageInactive(page)) {
--- linux/include/linux/swap.h.orig	Wed Sep 26 19:30:12 2001
+++ linux/include/linux/swap.h	Wed Sep 26 20:14:16 2001
@@ -86,7 +86,6 @@
 extern atomic_t nr_async_pages;
 extern atomic_t page_cache_size;
 extern atomic_t buffermem_pages;
-extern spinlock_t pagecache_lock;
 extern void __remove_inode_page(struct page *);
 
 /* Incomplete types for prototype declarations: */
--- linux/include/linux/errno.h.orig	Wed Mar 29 03:51:39 2000
+++ linux/include/linux/errno.h	Wed Sep 26 19:31:22 2001
@@ -21,6 +21,9 @@
 #define EBADTYPE	527	/* Type not supported by server */
 #define EJUKEBOX	528	/* Request initiated, but will not complete before timeout */
 
+/* Defined for TUX async IO */
+#define EWOULDBLOCKIO	530	/* Would block due to block-IO */
+
 #endif
 
 #endif
--- linux/include/linux/fs.h.orig	Wed Sep 26 19:30:11 2001
+++ linux/include/linux/fs.h	Wed Sep 26 20:14:16 2001
@@ -390,6 +390,8 @@
 	struct list_head	dirty_pages;	/* list of dirty pages */
 	struct list_head	locked_pages;	/* list of locked pages */
 	unsigned long		nrpages;	/* number of total pages */
+	spinlock_t		page_lock;	/* and spinlock protecting them */
+
 	struct address_space_operations *a_ops;	/* methods */
 	struct inode		*host;		/* owner: inode, block_device */
 	struct vm_area_struct	*i_mmap;	/* list of private mappings */
@@ -1363,6 +1365,7 @@
 extern int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*,
 				unsigned long *);
 extern int block_sync_page(struct page *);
+extern void flush_inode_pages(struct inode * inode);
 
 int generic_block_bmap(struct address_space *, long, get_block_t *);
 int generic_commit_write(struct file *, struct page *, unsigned, unsigned);
@@ -1375,7 +1378,7 @@
 extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
 extern ssize_t generic_file_read(struct file *, char *, size_t, loff_t *);
 extern ssize_t generic_file_write(struct file *, const char *, size_t, loff_t *);
-extern void do_generic_file_read(struct file *, loff_t *, read_descriptor_t *, read_actor_t);
+extern void do_generic_file_read(struct file *, loff_t *, read_descriptor_t *, read_actor_t, int);
 extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
 extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin);
 extern ssize_t generic_read_dir(struct file *, char *, size_t, loff_t *);
--- linux/include/linux/pagemap.h.orig	Wed Sep 26 19:30:12 2001
+++ linux/include/linux/pagemap.h	Wed Sep 26 20:14:19 2001
@@ -41,12 +41,17 @@
 */
 #define page_cache_entry(x)	virt_to_page(x)
 
+struct page_cache_bucket {
+	spinlock_t lock;
+	struct page *chain;
+} __attribute__((__aligned__(8)));
+
 extern unsigned int page_hash_bits;
 #define PAGE_HASH_BITS (page_hash_bits)
 #define PAGE_HASH_SIZE (1 << PAGE_HASH_BITS)
 
 extern atomic_t page_cache_size; /* # of pages currently in the hash table */
-extern struct page **page_hash_table;
+extern struct page_cache_bucket *page_hash_table;
 
 extern void page_cache_init(unsigned long);
 
@@ -68,7 +73,12 @@
 #undef s
 }
 
-#define page_hash(mapping,index) (page_hash_table+_page_hashfn(mapping,index))
+#define page_hash(mapping,index) \
+	&((page_hash_table+_page_hashfn(mapping,index))->chain)
+#define __PAGECACHE_LOCK(mapping,index) \
+	&((page_hash_table+_page_hashfn(mapping,index))->lock)
+#define PAGECACHE_LOCK(page) \
+	__PAGECACHE_LOCK((page)->mapping, (page)->index)
 
 extern struct page * __find_get_page(struct address_space *mapping,
 				unsigned long index, struct page **hash);
--- linux/net/khttpd/datasending.c.orig	Wed Sep 26 19:30:12 2001
+++ linux/net/khttpd/datasending.c	Wed Sep 26 19:31:22 2001
@@ -127,7 +127,7 @@
 			desc.count = ReadSize;
 			desc.buf = (char *) CurrentRequest->sock;
 			desc.error = 0;
-			do_generic_file_read(CurrentRequest->filp, ppos, &desc, sock_send_actor);
+			do_generic_file_read(CurrentRequest->filp, ppos, &desc, sock_send_actor, 0);
 			if (desc.written>0)
 			{
 				CurrentRequest->BytesSent += desc.written;
--- linux/drivers/block/loop.c.orig	Wed Sep 26 19:30:09 2001
+++ linux/drivers/block/loop.c	Wed Sep 26 19:31:22 2001
@@ -272,7 +272,7 @@
 	spin_lock_irq(&lo->lo_lock);
 	file = lo->lo_backing_file;
 	spin_unlock_irq(&lo->lo_lock);
-	do_generic_file_read(file, &pos, &desc, lo_read_actor);
+	do_generic_file_read(file, &pos, &desc, lo_read_actor, 0);
 
 	return desc.error;
 }
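
The locking-rules comment in mm/filemap.c above gives the trylock-and-retry
protocol only in outline. Written out as a complete loop it looks roughly like
the sketch below. This is illustration, not part of the patch: the walk follows
the 2.4 LRU conventions used in shrink_cache() above, and scan_one_page() is a
hypothetical helper standing in for whatever work needs both locks held.

	/*
	 * Sketch: walking the LRU under the new ordering
	 *	PAGECACHE_LOCK ==> pagemap_lru_lock ==> mapping->page_lock
	 * A holder of pagemap_lru_lock may only *trylock* a hash bucket;
	 * on failure it has to drop the LRU lock and restart from the top,
	 * because the bucket holder may itself be spinning on a lock we hold.
	 */
	static void scan_inactive_list_sketch(void)
	{
		struct list_head *entry;

	repeat:
		spin_lock(&pagemap_lru_lock);
		list_for_each(entry, &inactive_list) {
			struct page *page = list_entry(entry, struct page, lru);
			spinlock_t *pg_lock = PAGECACHE_LOCK(page);

			if (!spin_trylock(pg_lock)) {
				/* out-of-order acquisition failed: back off */
				spin_unlock(&pagemap_lru_lock);
				goto repeat;
			}
			/*
			 * Bucket lock and LRU lock are now held in a
			 * deadlock-free way; mapping->page_lock may be taken
			 * as the innermost lock if list moves are needed.
			 */
			scan_one_page(page);		/* hypothetical helper */
			spin_unlock(pg_lock);
		}
		spin_unlock(&pagemap_lru_lock);
	}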
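The nonblock argument threaded through do_generic_file_read() is what gives TUX
its async file IO: with nonblock set, the function never sleeps on page IO and
instead fails the read with the new -EWOULDBLOCKIO the moment a page is absent
or not up to date. The caller is then expected to retry from a context that may
block. A minimal sketch of such a caller follows, assuming a worker-thread
fallback exists -- cache_only_read() and queue_blocking_read() are hypothetical
names, not interfaces added by this patch.

	/*
	 * Sketch: satisfy a read purely from the page cache, handing the
	 * request to a blocking context when -EWOULDBLOCKIO reports that
	 * the data is not resident.
	 */
	static ssize_t cache_only_read(struct file *filp, loff_t *ppos,
				read_descriptor_t *desc, read_actor_t actor)
	{
		desc->written = 0;
		desc->error = 0;

		/* nonblock == 1: fail with -EWOULDBLOCKIO rather than sleep */
		do_generic_file_read(filp, ppos, desc, actor, 1);

		if (desc->error == -EWOULDBLOCKIO)
			/* hypothetical: redo the read where sleeping is allowed */
			return queue_blocking_read(filp, ppos, desc, actor);

		return desc->written ? desc->written : desc->error;
	}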