diff -ruN linux-2.4.18/drivers/char/Config.in linux-2.4.18a/drivers/char/Config.in
--- linux-2.4.18/drivers/char/Config.in	Sun Mar 17 13:31:59 2002
+++ linux-2.4.18a/drivers/char/Config.in	Sun Mar 17 21:24:27 2002
@@ -174,6 +174,8 @@
 fi
 endmenu
 
+tristate 'Page Coloring' CONFIG_PAGE_COLORING
+
 if [ "$CONFIG_ARCH_NETWINDER" = "y" ]; then
    tristate 'NetWinder thermometer support' CONFIG_DS1620
    tristate 'NetWinder Button' CONFIG_NWBUTTON
diff -ruN linux-2.4.18/drivers/char/Makefile linux-2.4.18a/drivers/char/Makefile
--- linux-2.4.18/drivers/char/Makefile	Sun Mar 17 13:31:59 2002
+++ linux-2.4.18a/drivers/char/Makefile	Sun Mar 17 21:24:27 2002
@@ -241,6 +241,11 @@
   obj-y += mwave/mwave.o
 endif
 
+ifeq ($(CONFIG_PAGE_COLORING),m)
+  CONFIG_PAGE_COLORING_MODULE=y
+  obj-m += page_color.o
+endif
+
 include $(TOPDIR)/Rules.make
 
 fastdep:
diff -ruN linux-2.4.18/drivers/char/page_color.c linux-2.4.18a/drivers/char/page_color.c
--- linux-2.4.18/drivers/char/page_color.c	Wed Dec 31 19:00:00 1969
+++ linux-2.4.18a/drivers/char/page_color.c	Sun Mar 17 21:24:27 2002
@@ -0,0 +1,167 @@
+/*
+ * This module implements page coloring, a systematic way to get
+ * the most performance out of the expensive cache memory in your
+ * computer.  At present the code can *only* be built as a loadable
+ * kernel module.
+ *
+ * After building the kernel and rebooting, load the module and
+ * specify the cache size to use, like so:
+ *
+ *	insmod page_color.o cache_size=X
+ *
+ * where X is the size of the largest cache your system has.  For
+ * machines with three cache levels (Alpha 21164, AMD K6-III) this
+ * is the size in bytes of the L3 cache; for all others it is the
+ * size of the L2 cache.  If your system doesn't have at least an
+ * L2 cache, fer cryin' out loud GET SOME!  When specifying the
+ * cache size you can append 'K' or 'M' to signify kilobytes or
+ * megabytes, respectively.  In any case, the cache size *must* be
+ * a power of two.
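+ *
+ * For example, on a machine with a 1MB L2 cache:
+ *
+ *	insmod page_color.o cache_size=1M
+ *	cat /proc/page_color		(report statistics)
+ *	rmmod page_color		(return to the stock allocator)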
+ *
+ * insmod will load a module called 'page_color', which changes the
+ * way Linux allocates pages from the free lists.  It is always safe
+ * to start and stop the module while other processes are running.
+ *
+ * If Linux is configured with /proc filesystem support, the module
+ * will also create /proc/page_color as a means of reporting
+ * statistics.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/proc_fs.h>
+
+extern unsigned int page_miss_count;
+extern unsigned int page_hit_count;
+extern unsigned int page_colors;
+extern unsigned int page_alloc_count;
+extern struct list_head *page_color_table;
+
+void page_color_start(void);
+void page_color_stop(void);
+
+#if defined(__alpha__)
+#define CACHE_SIZE_GUESS (4*1024*1024)
+#elif defined(__i386__)
+#define CACHE_SIZE_GUESS (256*1024)
+#else
+#define CACHE_SIZE_GUESS (1*1024*1024)
+#endif
+
+#ifdef CONFIG_PROC_FS
+
+int page_color_getinfo(char *buf, char **start, off_t fpos, int length)
+{
+        int i, j, k, count, num_colors;
+        struct list_head *queue, *curr;
+        char *p = buf;
+
+        p += sprintf(p, "colors: %d\n", page_colors);
+        p += sprintf(p, "hits: %d\n", page_hit_count);
+        p += sprintf(p, "misses: %d\n", page_miss_count);
+        p += sprintf(p, "pages allocated: %d\n", page_alloc_count);
+
+        /* For each zone and each order, print the number of free
+           blocks queued on every color list. */
+
+        queue = page_color_table;
+        for(i = 0; i < MAX_NR_ZONES; i++) {
+                num_colors = page_colors;
+                for(j = 0; j < MAX_ORDER; j++) {
+                        for(k = 0; k < num_colors; k++) {
+                                count = 0;
+                                if (!queue->next)
+                                        goto getinfo_done;
+
+                                list_for_each(curr, queue) {
+                                        count++;
+                                }
+                                p += sprintf(p, "%d ", count);
+                                queue++;
+                        }
+
+                        p += sprintf(p, "\n");
+                        if (num_colors > 1)
+                                num_colors >>= 1;
+                }
+        }
+
+getinfo_done:
+        return p - buf;
+}
+
+#endif
+
+void cleanup_module(void)
+{
+        printk("page_color: terminating page coloring\n");
+
+#ifdef CONFIG_PROC_FS
+        remove_proc_entry("page_color", NULL);
+#endif
+
+        page_color_stop();
+        kfree(page_color_table);
+}
+
+static char *cache_size;
+MODULE_PARM(cache_size, "s");
+
+int init_module(void)
+{
+        unsigned int cache_size_int;
+        unsigned int alloc_size;
+
+        if (cache_size) {
+                cache_size_int = simple_strtoul(cache_size,
+                                                (char **)NULL, 10);
+                if ( strchr(cache_size, 'M') ||
+                     strchr(cache_size, 'm') )
+                        cache_size_int *= 1024*1024;
+
+                if ( strchr(cache_size, 'K') ||
+                     strchr(cache_size, 'k') )
+                        cache_size_int *= 1024;
+        }
+        else {
+                cache_size_int = CACHE_SIZE_GUESS;
+        }
+
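+        /* A power of two has exactly one bit set, so (-x & x), which
+           isolates the lowest set bit of x, equals x only for powers
+           of two.  For example 256K (0x40000) passes the test below,
+           while 384K (0x60000) yields 0x20000 and is rejected. */
+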
+        if( (-cache_size_int & cache_size_int) != cache_size_int ) {
+                printk("page_color: cache size is not a power of two\n");
+                return 1;
+        }
+
+        page_colors = cache_size_int / PAGE_SIZE;
+        page_hit_count = 0;
+        page_miss_count = 0;
+        page_alloc_count = 0;
+        alloc_size = MAX_NR_ZONES * sizeof(struct list_head) *
+                     (2 * page_colors + MAX_ORDER);
+        page_color_table = (struct list_head *)kmalloc(alloc_size, GFP_KERNEL);
+        if (!page_color_table) {
+                printk("page_color: memory allocation failed\n");
+                return 1;
+        }
+        memset(page_color_table, 0, alloc_size);
+
+        page_color_start();
+
+#ifdef CONFIG_PROC_FS
+        create_proc_info_entry("page_color", 0, NULL, page_color_getinfo);
+#endif
+
+        printk("page_color: starting with %d colors\n", page_colors);
+        return 0;
+}
diff -ruN linux-2.4.18/include/linux/mmzone.h linux-2.4.18a/include/linux/mmzone.h
--- linux-2.4.18/include/linux/mmzone.h	Sun Mar 17 13:51:34 2002
+++ linux-2.4.18a/include/linux/mmzone.h	Sun Mar 17 21:32:08 2002
@@ -21,6 +21,12 @@
 typedef struct free_area_struct {
 	struct list_head	free_list;
 	unsigned long		*map;
+
+#ifdef CONFIG_PAGE_COLORING_MODULE
+	unsigned long		count;
+	struct list_head	*color_list;
+#endif
+
 } free_area_t;
 
 struct pglist_data;
diff -ruN linux-2.4.18/include/linux/sched.h linux-2.4.18a/include/linux/sched.h
--- linux-2.4.18/include/linux/sched.h	Sun Mar 17 13:51:34 2002
+++ linux-2.4.18a/include/linux/sched.h	Sun Mar 17 21:32:08 2002
@@ -410,6 +410,11 @@
 
 /* journalling filesystem info */
 	void *journal_info;
+
+#ifdef CONFIG_PAGE_COLORING_MODULE
+	unsigned int color_init;
+	unsigned int target_color;
+#endif
 };
 
 /*
diff -ruN linux-2.4.18/kernel/ksyms.c linux-2.4.18a/kernel/ksyms.c
--- linux-2.4.18/kernel/ksyms.c	Sun Mar 17 13:32:00 2002
+++ linux-2.4.18a/kernel/ksyms.c	Sun Mar 17 21:24:27 2002
@@ -559,3 +559,21 @@
 
 EXPORT_SYMBOL(tasklist_lock);
 EXPORT_SYMBOL(pidhash);
+
+#ifdef CONFIG_PAGE_COLORING_MODULE
+extern unsigned int page_miss_count;
+extern unsigned int page_hit_count;
+extern unsigned int page_alloc_count;
+extern unsigned int page_colors;
+extern struct list_head *page_color_table;
+void page_color_start(void);
+void page_color_stop(void);
+
+EXPORT_SYMBOL_NOVERS(page_miss_count);
+EXPORT_SYMBOL_NOVERS(page_hit_count);
+EXPORT_SYMBOL_NOVERS(page_alloc_count);
+EXPORT_SYMBOL_NOVERS(page_colors);
+EXPORT_SYMBOL_NOVERS(page_color_table);
+EXPORT_SYMBOL_NOVERS(page_color_start);
+EXPORT_SYMBOL_NOVERS(page_color_stop);
+#endif
diff -ruN linux-2.4.18/mm/page_alloc.c linux-2.4.18a/mm/page_alloc.c
--- linux-2.4.18/mm/page_alloc.c	Sun Mar 17 13:32:00 2002
+++ linux-2.4.18a/mm/page_alloc.c	Sun Mar 17 21:24:27 2002
@@ -56,6 +56,288 @@
  */
 #define BAD_RANGE(zone,x) (((zone) != (x)->zone) || (((x)-mem_map) < (zone)->zone_start_mapnr) || (((x)-mem_map) >= (zone)->zone_start_mapnr+(zone)->size))
+
+#ifdef CONFIG_PAGE_COLORING_MODULE
+
+#ifdef CONFIG_DISCONTIGMEM
+#error "Page coloring implementation cannot handle NUMA architectures"
+#endif
+
+unsigned int page_coloring = 0;
+unsigned int page_miss_count;
+unsigned int page_hit_count;
+unsigned int page_alloc_count;
+unsigned int page_colors = 0;
+struct list_head *page_color_table;
+
+#define COLOR(x)	((x) & cache_mask)
+
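+/*
+ * A page's "color" is its index in the zone's memory map reduced
+ * modulo page_colors (the number of page-sized bins in the cache,
+ * i.e. cache_size / PAGE_SIZE).  Roughly speaking, pages of the
+ * same color land in the same part of a physically indexed cache:
+ * with a 1MB cache and 4KB pages there are 256 colors, and pages
+ * 0, 256, 512, ... all contend for the same cache lines.
+ *
+ * page_color_table holds one queue per color, per order, per zone.
+ * Order j only needs page_colors >> j queues (never fewer than one),
+ * so the whole table fits in the MAX_NR_ZONES * (2 * page_colors +
+ * MAX_ORDER) list heads allocated by the module's init_module().
+ */
+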
+void page_color_start(void)
+{
+        /* Empty the free list in each zone.  For each queue in the
+           free list, transfer the entries in the queue over to
+           another set of queues (the destination queue is determined
+           by the color of each entry). */
+
+        int i, j, k;
+        unsigned int num_colors, cache_mask;
+        unsigned long index;
+        unsigned long flags[MAX_NR_ZONES];
+        struct list_head *color_list_start, *head, *curr;
+        free_area_t *area;
+        struct page *page;
+        zone_t *zone;
+        pg_data_t *pgdata;
+
+        cache_mask = page_colors - 1;
+        color_list_start = page_color_table;
+        pgdata = &contig_page_data;
+
+        /* Stop all allocation of free pages while the reshuffling
+           is taking place. */
+
+        for(i = 0; i < MAX_NR_ZONES; i++) {
+                zone = pgdata->node_zones + i;
+                if (zone->size)
+                        spin_lock_irqsave(&zone->lock, flags[i]);
+        }
+
+        for(i = 0; i < MAX_NR_ZONES; i++) {
+                num_colors = page_colors;
+                zone = pgdata->node_zones + i;
+
+                if (!zone->size)
+                        continue;
+
+                for(j = 0; j < MAX_ORDER; j++) {
+                        area = zone->free_area + j;
+                        area->count = 0;
+                        area->color_list = color_list_start;
+                        head = &area->free_list;
+                        curr = memlist_next(head);
+
+                        for(k = 0; k < num_colors; k++)
+                                memlist_init(color_list_start + k);
+
+                        while(curr != head) {
+                                page = memlist_entry(curr, struct page, list);
+                                memlist_del(curr);
+                                index = page - zone->zone_mem_map;
+                                memlist_add_head(curr, area->color_list +
+                                                 (COLOR(index) >> j));
+                                area->count++;
+                                curr = memlist_next(head);
+                        }
+
+                        color_list_start += num_colors;
+                        if (num_colors > 1)
+                                num_colors >>= 1;
+                }
+        }
+
+        /* Allocation of free pages can continue. */
+
+        page_coloring = 1;
+        for(i = 0; i < MAX_NR_ZONES; i++) {
+                zone = pgdata->node_zones + i;
+                if (zone->size)
+                        spin_unlock_irqrestore(&zone->lock, flags[i]);
+        }
+}
+
+void page_color_stop(void)
+{
+        /* Reverse the operation of page_color_start(). */
+
+        int i, j, k;
+        unsigned int num_colors;
+        unsigned long flags[MAX_NR_ZONES];
+        struct list_head *head, *curr;
+        free_area_t *area;
+        zone_t *zone;
+        pg_data_t *pgdata;
+
+        pgdata = &contig_page_data;
+
+        for(i = 0; i < MAX_NR_ZONES; i++) {
+                zone = pgdata->node_zones + i;
+                if (zone->size)
+                        spin_lock_irqsave(&zone->lock, flags[i]);
+        }
+
+        for(i = 0; i < MAX_NR_ZONES; i++) {
+                num_colors = page_colors;
+                zone = pgdata->node_zones + i;
+
+                if (!zone->size)
+                        continue;
+
+                for(j = 0; j < MAX_ORDER; j++) {
+                        area = zone->free_area + j;
+                        area->count = 0;
+
+                        for(k = 0; k < num_colors; k++) {
+                                head = area->color_list + k;
+                                curr = memlist_next(head);
+                                while(curr != head) {
+                                        memlist_del(curr);
+                                        memlist_add_head(curr,
+                                                         &area->free_list);
+                                        curr = memlist_next(head);
+                                }
+                        }
+
+                        if (num_colors > 1)
+                                num_colors >>= 1;
+                }
+        }
+
+        page_coloring = 0;
+        for(i = 0; i < MAX_NR_ZONES; i++) {
+                zone = pgdata->node_zones + i;
+                if (zone->size)
+                        spin_unlock_irqrestore(&zone->lock, flags[i]);
+        }
+}
+
+unsigned int rand_carry = 0x01234567;
+unsigned int rand_seed = 0x89abcdef;
+
+#define MULT 2131995753
+
+static inline unsigned int get_rand(void)
+{
+        /* A multiply-with-carry random number generator by George
+           Marsaglia.  The period is about 1<<63, and each call to
+           get_rand() returns 32 random bits. */
+
+        unsigned long long prod;
+
+        prod = (unsigned long long)rand_seed *
+               (unsigned long long)MULT +
+               (unsigned long long)rand_carry;
+        rand_seed = (unsigned int)prod;
+        rand_carry = (unsigned int)(prod >> 32);
+
+        return rand_seed;
+}
+
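+/*
+ * Allocation policy: the first time a process asks for free pages it
+ * is assigned a random starting color, and after every successful
+ * allocation its target color is advanced by the size of the block
+ * it received, so successive allocations by one process cycle through
+ * all of the colors.  When no free block of the preferred color is
+ * available the color is bumped and page_miss_count is incremented.
+ */
+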
+static struct page *alloc_pages_by_color(zone_t *zone, unsigned int order)
+{
+        unsigned int i;
+        unsigned int mask, color;
+        unsigned long page_idx;
+        free_area_t *area;
+        struct list_head *curr, *head;
+        struct page *page;
+        unsigned int cache_mask = page_colors - 1;
+
+        /* If this process hasn't asked for free pages before,
+           assign it a random starting color. */
+
+        if (current->color_init != current->pid) {
+                current->color_init = current->pid;
+                current->target_color = COLOR(get_rand());
+        }
+
+        /* Round the target color up to the next 1<<order boundary. */
+
+        mask = (1 << order) - 1;
+        color = current->target_color;
+        color = COLOR((color + mask) & ~mask);
+
+        /* Find out early if there are no free pages at all. */
+
+        for(i = order; i < MAX_ORDER; i++)
+                if (zone->free_area[i].count)
+                        break;
+
+        if (i == MAX_ORDER)
+                return NULL;
+
+        /* The memory allocation is guaranteed to succeed (although
+           we may not find the correct color). */
+
+        while(1) {
+                area = zone->free_area + order;
+                for(i = order; i < MAX_ORDER; i++) {
+                        head = area->color_list + (color >> i);
+                        curr = memlist_next(head);
+                        if (curr != head)
+                                goto alloc_page_done;
+                        area++;
+                }
+
+                page_miss_count++;
+                color = COLOR(color + (1 << order));
+        }
+
+alloc_page_done:
+
+        page_hit_count++;
+        page_alloc_count++;
+        page = memlist_entry(curr, struct page, list);
+        memlist_del(curr);
+        page_idx = page - zone->zone_mem_map;
+        zone->free_pages -= 1 << order;
+        area->count--;
+
+        if (i < (MAX_ORDER - 1))
+                __change_bit(page_idx >> (1+i), area->map);
+
+        while (i > order) {
+
+                /* Return the unneeded half of each split block to
+                   the free area below, keeping the half that matches
+                   the target color. */
+
+                area--;
+                i--;
+                mask = 1 << i;
+                area->count++;
+                __change_bit(page_idx >> (1+i), area->map);
+                if (color & mask) {
+                        if (BAD_RANGE(zone,page + mask))
+                                BUG();
+
+                        memlist_add_head(&page->list, area->color_list +
+                                         (COLOR(page_idx) >> i));
+                        page_idx += mask;
+                        page += mask;
+                }
+                else {
+                        memlist_add_head(&(page + mask)->list,
+                                         area->color_list +
+                                         (COLOR(page_idx + mask) >> i));
+                }
+        }
+
+        set_page_count(page, 1);
+
+        if (BAD_RANGE(zone,page))
+                BUG();
+        if (PageLRU(page))
+                BUG();
+        if (PageActive(page))
+                BUG();
+
+        /* Start the next allocation at the following color. */
+
+        current->target_color = COLOR(color + (1 << order));
+
+        return page;
+}
+
+#endif /* CONFIG_PAGE_COLORING_MODULE */
+
 /*
  * Freeing function for a buddy system allocator.
@@ -141,12 +423,26 @@
 			BUG();
 		if (BAD_RANGE(zone,buddy2))
 			BUG();
+#ifdef CONFIG_PAGE_COLORING_MODULE
+		if (page_coloring == 1)
+			area->count--;
+		order++;
+#endif
 		memlist_del(&buddy1->list);
 		mask <<= 1;
 		area++;
 		index >>= 1;
 		page_idx &= mask;
 	}
+
+#ifdef CONFIG_PAGE_COLORING_MODULE
+	if (page_coloring == 1) {
+		unsigned long cache_mask = page_colors - 1;
+		memlist_add_head(&(base + page_idx)->list, area->color_list + (COLOR(page_idx) >> order));
+		spin_unlock_irqrestore(&zone->lock, flags);
+		return;
+	}
+#endif
 	memlist_add_head(&(base + page_idx)->list, &area->free_list);
 
 	spin_unlock_irqrestore(&zone->lock, flags);
@@ -187,6 +483,15 @@
 	struct page *page;
 
 	spin_lock_irqsave(&zone->lock, flags);
+
+#ifdef CONFIG_PAGE_COLORING_MODULE
+	if (page_coloring == 1) {
+		page = alloc_pages_by_color(zone, order);
+		spin_unlock_irqrestore(&zone->lock, flags);
+		return page;
+	}
+#endif
+
 	do {
 		head = &area->free_list;
 		curr = memlist_next(head);
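
For reference, the color arithmetic used by the patch can be tried out in user space. The following stand-alone sketch is not part of the patch; it assumes a 4KB PAGE_SIZE (the kernel's real value is architecture dependent) and simply shows which page indices share a color for a given cache size.

/* color_demo.c - illustrate cache colors for a power-of-two cache size */
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096	/* assumed page size */

int main(int argc, char **argv)
{
	unsigned long cache_size, colors, idx;

	/* Cache size in bytes, default 1MB; must be a power of two,
	   checked with the same (-x & x) == x test the module uses. */
	cache_size = argc > 1 ? strtoul(argv[1], NULL, 0) : 1024 * 1024;
	if (cache_size == 0 ||
	    (-cache_size & cache_size) != cache_size) {
		fprintf(stderr, "cache size must be a power of two\n");
		return 1;
	}

	colors = cache_size / PAGE_SIZE;
	printf("%lu colors\n", colors);

	/* Pages whose indices differ by a multiple of 'colors' share a
	   color and contend for the same part of the cache. */
	for (idx = 0; idx < 3; idx++)
		printf("pages %lu and %lu both have color %lu\n",
		       idx, idx + colors, idx & (colors - 1));

	return 0;
}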