diff -urN -x *.[oa] -x .config -x .version -x .depend -x .hdepend -x *.flags -x autoconf.h -x modversions.h -x version.h -x asm -x modules -x config -x soundmodem linux-2.4.0-test9/fs/select.c linux-2.4.0-test9+/fs/select.c --- linux-2.4.0-test9/fs/select.c Mon Jul 24 06:39:44 2000 +++ linux-2.4.0-test9+/fs/select.c Sun Oct 29 20:53:15 2000 @@ -9,238 +9,325 @@ * flag set in its personality we do *not* modify the given timeout * parameter to reflect time remaining. * - * 24 January 2000 - * Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation - * of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian). + * August 1999 - February 2000 + * Rewritten to use a wake up callback to queue events + * after sleeping, plus general performance enhancements. + * -- Mike Jagdis */ #include #include +#include #include #include #include -#define ROUND_UP(x,y) (((x)+(y)-1)/(y)) -#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM) +/* Claim the file_lock semaphore around the whole fdset scan loop + * rather than when looking at an fdset bit within the loop (this + * is done by fget()/fput()). This holds a read lock on the descriptor + * table for longer but reduces overhead in the loop. + * Note that poll accesses user space while holding the file lock. + * This may be an issue for threaded programs? + */ +#define WIDE_FILE_LOCK -struct poll_table_entry { - struct file * filp; - wait_queue_t wait; - wait_queue_head_t * wait_address; -}; - -struct poll_table_page { - struct poll_table_page * next; - struct poll_table_entry * entry; - struct poll_table_entry entries[0]; -}; -#define POLL_TABLE_FULL(table) \ - ((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table)) +#define ROUND_UP(x,y) (((x)+(y)-1)/(y)) +#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM) -/- - * Ok, Peter made a complicated, but straightforward multiple_wait() function. 
- * I have rewritten this, taking some shortcuts: This code may not be easy to - * follow, but it should be free of race-conditions, and it's practical. If you - * understand what I'm doing here, then you understand how the linux - * sleep/wakeup mechanism works. - * - * Two very simple procedures, poll_wait() and poll_freewait() make all the - * work. poll_wait() is an inline-function defined in , - * as all select/poll functions have to call it to add an entry to the - * poll table. - */ -void poll_freewait(poll_table* pt) +void poll_freewait(poll_table * ptab) { - struct poll_table_page * p = pt->table; - while (p) { - struct poll_table_entry * entry; - struct poll_table_page *old; + struct poll_table_head *p; - entry = p->entry; - do { - entry--; - remove_wait_queue(entry->wait_address,&entry->wait); + p = ptab->head; + while (p) { + struct poll_table_entry *entry; + entry = (struct poll_table_entry *)(p + 1); + while (entry < p->entry) { + remove_wait_queue(entry->wait_address, &entry->wait); fput(entry->filp); - } while (entry > p->entries); - old = p; + entry++; + } + p = p->next; + } + p = ptab->head; + while (p) { + struct poll_table_head *old = p; p = p->next; free_page((unsigned long) old); } } -void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p) +static int +__pollwake(wait_queue_t *wq, unsigned int mode, const int sync) { - struct poll_table_page *table = p->table; - - if (!table || POLL_TABLE_FULL(table)) { - struct poll_table_page *new_table; - - new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL); - if (!new_table) { - p->error = -ENOMEM; - __set_current_state(TASK_RUNNING); - return; + struct poll_table_entry *p; + struct poll_table_head *head; + unsigned long flags; + + wq->onwake = NULL; + p = (void *)wq - offsetof(struct poll_table_entry, wait); + head = (struct poll_table_head *)((unsigned long)p & PAGE_MASK); + + spin_lock_irqsave(&head->poll_table->woken_lock, flags); + p->woken_list = 
head->poll_table->woken_list; + head->poll_table->woken_list = p; + spin_unlock_irqrestore(&head->poll_table->woken_lock, flags); + + if (wq->task->state & (mode & ~TASK_EXCLUSIVE)) { + if (!sync) { + wake_up_process(wq->task); + goto out; } - new_table->entry = new_table->entries; - new_table->next = table; - p->table = new_table; - table = new_table; + wake_up_process_synchronous(wq->task); } +out: + return 0; +} - /* Add a new entry */ - { - struct poll_table_entry * entry = table->entry; - table->entry = entry+1; +void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *ptab) +{ + struct poll_table_head * tmp, * p = ptab->last; + struct poll_table_entry *last_entry; + + last_entry = (void *)p + PAGE_SIZE - sizeof(struct poll_table_entry); + if (p->entry <= last_entry) { + struct poll_table_entry *entry; +ok_table: + entry = p->entry++; get_file(filp); entry->filp = filp; + entry->n = ptab->n; + init_waitqueue_entry_onwake(&entry->wait, current, __pollwake); entry->wait_address = wait_address; - init_waitqueue_entry(&entry->wait, current); - add_wait_queue(wait_address,&entry->wait); + add_wait_queue(wait_address, &entry->wait); + return; } -} -#define __IN(fds, n) (fds->in + n) -#define __OUT(fds, n) (fds->out + n) -#define __EX(fds, n) (fds->ex + n) -#define __RES_IN(fds, n) (fds->res_in + n) -#define __RES_OUT(fds, n) (fds->res_out + n) -#define __RES_EX(fds, n) (fds->res_ex + n) - -#define BITS(fds, n) (*__IN(fds, n)|*__OUT(fds, n)|*__EX(fds, n)) - -static int max_select_fd(unsigned long n, fd_set_bits *fds) -{ - unsigned long *open_fds; - unsigned long set; - int max; - - /* handle last in-complete long-word first */ - set = ~(~0UL << (n & (__NFDBITS-1))); - n /= __NFDBITS; - open_fds = current->files->open_fds->fds_bits+n; - max = 0; - if (set) { - set &= BITS(fds, n); - if (set) { - if (!(set & ~*open_fds)) - goto get_max; - return -EBADF; - } - } - while (n) { - open_fds--; - n--; - set = BITS(fds, n); - if (!set) - continue; - 
if (set & ~*open_fds) - return -EBADF; - if (max) - continue; -get_max: - do { - max++; - set >>= 1; - } while (set); - max += n * __NFDBITS; + tmp = (void *) __get_free_page(GFP_KERNEL); + if (tmp) { + tmp->entry = (struct poll_table_entry *)(tmp + 1); + tmp->next = NULL; + tmp->poll_table = ptab; + p->next = tmp; + ptab->last = p = tmp; + goto ok_table; } - - return max; + ptab->error = -ENOMEM; } + #define BIT(i) (1UL << ((i)&(__NFDBITS-1))) -#define MEM(i,m) ((m)+(unsigned)(i)/__NFDBITS) -#define ISSET(i,m) (((i)&*(m)) != 0) -#define SET(i,m) (*(m) |= (i)) #define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR) #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) #define POLLEX_SET (POLLPRI) -int do_select(int n, fd_set_bits *fds, long *timeout) +int do_select(int n, unsigned long *bits, const int size, long *timeout) { - poll_table table, *wait; - int retval, i, off; - long __timeout = *timeout; - - read_lock(¤t->files->file_lock); - retval = max_select_fd(n, fds); - read_unlock(¤t->files->file_lock); - - if (retval < 0) - return retval; - n = retval; - - poll_initwait(&table); - wait = &table; - if (!__timeout) - wait = NULL; - retval = 0; - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - for (i = 0 ; i < n; i++) { - unsigned long bit = BIT(i); - unsigned long mask; - struct file *file; - - off = i / __NFDBITS; - if (!(bit & BITS(fds, off))) - continue; - file = fget(i); - mask = POLLNVAL; - if (file) { + int retval; + poll_table *wait, wait_table; + long __timeout; + unsigned long bit; + unsigned long fds, *fdl; + unsigned long mask; + struct file *file; + + wait = NULL; + __timeout = *timeout; + if (__timeout) { + wait_table.head = (void *) __get_free_page(GFP_KERNEL); + if (!wait_table.head) + return -ENOMEM; + + poll_initwait(&wait_table); + wait = &wait_table; + } + + wait_table.error = retval = 0; + + if (!n) + goto no_setup; + + fdl = bits + (n - 1) / __NFDBITS; + + fds = (*fdl | fdl[size] | fdl[2*size]); + +#ifdef 
WIDE_FILE_LOCK + read_lock(¤t->files->file_lock); +#endif + if (!fds) + goto next_long; + + mask = (n & (__NFDBITS-1)); + if (mask) { + fds &= ~(~0UL << mask); + if (!fds) + goto next_long; + } + + do { + /* At this point we know some bit is set in fds. */ + wait_table.n = (fdl - bits) * 8*sizeof(unsigned long); + bit = 1; + do { + /* If a long has set bits do we expect most + * of them to be set? + */ + if ((fds & bit)) { + fds ^= bit; +#ifdef WIDE_FILE_LOCK + file = fcheck(wait_table.n); +#else + file = fget(wait_table.n); +#endif + if (!file) + goto out_badf; mask = DEFAULT_POLLMASK; if (file->f_op && file->f_op->poll) mask = file->f_op->poll(file, wait); +#ifndef WIDE_FILE_LOCK fput(file); +#endif + /* Branch prediction says that forward + * conditional branches are not taken. So... + */ + /* Normally have to wait for input */ + if (!(mask & POLLIN_SET)) + goto no_in_check; + if ((*fdl & bit)) { + fdl[3*size] |= bit; + retval++; + wait = NULL; + } +no_in_check: + /* Normally can output */ + if (!(fdl[size] & bit)) + goto no_out_check; + if ((mask & POLLOUT_SET)) { + fdl[4*size] |= bit; + retval++; + wait = NULL; + } +no_out_check: + /* Normally no exception */ + if (!(mask & POLLEX_SET)) + goto no_ex_check; + if ((fdl[2*size] & bit)) { + fdl[5*size] |= bit; + retval++; + wait = NULL; + } +no_ex_check: } - if ((mask & POLLIN_SET) && ISSET(bit, __IN(fds,off))) { - SET(bit, __RES_IN(fds,off)); + bit += bit; + wait_table.n++; + } while (fds); + +next_long: + /* Long runs of zero bits tend to be common, especially + * at the end of over large sets so we use a tight loop + * to skip them. + * N.B. We allocated an extra, non-zero, long in front + * of bits in sys_select specifically so we can access + * below bits and avoid an extra test and branch. 
+ */ + do { + fdl--; + fds = *fdl | fdl[size] | fdl[2*size]; + } while (!fds); + } while (fdl >= bits); + +#ifdef WIDE_FILE_LOCK + read_unlock(¤t->files->file_lock); +#endif + + if (!retval) + retval = wait_table.error; + +no_setup: + while (!retval && __timeout && !signal_pending(current)) { + unsigned long flags; + struct poll_table_entry *item, *next; + + set_current_state(TASK_INTERRUPTIBLE); + if (wait_table.woken_list == NULL) + __timeout = schedule_timeout(__timeout); + set_current_state(TASK_RUNNING); + + /* Lift the queued events and reset the queue ready + * for anything that happens while we "think". + */ + spin_lock_irqsave(&wait_table.woken_lock, flags); + item = wait_table.woken_list; + wait_table.woken_list = NULL; + spin_unlock_irqrestore(&wait_table.woken_lock, flags); + + for (; item; item=next) { + struct poll_table_entry * p = item; + + /* Reset the wait_queue's event pointer before + * checking in case it is triggered again. + */ + next = item->woken_list; + set_waitqueue_entry_onwake(&p->wait, __pollwake); + + file = p->filp; +#if 0 + /* This has to be true or we would never have + * done a wait on the file! + */ + if (file->f_op && file->f_op->poll) +#endif + mask = file->f_op->poll(file, NULL); + + fdl = bits + (p->n / __NFDBITS); + bit = BIT(p->n); + + /* Branch prediction says that forward + * conditional branches are not taken. + * Wake ups are dumb. We get woken up every time + * more space becomes available even if we are + * not interested. 
+ */ + if (!(mask & POLLIN_SET)) + goto ev_no_in_check; + if ((*fdl & bit)) { + fdl[3*size] |= bit; retval++; - wait = NULL; } - if ((mask & POLLOUT_SET) && ISSET(bit, __OUT(fds,off))) { - SET(bit, __RES_OUT(fds,off)); +ev_no_in_check: + if (!(fdl[size] & bit)) + goto ev_no_out_check; + if ((mask & POLLOUT_SET)) { + fdl[4*size] |= bit; retval++; - wait = NULL; } - if ((mask & POLLEX_SET) && ISSET(bit, __EX(fds,off))) { - SET(bit, __RES_EX(fds,off)); +ev_no_out_check: + /* Normally no exception */ + if (!(mask & POLLEX_SET)) + goto ev_no_ex_check; + if ((fdl[2*size] & bit)) { + fdl[5*size] |= bit; retval++; - wait = NULL; } +ev_no_ex_check: } - wait = NULL; - if (retval || !__timeout || signal_pending(current)) - break; - if(table.error) { - retval = table.error; - break; - } - __timeout = schedule_timeout(__timeout); } - current->state = TASK_RUNNING; - - poll_freewait(&table); - - /* - * Up-to-date the caller timeout. - */ +out: + if (*timeout) + poll_freewait(&wait_table); *timeout = __timeout; return retval; -} - -static void *select_bits_alloc(int size) -{ - return kmalloc(6 * size, GFP_KERNEL); -} -static void select_bits_free(void *bits, int size) -{ - kfree(bits); +out_badf: +#ifdef WIDE_FILE_LOCK + read_unlock(¤t->files->file_lock); +#endif + retval = -EBADF; + goto out; } /* @@ -254,13 +341,22 @@ #define MAX_SELECT_SECONDS \ ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) +/* How big a set we can use stack for rather than going out to kmalloc. + * 512 fds = 388 bytes, 1024 fds = 772 bytes, 2048 fds = 1540 bytes, + * 4096 fds = 3068 bytes + * Remember: the future call depth is fairly shallow but we need + * to leave space for interrupts! 
+ */ +#define MAX_LOC_FDS 1024 +#define MAX_LOC_LONGS (1 + 6 * FDS_LONGS(MAX_LOC_FDS)) + asmlinkage long sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp) { - fd_set_bits fds; - char *bits; + unsigned long *data, *bits; long timeout; int ret, size; + unsigned long ldata[MAX_LOC_LONGS]; timeout = MAX_SCHEDULE_TIMEOUT; if (tvp) { @@ -291,29 +387,27 @@ /* * We need 6 bitmaps (in/out/ex for both incoming and outgoing), * since we used fdset we need to allocate memory in units of - * long-words. + * long-words. We add an extra, non-zero, long at the beginning + * because that lets us avoid an extra test-and-branch within + * the loop in do_select. */ ret = -ENOMEM; - size = FDS_BYTES(n); - bits = select_bits_alloc(size); - if (!bits) - goto out_nofds; - fds.in = (unsigned long *) bits; - fds.out = (unsigned long *) (bits + size); - fds.ex = (unsigned long *) (bits + 2*size); - fds.res_in = (unsigned long *) (bits + 3*size); - fds.res_out = (unsigned long *) (bits + 4*size); - fds.res_ex = (unsigned long *) (bits + 5*size); - - if ((ret = get_fd_set(n, inp, fds.in)) || - (ret = get_fd_set(n, outp, fds.out)) || - (ret = get_fd_set(n, exp, fds.ex))) + size = FDS_LONGS(n); + data = ldata; + if (n > MAX_LOC_FDS) { + data = kmalloc((1 + 6 * size) * sizeof(unsigned long), GFP_KERNEL); + if (!data) + goto out_nofds; + } + *data = 1; + bits = data + 1; + if ((ret = get_fd_set(n, inp, bits)) || + (ret = get_fd_set(n, outp, bits+size)) || + (ret = get_fd_set(n, exp, bits+2*size))) goto out; - zero_fd_set(n, fds.res_in); - zero_fd_set(n, fds.res_out); - zero_fd_set(n, fds.res_ex); + memset(bits+3*size, 0, 3*size*sizeof(unsigned long)); - ret = do_select(n, &fds, &timeout); + ret = do_select(n, bits, size, &timeout); if (tvp && !(current->personality & STICKY_TIMEOUTS)) { time_t sec = 0, usec = 0; @@ -322,8 +416,8 @@ usec = timeout % HZ; usec *= (1000000/HZ); } - put_user(sec, &tvp->tv_sec); - put_user(usec, &tvp->tv_usec); + __put_user(sec, 
&tvp->tv_sec); + __put_user(usec, &tvp->tv_usec); } if (ret < 0) @@ -335,86 +429,148 @@ ret = 0; } - set_fd_set(n, inp, fds.res_in); - set_fd_set(n, outp, fds.res_out); - set_fd_set(n, exp, fds.res_ex); + if (inp) + __copy_to_user(inp, bits+3*size, size*sizeof(unsigned long)); + if (outp) + __copy_to_user(outp, bits+4*size, size*sizeof(unsigned long)); + if (exp) + __copy_to_user(exp, bits+5*size, size*sizeof(unsigned long)); out: - select_bits_free(bits, size); + if (n > MAX_LOC_FDS) + kfree(data); out_nofds: return ret; } -#define POLLFD_PER_PAGE ((PAGE_SIZE) / sizeof(struct pollfd)) - -static void do_pollfd(unsigned int num, struct pollfd * fdpage, - poll_table ** pwait, int *count) +static int do_poll(unsigned int nfds, struct pollfd *ufds, long timeout) { - int i; + int retval; + poll_table *wait, wait_table; + + wait = NULL; + if (timeout) { + wait_table.head = (void *) __get_free_page(GFP_KERNEL); + if (!wait_table.head) + return -ENOMEM; - for (i = 0; i < num; i++) { + poll_initwait(&wait_table); + wait = &wait_table; + } + + wait_table.error = retval = 0; + +#ifdef WIDE_FILE_LOCK + read_lock(¤t->files->file_lock); +#endif + for (wait_table.n = 0; wait_table.n < nfds; wait_table.n++) { int fd; - unsigned int mask; - struct pollfd *fdp; + unsigned int mask, events; mask = 0; - fdp = fdpage+i; - fd = fdp->fd; + if (__get_user(fd, &ufds[wait_table.n].fd)) + goto out_fault; if (fd >= 0) { - struct file * file = fget(fd); + struct file * file; +#ifdef WIDE_FILE_LOCK + file = fcheck(fd); +#else + file = fget(fd); +#endif mask = POLLNVAL; if (file != NULL) { mask = DEFAULT_POLLMASK; if (file->f_op && file->f_op->poll) - mask = file->f_op->poll(file, *pwait); - mask &= fdp->events | POLLERR | POLLHUP; + mask = file->f_op->poll(file, wait); +#ifndef WIDE_FILE_LOCK fput(file); +#endif + if (__get_user(events, &ufds[wait_table.n].events)) + goto out_fault; + mask &= events | POLLERR | POLLHUP; } if (mask) { - *pwait = NULL; - (*count)++; + wait = NULL; + retval++; } } 
- fdp->revents = mask; + __put_user(mask, &ufds[wait_table.n].revents); } -} -static int do_poll(unsigned int nfds, unsigned int nchunks, unsigned int nleft, - struct pollfd *fds[], poll_table *wait, long timeout) -{ - int count = 0; - poll_table* pt = wait; +#ifdef WIDE_FILE_LOCK + read_unlock(¤t->files->file_lock); +#endif - for (;;) { - unsigned int i; + if (!retval) + retval = wait_table.error; + + while (!retval && timeout && !signal_pending(current)) { + unsigned long flags; + struct poll_table_entry *item, *next; set_current_state(TASK_INTERRUPTIBLE); - for (i=0; i < nchunks; i++) - do_pollfd(POLLFD_PER_PAGE, fds[i], &pt, &count); - if (nleft) - do_pollfd(nleft, fds[nchunks], &pt, &count); - pt = NULL; - if (count || !timeout || signal_pending(current)) - break; - if(wait->error) { - return wait->error; + if (wait_table.woken_list == NULL) + timeout = schedule_timeout(timeout); + set_current_state(TASK_RUNNING); + + /* Lift the queued events and reset the queue ready + * for anything that happens while we "think". + */ + spin_lock_irqsave(&wait_table.woken_lock, flags); + item = wait_table.woken_list; + wait_table.woken_list = NULL; + spin_unlock_irqrestore(&wait_table.woken_lock, flags); + + for (; item; item=next) { + struct poll_table_entry * p = item; + unsigned long mask, events; + struct file * file; + + /* Reset the wait_queue's event pointer before + * checking in case it is triggered again. + */ + next = item->woken_list; + set_waitqueue_entry_onwake(&p->wait, __pollwake); + + file = p->filp; +#if 0 + /* This has to be true or we would never have + * done a wait on the file! 
+ */ + if (file->f_op && file->f_op->poll) +#endif + mask = file->f_op->poll(file, NULL); + if (__get_user(events, &ufds[p->n].events)) + goto out_fault; + mask &= events | POLLERR | POLLHUP; + if (mask) { + retval++; + __put_user(mask, &ufds[p->n].revents); + } } - timeout = schedule_timeout(timeout); } - current->state = TASK_RUNNING; - return count; +out: + if (timeout) + poll_freewait(&wait_table); + return retval; + +out_fault: +#ifdef WIDE_FILE_LOCK + read_unlock(¤t->files->file_lock); +#endif + retval = -EFAULT; + goto out; } -asmlinkage long sys_poll(struct pollfd * ufds, unsigned int nfds, long timeout) +asmlinkage long +sys_poll(struct pollfd * ufds, unsigned int nfds, long timeout) { - int i, j, fdcount, err; - struct pollfd **fds; - poll_table table, *wait; - int nchunks, nleft; + int err; /* Do a sanity check on nfds ... */ + err = -EINVAL; if (nfds > current->files->max_fds) - return -EINVAL; + goto out; if (timeout) { /* Careful about overflow in the intermediate values */ @@ -424,69 +580,13 @@ timeout = MAX_SCHEDULE_TIMEOUT; } - poll_initwait(&table); - wait = &table; - if (!timeout) - wait = NULL; - - err = -ENOMEM; - fds = NULL; - if (nfds != 0) { - fds = (struct pollfd **)kmalloc( - (1 + (nfds - 1) / POLLFD_PER_PAGE) * sizeof(struct pollfd *), - GFP_KERNEL); - if (fds == NULL) - goto out; - } + err = verify_area(VERIFY_WRITE, ufds, nfds * sizeof(struct pollfd)); + if (!err) { + err = do_poll(nfds, ufds, timeout); - nchunks = 0; - nleft = nfds; - while (nleft > POLLFD_PER_PAGE) { /* allocate complete PAGE_SIZE chunks */ - fds[nchunks] = (struct pollfd *)__get_free_page(GFP_KERNEL); - if (fds[nchunks] == NULL) - goto out_fds; - nchunks++; - nleft -= POLLFD_PER_PAGE; - } - if (nleft) { /* allocate last PAGE_SIZE chunk, only nleft elements used */ - fds[nchunks] = (struct pollfd *)__get_free_page(GFP_KERNEL); - if (fds[nchunks] == NULL) - goto out_fds; - } - - err = -EFAULT; - for (i=0; i < nchunks; i++) - if (copy_from_user(fds[i], ufds + 
i*POLLFD_PER_PAGE, PAGE_SIZE)) - goto out_fds1; - if (nleft) { - if (copy_from_user(fds[nchunks], ufds + nchunks*POLLFD_PER_PAGE, - nleft * sizeof(struct pollfd))) - goto out_fds1; - } - - fdcount = do_poll(nfds, nchunks, nleft, fds, wait, timeout); - - /* OK, now copy the revents fields back to user space. */ - for(i=0; i < nchunks; i++) - for (j=0; j < POLLFD_PER_PAGE; j++, ufds++) - __put_user((fds[i] + j)->revents, &ufds->revents); - if (nleft) - for (j=0; j < nleft; j++, ufds++) - __put_user((fds[nchunks] + j)->revents, &ufds->revents); - - err = fdcount; - if (!fdcount && signal_pending(current)) - err = -EINTR; - -out_fds1: - if (nleft) - free_page((unsigned long)(fds[nchunks])); -out_fds: - for (i=0; i < nchunks; i++) - free_page((unsigned long)(fds[i])); - if (nfds != 0) - kfree(fds); + if (!err && signal_pending(current)) + err = -EINTR; + } out: - poll_freewait(&table); return err; } diff -urN -x *.[oa] -x .config -x .version -x .depend -x .hdepend -x *.flags -x autoconf.h -x modversions.h -x version.h -x asm -x modules -x config -x soundmodem linux-2.4.0-test9/include/linux/poll.h linux-2.4.0-test9+/include/linux/poll.h --- linux-2.4.0-test9/include/linux/poll.h Mon Oct 2 19:01:39 2000 +++ linux-2.4.0-test9+/include/linux/poll.h Sun Oct 29 21:01:35 2000 @@ -8,15 +8,33 @@ #include #include #include +#include #include -struct poll_table_page; + +struct poll_table_entry { + int n; + struct file * filp; + struct poll_table_entry *woken_list; + wait_queue_t wait; + wait_queue_head_t * wait_address; +}; typedef struct poll_table_struct { + int n; int error; - struct poll_table_page * table; + spinlock_t woken_lock; + struct poll_table_entry *woken_list; + struct poll_table_head * head, * last; } poll_table; +struct poll_table_head { + poll_table *poll_table; + struct poll_table_head * next; + struct poll_table_entry * entry; +}; + + extern void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p); extern inline void poll_wait(struct 
file * filp, wait_queue_head_t * wait_address, poll_table *p) @@ -27,20 +45,16 @@ static inline void poll_initwait(poll_table* pt) { + spin_lock_init(&pt->woken_lock); + pt->head->entry = (struct poll_table_entry *)(pt->head + 1); + pt->head->poll_table = pt; + pt->head->next = NULL; + pt->woken_list = NULL; pt->error = 0; - pt->table = NULL; + pt->last = pt->head; } -extern void poll_freewait(poll_table* pt); - -/* - * Scaleable version of the fd_set. - */ - -typedef struct { - unsigned long *in, *out, *ex; - unsigned long *res_in, *res_out, *res_ex; -} fd_set_bits; +extern void poll_freewait(poll_table* pt); /* * How many longwords for "nr" bits? @@ -69,21 +83,6 @@ memset(fdset, 0, nr); return 0; } - -static inline -void set_fd_set(unsigned long nr, void *ufdset, unsigned long *fdset) -{ - if (ufdset) - __copy_to_user(ufdset, fdset, FDS_BYTES(nr)); -} - -static inline -void zero_fd_set(unsigned long nr, unsigned long *fdset) -{ - memset(fdset, 0, FDS_BYTES(nr)); -} - -extern int do_select(int n, fd_set_bits *fds, long *timeout); #endif /* KERNEL */ diff -urN -x *.[oa] -x .config -x .version -x .depend -x .hdepend -x *.flags -x autoconf.h -x modversions.h -x version.h -x asm -x modules -x config -x soundmodem linux-2.4.0-test9/include/linux/sched.h linux-2.4.0-test9+/include/linux/sched.h --- linux-2.4.0-test9/include/linux/sched.h Mon Oct 2 19:01:19 2000 +++ linux-2.4.0-test9+/include/linux/sched.h Sun Oct 29 18:01:27 2000 @@ -541,6 +541,7 @@ extern long FASTCALL(interruptible_sleep_on_timeout(wait_queue_head_t *q, signed long timeout)); extern void FASTCALL(wake_up_process(struct task_struct * tsk)); +extern void FASTCALL(wake_up_process_synchronous(struct task_struct * tsk)); #define wake_up(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE | TASK_EXCLUSIVE) #define wake_up_all(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE) diff -urN -x *.[oa] -x .config -x .version -x .depend -x .hdepend -x *.flags -x autoconf.h -x modversions.h -x 
version.h -x asm -x modules -x config -x soundmodem linux-2.4.0-test9/include/linux/wait.h linux-2.4.0-test9+/include/linux/wait.h --- linux-2.4.0-test9/include/linux/wait.h Mon Oct 2 19:01:17 2000 +++ linux-2.4.0-test9+/include/linux/wait.h Sun Oct 29 17:10:05 2000 @@ -16,6 +16,7 @@ #include #include +#include #include /* @@ -31,7 +32,7 @@ BUG(); \ } while (0) -#define CHECK_MAGIC(x) if (x != (long)&(x)) \ +#define CHECK_MAGIC(x) if ((long)x != (long)&(x)) \ { printk("bad magic %lx (should be %lx), ", (long)x, (long)&(x)); WQ_BUG(); } #define CHECK_MAGIC_WQHEAD(x) do { \ @@ -43,16 +44,19 @@ } while (0) #endif +typedef struct __wait_queue wait_queue_t; +typedef int (*onwake_func_t)(wait_queue_t *, unsigned int, const int); + struct __wait_queue { unsigned int compiler_warning; struct task_struct * task; struct list_head task_list; + onwake_func_t onwake; #if WAITQUEUE_DEBUG long __magic; long __waker; #endif }; -typedef struct __wait_queue wait_queue_t; /* * 'dual' spinlock architecture. 
Can be switched between spinlock_t and @@ -109,7 +113,7 @@ #endif #define __WAITQUEUE_INITIALIZER(name,task) \ - { 0x1234567, task, { NULL, NULL } __WAITQUEUE_DEBUG_INIT(name)} + { 0x1234567, task, { NULL, NULL }, (void *)-1 __WAITQUEUE_DEBUG_INIT(name)} #define DECLARE_WAITQUEUE(name,task) \ wait_queue_t name = __WAITQUEUE_INITIALIZER(name,task) @@ -134,17 +138,25 @@ #endif } -static inline void init_waitqueue_entry(wait_queue_t *q, - struct task_struct *p) +static inline void init_waitqueue_entry_onwake(wait_queue_t *q, + struct task_struct *p, onwake_func_t onwake) { #if WAITQUEUE_DEBUG if (!q || !p) WQ_BUG(); #endif q->task = p; + q->onwake = onwake; #if WAITQUEUE_DEBUG q->__magic = (long)&q->__magic; #endif +} + +#define init_waitqueue_entry(q, p) init_waitqueue_entry_onwake(q, p, (void *)-1); + +static inline void set_waitqueue_entry_onwake(wait_queue_t *wq, onwake_func_t onwake) +{ + wq->onwake = onwake; } static inline int waitqueue_active(wait_queue_head_t *q) diff -urN -x *.[oa] -x .config -x .version -x .depend -x .hdepend -x *.flags -x autoconf.h -x modversions.h -x version.h -x asm -x modules -x config -x soundmodem linux-2.4.0-test9/kernel/sched.c linux-2.4.0-test9+/kernel/sched.c --- linux-2.4.0-test9/kernel/sched.c Mon Oct 2 19:45:01 2000 +++ linux-2.4.0-test9+/kernel/sched.c Tue Oct 24 11:49:07 2000 @@ -361,7 +361,7 @@ spin_unlock_irqrestore(&runqueue_lock, flags); } -static inline void wake_up_process_synchronous(struct task_struct * p) +inline void wake_up_process_synchronous(struct task_struct * p) { unsigned long flags; @@ -726,38 +726,45 @@ while (tmp != head) { unsigned int state; wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list); + onwake_func_t onwake = curr->onwake; tmp = tmp->next; #if WAITQUEUE_DEBUG CHECK_MAGIC(curr->__magic); #endif - p = curr->task; - state = p->state; - if (state & (mode & ~TASK_EXCLUSIVE)) { + if (onwake == (void *)-1) { + p = curr->task; + state = p->state; + if (state & (mode & ~TASK_EXCLUSIVE)) { #if 
WAITQUEUE_DEBUG - curr->__waker = (long)__builtin_return_address(0); + curr->__waker = (long)__builtin_return_address(0); #endif - /* - * If waking up from an interrupt context then - * prefer processes which are affine to this - * CPU. - */ - if (irq && (state & mode & TASK_EXCLUSIVE)) { - if (!best_exclusive) - best_exclusive = p; - else if ((p->processor == best_cpu) && - (best_exclusive->processor != best_cpu)) + /* + * If waking up from an interrupt context then + * prefer processes which are affine to this + * CPU. + */ + if (irq && (state & mode & TASK_EXCLUSIVE)) { + if (!best_exclusive) best_exclusive = p; - } else { - if (sync) - wake_up_process_synchronous(p); - else - wake_up_process(p); - if (state & mode & TASK_EXCLUSIVE) - break; + else if ((p->processor == best_cpu) && + (best_exclusive->processor != best_cpu)) + best_exclusive = p; + } else { + if (sync) + wake_up_process_synchronous(p); + else + wake_up_process(p); + if (state & mode & TASK_EXCLUSIVE) + break; + } } + continue; } + + if (onwake && onwake(curr, mode, sync)) + break; } if (best_exclusive) best_exclusive->state = TASK_RUNNING;