--- linux/fs/dcache.c.orig +++ linux/fs/dcache.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -62,6 +63,10 @@ static inline void d_free(struct dentry dentry->d_op->d_release(dentry); if (dname_external(dentry)) kfree(dentry->d_name.name); + if (dentry->d_extra_attributes) { + kfree(dentry->d_extra_attributes); + dentry->d_extra_attributes = NULL; + } kmem_cache_free(dentry_cache, dentry); dentry_stat.nr_dentry--; } @@ -615,6 +620,7 @@ struct dentry * d_alloc(struct dentry * dentry->d_name.hash = name->hash; dentry->d_op = NULL; dentry->d_fsdata = NULL; + dentry->d_extra_attributes = NULL; dentry->d_mounted = 0; INIT_LIST_HEAD(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); @@ -910,6 +916,16 @@ void d_move(struct dentry * dentry, stru /* Unhash the target: dput() will then get rid of it */ list_del_init(&target->d_hash); + /* flush any possible attributes */ + if (dentry->d_extra_attributes) { + kfree(dentry->d_extra_attributes); + dentry->d_extra_attributes = NULL; + } + if (target->d_extra_attributes) { + kfree(target->d_extra_attributes); + target->d_extra_attributes = NULL; + } + list_del(&dentry->d_child); list_del(&target->d_child); @@ -1172,6 +1188,26 @@ out: return ino; } +void flush_dentry_attributes (void) +{ + struct list_head *chain, *tmp; + struct dentry *dentry; + int i; + + spin_lock(&dcache_lock); + for (i = 0; i <= d_hash_mask; i++) { + chain = dentry_hashtable + i; + tmp = chain->next; + while (tmp != chain) { + dentry = list_entry(tmp, struct dentry, d_hash); + kfree(dentry->d_extra_attributes); + dentry->d_extra_attributes = NULL; + tmp = tmp->next; + } + } + spin_unlock(&dcache_lock); +} + static void __init dcache_init(unsigned long mempages) { struct list_head *d; --- linux/fs/namei.c.orig +++ linux/fs/namei.c @@ -451,9 +451,13 @@ int link_path_walk(const char * name, st { struct dentry *dentry; struct inode *inode; - int err; + int err, atomic; unsigned int lookup_flags = nd->flags; + atomic = 0; + if (lookup_flags & 
LOOKUP_ATOMIC) + atomic = 1; + while (*name=='/') name++; if (!*name) @@ -522,6 +526,9 @@ int link_path_walk(const char * name, st /* This does the actual lookups.. */ dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE); if (!dentry) { + err = -EWOULDBLOCKIO; + if (atomic) + break; dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE); err = PTR_ERR(dentry); if (IS_ERR(dentry)) @@ -585,6 +592,9 @@ last_component: } dentry = cached_lookup(nd->dentry, &this, 0); if (!dentry) { + err = -EWOULDBLOCKIO; + if (atomic) + break; dentry = real_lookup(nd->dentry, &this, 0); err = PTR_ERR(dentry); if (IS_ERR(dentry)) @@ -953,6 +963,8 @@ static inline int lookup_flags(unsigned if (f & O_DIRECTORY) retval |= LOOKUP_DIRECTORY; + if (f & O_ATOMICLOOKUP) + retval |= LOOKUP_ATOMIC; return retval; } --- linux/kernel/exit.c.orig +++ linux/kernel/exit.c @@ -439,6 +439,14 @@ fake_volatile: #ifdef CONFIG_BSD_PROCESS_ACCT acct_process(code); #endif + if (current->tux_info) { +#ifdef CONFIG_TUX_DEBUG + printk("Possibly unexpected TUX-thread exit(%ld) at %p?\n", + code, __builtin_return_address(0)); +#endif + current->tux_exit(); + } + __exit_mm(tsk); lock_kernel(); --- linux/kernel/fork.c.orig +++ linux/kernel/fork.c @@ -661,6 +661,7 @@ int do_fork(unsigned long clone_flags, u *p = *current; + p->tux_info = NULL; retval = -EAGAIN; /* * Check if we are over our maximum process limit, but be sure to --- linux/kernel/ksyms.c.orig +++ linux/kernel/ksyms.c @@ -9,6 +9,7 @@ * by Bjorn Ekwall */ +#define __KERNEL_SYSCALLS__ #include #include #include @@ -51,6 +52,8 @@ #include #include #include +#include + #if defined(CONFIG_PROC_FS) #include @@ -159,6 +162,13 @@ EXPORT_SYMBOL(__user_walk); EXPORT_SYMBOL(lookup_one_len); EXPORT_SYMBOL(lookup_hash); EXPORT_SYMBOL(sys_close); +EXPORT_SYMBOL_GPL(sys_read); +EXPORT_SYMBOL_GPL(sys_write); +EXPORT_SYMBOL_GPL(sys_dup); +EXPORT_SYMBOL_GPL(sys_chroot); +EXPORT_SYMBOL_GPL(sys_chdir); +EXPORT_SYMBOL_GPL(sys_fcntl); +EXPORT_SYMBOL_GPL(do_pipe); 
EXPORT_SYMBOL(dcache_lock); EXPORT_SYMBOL(d_alloc_root); EXPORT_SYMBOL(d_delete); @@ -248,6 +258,7 @@ EXPORT_SYMBOL(d_prune_aliases); EXPORT_SYMBOL(prune_dcache); EXPORT_SYMBOL(shrink_dcache_sb); EXPORT_SYMBOL(shrink_dcache_parent); +EXPORT_SYMBOL_GPL(flush_dentry_attributes); EXPORT_SYMBOL(find_inode_number); EXPORT_SYMBOL(is_subdir); EXPORT_SYMBOL(get_unused_fd); @@ -385,6 +396,8 @@ EXPORT_SYMBOL(add_wait_queue); EXPORT_SYMBOL(add_wait_queue_exclusive); EXPORT_SYMBOL(remove_wait_queue); +EXPORT_SYMBOL_GPL(flush_signal_handlers); + /* completion handling */ EXPORT_SYMBOL(wait_for_completion); EXPORT_SYMBOL(complete); --- linux/mm/filemap.c.orig +++ linux/mm/filemap.c @@ -1232,7 +1232,7 @@ static inline int get_max_readahead(stru static void generic_file_readahead(int reada_ok, struct file * filp, struct inode * inode, - struct page * page) + struct page * page, int flags) { unsigned long end_index; unsigned long index = page->index; @@ -1240,6 +1240,10 @@ static void generic_file_readahead(int r unsigned long raend; int max_readahead = get_max_readahead(inode); + /* Readahead might block. */ + if (flags & F_ATOMIC) + return; + end_index = inode->i_size >> PAGE_CACHE_SHIFT; raend = filp->f_raend; @@ -1357,7 +1361,7 @@ void mark_page_accessed(struct page *pag * This is really ugly. But the goto's actually try to clarify some * of the logic when it comes to error handling etc. 
*/ -void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc, read_actor_t actor) +void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc, read_actor_t actor, int flags) { struct address_space *mapping = filp->f_dentry->d_inode->i_mapping; struct inode *inode = mapping->host; @@ -1442,7 +1446,7 @@ found_page: if (!Page_Uptodate(page)) goto page_not_up_to_date; - generic_file_readahead(reada_ok, filp, inode, page); + generic_file_readahead(reada_ok, filp, inode, page, flags); page_ok: /* If users can be writing to this page using arbitrary * virtual addresses, take care about potential aliasing @@ -1482,13 +1486,23 @@ page_ok: * Ok, the page was not immediately readable, so let's try to read ahead while we're at it.. */ page_not_up_to_date: - generic_file_readahead(reada_ok, filp, inode, page); + generic_file_readahead(reada_ok, filp, inode, page, flags); if (Page_Uptodate(page)) goto page_ok; /* Get exclusive access to the page ... */ - lock_page(page); + if (flags & F_ATOMIC) { + if (TryLockPage(page)) { + if (Page_Uptodate(page)) + goto page_ok; + desc->error = -EWOULDBLOCKIO; + page_cache_release(page); + break; + } + printk("page_not_up_to_date: atomic trylock succeeded\n"); + } else + lock_page(page); /* Did it get unhashed before we got the lock? */ if (!page->mapping) { @@ -1512,11 +1526,12 @@ readpage: goto page_ok; /* Again, try some read-ahead while waiting for the page to finish.. */ - generic_file_readahead(reada_ok, filp, inode, page); - wait_on_page(page); + generic_file_readahead(reada_ok, filp, inode, page, flags); + if (!(flags & F_ATOMIC)) + wait_on_page(page); if (Page_Uptodate(page)) goto page_ok; - error = -EIO; + error = (flags & F_ATOMIC) ? -EWOULDBLOCKIO : -EIO; } /* UHHUH! A synchronous read error occurred. 
Report it */ @@ -1525,6 +1540,11 @@ readpage: break; no_cached_page: + if (flags & F_ATOMIC) { + spin_unlock(&pagecache_lock); + desc->error = -EWOULDBLOCKIO; + break; + } /* * Ok, it wasn't cached, so we need to create a new * page.. @@ -1699,11 +1719,7 @@ int file_read_actor(read_descriptor_t * return size; } -/* - * This is the "read()" routine for all filesystems - * that can use the page cache directly. - */ -ssize_t generic_file_read(struct file * filp, char * buf, size_t count, loff_t *ppos) +static ssize_t generic_file_new_read(struct file * filp, char * buf, size_t count, loff_t *ppos, int flags) { ssize_t retval; @@ -1724,7 +1740,7 @@ ssize_t generic_file_read(struct file * desc.count = count; desc.buf = buf; desc.error = 0; - do_generic_file_read(filp, ppos, &desc, file_read_actor); + do_generic_file_read(filp, ppos, &desc, file_read_actor, flags); retval = desc.written; if (!retval) @@ -1758,6 +1774,15 @@ ssize_t generic_file_read(struct file * } } +/* + * This is the "read()" routine for all filesystems + * that can use the page cache directly. 
+ */ +ssize_t generic_file_read(struct file * filp, char * buf, size_t count, loff_t *ppos) +{ + return generic_file_new_read(filp, buf, count, ppos, 0); +} + static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset , unsigned long size) { ssize_t written; @@ -1845,7 +1870,7 @@ static ssize_t common_sendfile(int out_f desc.count = count; desc.buf = (char *) out_file; desc.error = 0; - do_generic_file_read(in_file, offset, &desc, file_send_actor); + do_generic_file_read(in_file, offset, &desc, file_send_actor, 0); retval = desc.written; if (!retval) --- linux/include/linux/dcache.h.orig +++ linux/include/linux/dcache.h @@ -81,6 +81,7 @@ struct dentry { struct super_block * d_sb; /* The root of the dentry tree */ unsigned long d_vfs_flags; void * d_fsdata; /* fs-specific data */ + void * d_extra_attributes; /* TUX-specific data */ unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */ }; @@ -168,6 +169,7 @@ extern struct dentry * d_alloc(struct de extern void shrink_dcache_sb(struct super_block *); extern void shrink_dcache_parent(struct dentry *); extern int d_invalidate(struct dentry *); +extern void flush_dentry_attributes(void); #define shrink_dcache() prune_dcache(0) struct zone_struct; --- linux/include/linux/errno.h.orig 2000-03-29 03:51:39.000000000 +0200 +++ linux/include/linux/errno.h @@ -21,6 +21,9 @@ #define EBADTYPE 527 /* Type not supported by server */ #define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */ +/* Defined for TUX async IO */ +#define EWOULDBLOCKIO 530 /* Would block due to block-IO */ + #endif #endif --- linux/include/linux/fs.h.orig +++ linux/include/linux/fs.h @@ -660,6 +660,14 @@ extern int fcntl_setlk(unsigned int, uns extern int fcntl_getlk64(unsigned int, struct flock64 *); extern int fcntl_setlk64(unsigned int, unsigned int, struct flock64 *); +extern asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg); +extern asmlinkage long 
sys_dup(unsigned int fildes); +extern asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd); +extern asmlinkage ssize_t sys_read(unsigned int fd, char * buf, size_t count); +extern asmlinkage ssize_t sys_write(unsigned int fd, const char * buf, size_t count); +extern asmlinkage long sys_chroot(const char * filename); +extern asmlinkage long sys_chdir(const char * filename); + /* fs/locks.c */ extern void locks_init_lock(struct file_lock *); extern void locks_copy_lock(struct file_lock *, struct file_lock *); @@ -855,6 +863,9 @@ struct block_device_operations { * read, write, poll, fsync, readv, writev can be called * without the big kernel lock held in all filesystems. */ + +#define F_ATOMIC 0x0001 + struct file_operations { struct module *owner; loff_t (*llseek) (struct file *, loff_t, int); @@ -1352,6 +1363,8 @@ static inline long IS_ERR(const void *pt #define LOOKUP_POSITIVE (8) #define LOOKUP_PARENT (16) #define LOOKUP_NOALT (32) +#define LOOKUP_ATOMIC (64) + /* * Type of the last component on LOOKUP_PARENT */ @@ -1483,7 +1496,7 @@ extern int file_read_actor(read_descript extern ssize_t generic_file_read(struct file *, char *, size_t, loff_t *); extern int precheck_file_write(struct file *, struct inode *, size_t *, loff_t *); extern ssize_t generic_file_write(struct file *, const char *, size_t, loff_t *); -extern void do_generic_file_read(struct file *, loff_t *, read_descriptor_t *, read_actor_t); +extern void do_generic_file_read(struct file *, loff_t *, read_descriptor_t *, read_actor_t, int); extern loff_t no_llseek(struct file *file, loff_t offset, int origin); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); extern ssize_t generic_read_dir(struct file *, char *, size_t, loff_t *); --- linux/include/linux/kernel_stat.h.orig +++ linux/include/linux/kernel_stat.h @@ -57,12 +57,17 @@ static inline int kstat_irqs (int irq) */ extern inline int kstat_irqs (int irq) { +#if CONFIG_SMP int i, sum=0; for (i = 0 ; i < 
smp_num_cpus ; i++) sum += kstat.irqs[cpu_logical_map(i)][irq]; return sum; +#else + return kstat.irqs[0][irq]; +#endif + } #endif --- linux/include/linux/mmzone.h.orig +++ linux/include/linux/mmzone.h @@ -8,6 +8,7 @@ #include #include #include +#include /* * Free memory management - zoned buddy allocator. --- linux/include/linux/sched.h.orig +++ linux/include/linux/sched.h @@ -138,6 +138,7 @@ extern spinlock_t mmlist_lock; extern void sched_init(void); extern void init_idle(void); extern void show_state(void); +extern void show_stack(unsigned long * esp); extern void cpu_init (void); extern void trap_init(void); extern void update_process_times(int user); @@ -406,6 +407,11 @@ struct task_struct { int (*notifier)(void *priv); void *notifier_data; sigset_t *notifier_mask; + + /* TUX state */ + void *tux_info; + void (*tux_exit)(void); + /* Thread group tracking */ u32 parent_exec_id; --- linux/include/linux/skbuff.h.orig +++ linux/include/linux/skbuff.h @@ -1156,6 +1156,8 @@ extern void skb_copy_and_csum_dev(cons extern void skb_init(void); extern void skb_add_mtu(int mtu); +struct tux_req_struct; + #ifdef CONFIG_NETFILTER static inline void nf_conntrack_put(struct nf_ct_info *nfct) --- linux/include/linux/socket.h.orig +++ linux/include/linux/socket.h @@ -278,6 +278,11 @@ extern void memcpy_tokerneliovec(struct extern int move_addr_to_user(void *kaddr, int klen, void *uaddr, int *ulen); extern int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr); extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); +struct socket; +struct file * sock_map_file(struct socket *sock); +extern int sock_map_fd(struct socket *sock); +extern struct socket *sockfd_lookup(int fd, int *err); + #endif #endif /* not kernel and not glibc */ #endif /* _LINUX_SOCKET_H */ --- linux/include/linux/sysctl.h.orig +++ linux/include/linux/sysctl.h @@ -179,7 +179,8 @@ enum NET_DECNET=15, NET_ECONET=16, NET_KHTTPD=17, - NET_SCTP=18 + NET_SCTP=18, + NET_TUX=19 }; /* 
/proc/sys/kernel/random */ @@ -546,6 +547,56 @@ enum { NET_SCTP_PRESERVE_ENABLE = 11, NET_SCTP_MAX_BURST = 12, }; + +/* /proc/sys/net/tux/ */ +enum { + NET_TUX_DOCROOT = 1, + NET_TUX_LOGFILE = 2, + NET_TUX_EXTCGI = 3, + NET_TUX_STOP = 4, + NET_TUX_CLIENTPORT = 5, + NET_TUX_LOGGING = 6, + NET_TUX_SERVERPORT = 7, + NET_TUX_THREADS = 8, + NET_TUX_KEEPALIVE_TIMEOUT = 9, + NET_TUX_MAX_KEEPALIVE_BW = 10, + NET_TUX_DEFER_ACCEPT = 11, + NET_TUX_MAX_FREE_REQUESTS = 12, + NET_TUX_MAX_CONNECT = 13, + NET_TUX_MAX_BACKLOG = 14, + NET_TUX_MODE_FORBIDDEN = 15, + NET_TUX_MODE_ALLOWED = 16, + NET_TUX_MODE_USERSPACE = 17, + NET_TUX_MODE_CGI = 18, + NET_TUX_CGI_UID = 19, + NET_TUX_CGI_GID = 20, + NET_TUX_CGIROOT = 21, + NET_TUX_LOGENTRY_ALIGN_ORDER = 22, + NET_TUX_NONAGLE = 23, + NET_TUX_ACK_PINGPONG = 24, + NET_TUX_PUSH_ALL = 25, + NET_TUX_ZEROCOPY_PARSE = 26, + NET_CONFIG_TUX_DEBUG_BLOCKING = 27, + NET_TUX_PAGE_AGE_START = 28, + NET_TUX_PAGE_AGE_ADV = 29, + NET_TUX_PAGE_AGE_MAX = 30, + NET_TUX_VIRTUAL_SERVER = 31, + NET_TUX_MAX_OBJECT_SIZE = 32, + NET_TUX_COMPRESSION = 33, + NET_TUX_NOID = 34, + NET_TUX_CGI_INHERIT_CPU = 35, + NET_TUX_CGI_CPU_MASK = 36, + NET_TUX_ZEROCOPY_HEADER = 37, + NET_TUX_ZEROCOPY_SENDFILE = 38, + NET_TUX_ALL_USERSPACE = 39, + NET_TUX_REDIRECT_LOGGING = 40, + NET_TUX_REFERER_LOGGING = 41, + NET_TUX_MAX_HEADER_LEN = 42, + NET_TUX_404_PAGE = 43, + NET_TUX_MAX_KEEPALIVES = 44, + NET_TUX_IGNORE_QUERY = 45, +}; + /* /proc/sys/net/khttpd/ */ enum { NET_KHTTPD_DOCROOT = 1, --- linux/include/asm-i386/fcntl.h.orig +++ linux/include/asm-i386/fcntl.h @@ -20,6 +20,7 @@ #define O_LARGEFILE 0100000 #define O_DIRECTORY 0200000 /* must be a directory */ #define O_NOFOLLOW 0400000 /* don't follow links */ +#define O_ATOMICLOOKUP 01000000 /* do atomic file lookup */ #define F_DUPFD 0 /* dup */ #define F_GETFD 1 /* get close_on_exec */ --- linux/include/asm-alpha/fcntl.h.orig +++ linux/include/asm-alpha/fcntl.h @@ -20,6 +20,7 @@ #define O_DIRECTORY 0100000 /* must be a 
directory */ #define O_NOFOLLOW 0200000 /* don't follow links */ #define O_LARGEFILE 0400000 /* will be set by the kernel on every open */ +#define O_ATOMICLOOKUP 01000000 /* do atomic file lookup */ #define O_DIRECT 02000000 /* direct disk access - should check with OSF/1 */ #define F_DUPFD 0 /* dup */ --- linux/include/asm-alpha/unistd.h.orig +++ linux/include/asm-alpha/unistd.h @@ -532,7 +532,7 @@ static inline long open(const char * nam return sys_open(name, mode, flags); } -extern long sys_dup(int); +extern long sys_dup(unsigned int); static inline long dup(int fd) { return sys_dup(fd); @@ -557,13 +557,11 @@ static inline long _exit(int value) #define exit(x) _exit(x) -extern long sys_write(int, const char *, int); static inline long write(int fd, const char * buf, int nr) { return sys_write(fd, buf, nr); } -extern long sys_read(int, char *, int); static inline long read(int fd, char * buf, int nr) { return sys_read(fd, buf, nr); --- linux/include/asm-sparc/fcntl.h.orig +++ linux/include/asm-sparc/fcntl.h @@ -20,6 +20,7 @@ #define O_DIRECTORY 0x10000 /* must be a directory */ #define O_NOFOLLOW 0x20000 /* don't follow links */ #define O_LARGEFILE 0x40000 +#define O_ATOMICLOOKUP 0x80000 /* do atomic file lookup */ #define O_DIRECT 0x100000 /* direct disk access hint */ #define F_DUPFD 0 /* dup */ --- linux/include/net/sock.h.orig +++ linux/include/net/sock.h @@ -477,7 +477,7 @@ struct tcp_opt { */ /* Define this to get the sk->debug debugging facility. */ -#define SOCK_DEBUGGING +//#define SOCK_DEBUGGING #ifdef SOCK_DEBUGGING #define SOCK_DEBUG(sk, msg...) 
do { if((sk) && ((sk)->debug)) printk(KERN_DEBUG msg); } while (0) #else @@ -683,7 +683,7 @@ struct sock { /* Identd and reporting IO signals */ struct socket *socket; - /* RPC layer private data */ + /* RPC and TUX layer private data */ void *user_data; /* Callbacks */ @@ -694,6 +694,7 @@ struct sock { int (*backlog_rcv) (struct sock *sk, struct sk_buff *skb); + void (*create_child)(struct sock *sk, struct sock *newsk); void (*destruct)(struct sock *sk); }; --- linux/include/net/tcp.h.orig +++ linux/include/net/tcp.h @@ -814,6 +814,7 @@ extern void tcp_send_skb(struct sock *, extern void tcp_push_one(struct sock *, unsigned mss_now); extern void tcp_send_ack(struct sock *sk); extern void tcp_send_delayed_ack(struct sock *sk); +extern void cleanup_rbuf(struct sock *sk, int copied); /* tcp_timer.c */ extern void tcp_init_xmit_timers(struct sock *); @@ -1176,8 +1177,9 @@ static __inline__ void tcp_minshall_upda /* Return 0, if packet can be sent now without violation Nagle's rules: 1. It is full sized. 2. Or it contains FIN. - 3. Or TCP_NODELAY was set. - 4. Or TCP_CORK is not set, and all sent packets are ACKed. + 3. Or higher layers meant to force a packet boundary, hence the PSH bit. + 4. Or TCP_NODELAY was set. + 5. Or TCP_CORK is not set, and all sent packets are ACKed. With Minshall's modification: all sent small packets are ACKed. */ @@ -1198,6 +1200,8 @@ tcp_nagle_check(struct tcp_opt *tp, stru static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb, unsigned cur_mss, int nonagle) { + int res; + /* RFC 1122 - section 4.2.3.4 * * We must queue if @@ -1222,11 +1226,12 @@ static __inline__ int tcp_snd_test(struc /* Don't be strict about the congestion window for the * final FIN frame. 
-DaveM */ - return ((nonagle==1 || tp->urg_mode + res = ((nonagle==1 || tp->urg_mode || !tcp_nagle_check(tp, skb, cur_mss, nonagle)) && ((tcp_packets_in_flight(tp) < tp->snd_cwnd) || (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) && !after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd)); + return res; } static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_opt *tp) @@ -1252,9 +1257,12 @@ static __inline__ void __tcp_push_pendin struct sk_buff *skb = tp->send_head; if (skb) { + int snd_test; + if (!tcp_skb_is_last(sk, skb)) nonagle = 1; - if (!tcp_snd_test(tp, skb, cur_mss, nonagle) || + snd_test = tcp_snd_test(tp, skb, cur_mss, nonagle); + if (!snd_test || tcp_write_xmit(sk, nonagle)) tcp_check_probe_timer(sk, tp); } --- linux/include/net/tux.h.orig +++ linux/include/net/tux.h @@ -0,0 +1,797 @@ +#ifndef _NET_TUX_H +#define _NET_TUX_H + +/* + * TUX - Integrated Application Protocols Layer and Object Cache + * + * Copyright (C) 2000, 2001, Ingo Molnar + * + * tux.h: main structure definitions and function prototypes + */ + +#define __KERNEL_SYSCALLS__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* Maximum number of threads: */ +#define CONFIG_TUX_NUMTHREADS 8 + +/* Number of cachemiss/IO threads: */ +#define NR_IO_THREADS 32 + +/* Maximum number of listen sockets per thread: */ +#define CONFIG_TUX_NUMSOCKETS 16 + +extern spinlock_t tux_module_lock; +extern struct module *tux_module; +extern int (*sys_tux_ptr) (unsigned int action, user_req_t *u_info); + +#undef Dprintk + +extern int tux_TDprintk; +extern int tux_Dprintk; + +#define TUX_DEBUG CONFIG_TUX_DEBUG +#if CONFIG_TUX_DEBUG +# define TUX_BUG() BUG() + +# define TUX_DPRINTK 1 +# define TDprintk(x...) 
do { if (tux_TDprintk) { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } } while (0) +# define Dprintk(x...) do { if (tux_Dprintk == 1) TDprintk(x); } while (0) +#else +# define TUX_DPRINTK 0 +# define Dprintk(x...) do { } while (0) +# define TDprintk(x...) do { } while (0) +//# define TUX_BUG() BUG() +# define TUX_BUG() do { } while (0) +#endif + +#if 1 +# define INC_STAT(x) do { } while (0) +# define DEC_STAT(x) do { } while (0) +# define ADD_STAT(x,y) do { } while (0) +# define SUB_STAT(x,y) do { } while (0) +#else +# define INC_STAT(x) atomic_inc((atomic_t *)&kstat.x) +# define DEC_STAT(x) atomic_dec((atomic_t *)&kstat.x) +# define ADD_STAT(y,x) atomic_add(y,(atomic_t *)&kstat.x) +# define SUB_STAT(y,x) atomic_sub(y,(atomic_t *)&kstat.x) +#endif + +// lru needs this: + +# define DEBUG_DEL_LIST(x...) do { INIT_LIST_HEAD((x)); } while (0) + + +#define LOG_LEN (8*1024*1024UL) + +struct tux_req_struct; +typedef struct tux_req_struct tux_req_t; +typedef struct tux_threadinfo threadinfo_t; + +extern struct address_space_operations url_aops; + +typedef struct tcapi_template_s { + char *vfs_name; + struct list_head modules; + int (*query) (tux_req_t *req); + struct module *mod; + unsigned int userspace_id; +} tcapi_template_t; + +typedef struct mimetype_s { + struct list_head list; + + char *ext; + unsigned int ext_len; + char *type; + unsigned int type_len; + char *expire_str; + unsigned int expire_str_len; + + unsigned int special; +} mimetype_t; + +typedef struct tux_attribute_s { + mimetype_t *mime; + tcapi_template_t *tcapi; +} tux_attribute_t; + +#define MAX_TUX_ATOMS 8 + +typedef void (atom_func_t)(tux_req_t *req, int cachemiss); + +typedef struct tux_proto_s +{ + unsigned int defer_accept; + unsigned int can_redirect; + void (*got_request) (tux_req_t *req); + int (*parse_message) (tux_req_t *req, const int total_len); + atom_func_t *illegal_request; + atom_func_t *request_timeout; + void (*pre_log) (tux_req_t *req); + int (*check_req_err) 
(tux_req_t *req, int cachemiss); + char * (*print_dir_line) (tux_req_t *req, char *tmp, char *d_name, int d_len, int d_type, struct dentry *dentry, struct inode *inode); + const char *name; + struct nameidata main_docroot; +} tux_proto_t; + +typedef struct tux_socket_s { + tux_proto_t *proto; + unsigned int ip; + unsigned short port; + struct proc_dir_entry *entry; +} tux_socket_t; + +extern tux_socket_t tux_listen [CONFIG_TUX_NUMTHREADS][CONFIG_TUX_NUMSOCKETS]; + + +typedef struct abuf_s { + struct page *page; + char *buf; + unsigned int size; + unsigned int max_len; + unsigned int offset; + unsigned int left; + unsigned long flags; +} abuf_t; + +struct linux_dirent64 { + u64 d_ino; + s64 d_off; + unsigned short d_reclen; + unsigned char d_type; + char d_name[0]; +}; + +struct getdents_callback64 { + struct linux_dirent64 * current_dir; + struct linux_dirent64 * previous; + int count; + int error; +}; + +#define TUX_MAGIC 0x12457801 + +#define MAX_TUX_ATOMS 8 + +struct tux_req_struct +{ + tux_proto_t *proto; + + int atom_idx; + atom_func_t *atoms [MAX_TUX_ATOMS]; + struct list_head work; + + struct list_head all; + struct list_head free; + struct list_head lru; + + unsigned long idle_input; + unsigned long wait_output_space; + + struct socket *sock; + struct dentry *dentry; + struct vfsmount *mnt; + struct dentry *docroot_dentry; + struct vfsmount *docroot_mnt; + struct dentry *cwd_dentry; + struct vfsmount *cwd_mnt; + + struct file in_file; + int fd; + read_descriptor_t desc; + u32 client_addr; + u32 client_port; + unsigned int virtual; + + loff_t total_file_len; + unsigned int lendigits; + loff_t offset_start; + loff_t offset_end; + loff_t output_len; + + loff_t ftp_offset_start; + + time_t mtime; + unsigned int etaglen; + char etag [40]; + + char usermode; + unsigned int usermodule_idx; + struct dentry *module_dentry; + struct vfsmount *module_mnt; + char *userbuf; + unsigned int userlen; + + tux_attribute_t *attr; + + threadinfo_t *ti; + wait_queue_t sleep; + 
wait_queue_t ftp_sleep; + + abuf_t abuf; + /* + * Parsed request fields. In-line strings are zero-delimited. + */ + const char *headers; + unsigned int headers_len; + + unsigned int parsed_len; + + // FTP part + ftp_command_t ftp_command; + u32 ftp_user_addr; + u16 ftp_user_port; + + struct socket *data_sock; + unsigned int prev_pos; + + // ls handing: + struct linux_dirent64 *dirp0; + unsigned int curroff, total; + +#define MAX_USERNAME_LEN 16 + char username[MAX_USERNAME_LEN]; + unsigned int username_len; + + // HTTP part + http_method_t method; + const char *method_str; + unsigned int method_len; + + http_version_t version; + const char *version_str; + unsigned int version_len; + + /* requested URI: */ + + const char *uri_str; + unsigned int uri_len; + + /* Objectname (filename/scriptname) this URI refers to: */ + +#define MAX_OBJECTNAME_LEN 256 + char objectname[MAX_OBJECTNAME_LEN + 4]; // space for .gz as well + unsigned int objectname_len; + + /* Query string within the URI: */ + + const char *query_str; + unsigned int query_len; + + /* Cookies: */ + + const char *cookies_str; + unsigned int cookies_len; + unsigned int parse_cookies; + + /* Content-TYpe */ + const char *content_type_str; + unsigned int content_type_len; + + /* Content-Length: */ + + const char *contentlen_str; + unsigned int contentlen_len; + unsigned int content_len; + + /* User-Agent: */ + + const char *user_agent_str; + unsigned int user_agent_len; + + /* Accept: */ + + const char *accept_str; + unsigned int accept_len; + + /* Accept-Charset: */ + + const char *accept_charset_str; + unsigned int accept_charset_len; + + /* Accept-Language: */ + + const char *accept_language_str; + unsigned int accept_language_len; + + /* Cache-Control: */ + + const char *cache_control_str; + unsigned int cache_control_len; + + /* If-Modified-Since: */ + + const char *if_modified_since_str; + unsigned int if_modified_since_len; + + /* If-None-Match: */ + const char *if_none_match_str; + unsigned int 
if_none_match_len; + + /* If-Range: */ + + const char *if_range_str; + unsigned int if_range_len; + + /* Negotiate: */ + + const char *negotiate_str; + unsigned int negotiate_len; + + /* Pragma: */ + + const char *pragma_str; + unsigned int pragma_len; + + /* Referer: */ + + const char *referer_str; + unsigned int referer_len; + + /* Accept-Encoding: */ + + const char *accept_encoding_str; + unsigned int accept_encoding_len; + unsigned int may_send_gzip; + unsigned int content_gzipped; + + /* Host */ + +#define MAX_HOST_LEN 128 + char host[MAX_HOST_LEN]; + unsigned int host_len; + + /* POSTed data: */ + + const char *post_data_str; + unsigned int post_data_len; + + unsigned int status; + + /* the file being sent */ + + unsigned int bytes_sent; +#if CONFIG_TUX_DEBUG + unsigned int bytes_expected; +#endif + unsigned long first_timestamp; + unsigned int body_len; + + unsigned int user_error; + + char error; + char postponed; + + char had_cachemiss; + char lookup_dir; + char lookup_404; + + char keep_alive; + struct timer_list keepalive_timer; + unsigned int total_bytes; + struct timer_list output_timer; + + unsigned int nr_keepalives; + + void *gzip_state; + + unsigned int event; + u64 private; + + unsigned int magic; + void (*real_data_ready)(struct sock *sk, int space); + void (*real_state_change)(struct sock *sk); + void (*real_write_space)(struct sock *sk); + void (*real_error_report)(struct sock *sk); + void (*real_destruct)(struct sock *sk); + + void (*ftp_real_data_ready)(struct sock *sk, int space); + void (*ftp_real_state_change)(struct sock *sk); + void (*ftp_real_write_space)(struct sock *sk); + void (*ftp_real_error_report)(struct sock *sk); + void (*ftp_real_create_child)(struct sock *sk, struct sock *newsk); + void (*ftp_real_destruct)(struct sock *sk); + +#if CONFIG_TUX_EXTENDED_LOG + unsigned long accept_timestamp; + unsigned long parse_timestamp; + unsigned long output_timestamp; + unsigned long flush_timestamp; +# define SET_TIMESTAMP(x) do { (x) = 
jiffies; } while (0) +#else +# define SET_TIMESTAMP(x) do { } while (0) +#endif + +}; + +extern void add_tux_atom (tux_req_t *req, atom_func_t *event_done); +extern void del_tux_atom (tux_req_t *req); +extern void tux_schedule_atom (tux_req_t *req, int cachemiss); +extern void add_req_to_workqueue (tux_req_t *req); + + +typedef struct iothread_s +{ + spinlock_t async_lock; + threadinfo_t *ti; + struct list_head async_queue; + wait_queue_head_t async_sleep; + unsigned int nr_async_pending; + unsigned int threads; + unsigned int shutdown; + wait_queue_head_t wait_shutdown; +} iothread_t; + +typedef struct tux_listen_s +{ + tux_proto_t *proto; + struct socket *sock; + unsigned int cloned; +} tux_listen_t; + +struct tux_threadinfo +{ + tux_req_t *userspace_req; + unsigned int started; + struct task_struct *thread; + iothread_t *iot; + wait_queue_t wait_event [CONFIG_TUX_NUMSOCKETS]; + wait_queue_t stop; + unsigned int pid; + + struct page *header_cache; + unsigned int header_offset; + + unsigned int nr_requests; + struct list_head all_requests; + + unsigned int nr_free_requests; + spinlock_t free_requests_lock; + struct list_head free_requests; + + spinlock_t work_lock; + struct list_head work_pending; + struct list_head lru; + unsigned int nr_lru; + + unsigned int listen_error; + tux_listen_t listen[CONFIG_TUX_NUMSOCKETS]; + + unsigned int cpu; + unsigned int __padding[16]; +}; + +typedef enum special_mimetypes { + NORMAL_MIME_TYPE, + MIME_TYPE_REDIRECT, + MIME_TYPE_CGI, + MIME_TYPE_MODULE, +} special_mimetypes_t; + +#if CONFIG_TUX_DEBUG +#if 0 +extern inline void url_hist_hit (int size) +{ + unsigned int idx = size/1024; + + if (idx >= URL_HIST_SIZE) + idx = URL_HIST_SIZE-1; + kstat.url_hist_hits[idx]++; +} +extern inline void url_hist_miss (int size) +{ + unsigned int idx = size/1024; + + if (idx >= URL_HIST_SIZE) + idx = URL_HIST_SIZE-1; + kstat.url_hist_misses[idx]++; +} +#endif +extern void __check_req_list (tux_req_t *req, struct list_head *list); +# define 
check_req_list __check_req_list +#else +# define check_req_list(req, list) do { } while (0) +#endif + +#define url_hist_hit(size) do { } while (0) +#define url_hist_miss(size) do { } while (0) + +extern char tux_common_docroot[200]; +extern char tux_http_subdocroot[200]; +extern char tux_ftp_subdocroot[200]; +extern char tux_logfile[200]; +extern char tux_cgiroot[200]; +extern char tux_404_page[200]; +extern char tux_default_vhost[200]; +extern char tux_extra_html_header[600]; +extern unsigned int tux_extra_html_header_size; +extern int tux_cgi_uid; +extern int tux_cgi_gid; +extern unsigned int tux_clientport; +extern unsigned int tux_logging; +extern unsigned int tux_threads; +extern unsigned int tux_keepalive_timeout; +extern unsigned int tux_max_output_bandwidth; +extern unsigned int tux_max_backlog; +extern unsigned int tux_max_connect; +extern unsigned int tux_mode_forbidden; +extern unsigned int tux_mode_allowed; +extern unsigned int tux_logentry_align_order; +extern unsigned int tux_nonagle; +extern unsigned int tux_ack_pingpong; +extern unsigned int tux_push_all; +extern unsigned int tux_zerocopy_parse; +extern unsigned int tux_generate_etags; +extern unsigned int tux_generate_last_mod; +extern unsigned int tux_generate_cache_control; +extern unsigned int tux_ip_logging; +extern unsigned int tux_ftp_wait_close; +extern unsigned int tux_ftp_log_retr_only; +extern unsigned int tux_hide_unreadable; + +typedef enum virtual_server { + TUX_VHOST_NONE, + TUX_VHOST_HOST, + TUX_VHOST_IP, + TUX_VHOST_IP_HOST, +} virtual_server_t; + +extern unsigned int tux_virtual_server; +extern unsigned int mass_hosting_hash; +extern unsigned int strip_host_tail; +extern unsigned int tux_ftp_virtual_server; + +extern unsigned int tux_max_object_size; +extern unsigned int tux_max_free_requests; +extern unsigned int tux_defer_accept; + +extern struct socket * start_listening(tux_socket_t *listen, int nr); +extern void stop_listening(struct socket **sock); +extern void 
start_sysctl(void); +extern void end_sysctl(void); +extern void flush_request (tux_req_t *req, int cachemiss); +extern void unlink_tux_socket (tux_req_t *req); +extern void unlink_tux_data_socket (tux_req_t *req); +extern void unlink_tux_listen_socket (tux_req_t *req); +extern void link_tux_ftp_accept_socket (tux_req_t *req, struct socket *sock); +extern void link_tux_data_socket (tux_req_t *req, struct socket *sock); +extern void tux_push_req (tux_req_t *req); +extern int send_sync_buf (tux_req_t *req, struct socket *sock, const char *buf, const size_t length, unsigned long flags); +extern void __send_async_message (tux_req_t *req, const char *message, int status, unsigned int size, int push); +#define send_async_message(req,str,status,push) \ + __send_async_message(req,str,status,strlen(str),push) + +extern void send_success (tux_req_t *req, struct socket *sock); +extern void send_async_err_not_found (tux_req_t *req); +extern void send_async_timed_out (tux_req_t *req); + +extern void kfree_req (tux_req_t *req); +extern int accept_requests (threadinfo_t *ti); +extern int process_requests (threadinfo_t *ti, tux_req_t **user_req); +extern int flush_freequeue (threadinfo_t * ti); +extern int tux_flush_workqueue (threadinfo_t *ti); +extern tux_req_t * pick_userspace_req (threadinfo_t *ti); +extern atom_func_t redirect_request; +extern atom_func_t parse_request; +extern void queue_cachemiss (tux_req_t *req); +extern int start_cachemiss_threads (threadinfo_t *ti); +extern void stop_cachemiss_threads (threadinfo_t *ti); +struct file * tux_open_file(char *filename, int mode); +extern void start_log_thread (void); +extern void stop_log_thread (void); +extern void add_mimetype (char *new_ext, char *new_type, char *new_expire); +extern void free_mimetypes (void); +extern int lookup_object (tux_req_t *req, const unsigned int flag); +extern int handle_gzip_req (tux_req_t *req, unsigned int flags); +extern struct dentry * tux_lookup (tux_req_t *req, const char *filename, const 
unsigned int flag, struct vfsmount **mnt); +extern tcapi_template_t * lookup_tuxmodule (const char *filename); +extern int register_tuxmodule (tcapi_template_t *tcapi); +extern tcapi_template_t * unregister_tuxmodule (char *vfs_name); +extern tcapi_template_t * get_first_usermodule (void); +extern int user_register_module (user_req_t *u_info); +extern int user_unregister_module (user_req_t *u_info); +extern void unregister_all_tuxmodules (void); + +typedef struct exec_param_s { + char *command; + char **argv; + char **envp; + unsigned int pipe_fds; +} exec_param_t; + +extern pid_t tux_exec_process (char *command, char **argv, char **envp, int pipe_fds, exec_param_t *param, int wait); + +extern void start_external_cgi (tux_req_t *req); +extern tcapi_template_t extcgi_tcapi; + +extern void queue_output_req (tux_req_t *req, threadinfo_t *ti); +extern void queue_userspace_req (tux_req_t *req, threadinfo_t *ti); + + +extern void __log_request (tux_req_t *req); +extern inline void log_request (tux_req_t *req) +{ + if (tux_logging) + __log_request(req); +} + +extern int __connection_too_fast (tux_req_t *req); + +#define connection_too_fast(req) \ + ({ \ + int __ret = 1; \ + if (unlikely(tux_max_output_bandwidth)) \ + __ret = __connection_too_fast(req); \ + __ret; \ + }) + +extern void trunc_headers (tux_req_t *req); +extern int generic_send_file (tux_req_t *req, struct socket *sock, int cachemiss); +extern int tux_fetch_file (tux_req_t *req, int nonblock); + +extern void postpone_request (tux_req_t *req); +extern int continue_request (int fd); +extern void tux_push_pending (struct sock *sk); +extern void zap_request (tux_req_t *req, int cachemiss); +extern int add_output_space_event (tux_req_t *req, struct socket *sock); + +extern void reap_kids (void); +extern void unuse_frag (struct sk_buff *skb, skb_frag_t *frag); +extern skb_frag_t * build_dynbuf_frag (tux_req_t *req, unsigned int size); +extern int tux_permission (struct inode *inode); +extern void flush_all_signals 
(void); + +#define D() Dprintk("{%s:%d}\n", __FILE__, __LINE__) + +extern int nr_async_io_pending (void); + +extern void __add_keepalive_timer (tux_req_t *req); +#define add_keepalive_timer(req) \ +do { \ + if (tux_keepalive_timeout) { \ + Dprintk("add_keepalive_timer(%p).\n", (req)); \ + __add_keepalive_timer(req); \ + } \ +} while (0) +extern void __del_keepalive_timer (tux_req_t *req); +#define del_keepalive_timer(req) \ +do { \ + if (tux_keepalive_timeout) { \ + Dprintk("del_keepalive_timer(%p).\n", (req)); \ + __del_keepalive_timer(req); \ + } \ +} while (0) + +extern void del_output_timer (tux_req_t *req); +extern void output_timeout (tux_req_t *req); + +extern void print_req (tux_req_t *req); + +extern char tux_date [DATE_LEN]; + + +extern int nr_async_io_pending (void); +extern void tux_exit (void); +extern char * get_abuf (tux_req_t *req, unsigned int max_size); +extern void send_abuf (tux_req_t *req, unsigned int size, unsigned long flags); + + +extern int idle_event (tux_req_t *req); +extern int output_space_event (tux_req_t *req); +extern unsigned int log_cpu_mask; +extern unsigned int tux_compression; +extern unsigned int tux_noid; +extern unsigned int tux_cgi_inherit_cpu; +extern unsigned int tux_zerocopy_header; +extern unsigned int tux_zerocopy_sendfile; +extern unsigned int tux_cgi_cpu_mask; +extern tux_proto_t tux_proto_http; +extern tux_proto_t tux_proto_ftp; +extern unsigned int tux_all_userspace; +extern unsigned int tux_ignore_query; +extern unsigned int tux_redirect_logging; +extern unsigned int tux_referer_logging; +extern unsigned int tux_log_incomplete; +extern unsigned int tux_max_header_len; +extern unsigned int tux_cpu_offset; +extern unsigned int tux_ftp_login_message; + +extern void drop_permissions (void); +extern int query_extcgi (tux_req_t *req); +extern int tux_chroot (char *dir); + +extern void install_req_dentry (tux_req_t *req, struct dentry *dentry, struct vfsmount *mnt); +extern void release_req_dentry (tux_req_t *req); 
+extern void unidle_req (tux_req_t *req); +extern int nr_requests_used (void); + +#define req_err(req) do { (req)->error = 1; Dprintk("request %p error at %s:%d.\n", req, __FILE__, __LINE__); } while (0) + +#define enough_wspace(sk) (tcp_wspace(sk) >= tcp_min_write_space(sk)) +#define clear_keepalive(req) do { (req)->keep_alive = 0; Dprintk("keepalive cleared for req %p.\n", req); } while (0) + +extern int print_all_requests (threadinfo_t *ti); +extern unsigned int tux_max_keepalives; +extern int time_unix2ls (time_t zulu, char *buf); +extern void last_mod_time(char * curr, const time_t t); +extern int mdtm_time(char * curr, const time_t t); +extern time_t parse_time(const char *str, const int str_len); + +extern unsigned int nr_tux_threads; +extern threadinfo_t threadinfo[CONFIG_TUX_NUMTHREADS]; + +#define switch_docroot(req) do { if (((req)->docroot_dentry != current->fs->root) || ((req)->docroot_mnt != current->fs->rootmnt)) __switch_docroot(req); } while (0) +extern void __switch_docroot(tux_req_t *req); +extern void list_directory (tux_req_t *req, int cachemiss); +extern char * tux_print_path (tux_req_t *req, struct dentry *dentry, struct vfsmount *mnt, char *buf, unsigned int max_len); + +extern unsigned int tux_http_dir_indexing; + +extern void tux_gzip_start (tux_req_t *req); +extern void tux_gzip_end (tux_req_t *req); +int tux_gzip_compress (void *state, unsigned char *data_in, unsigned char *data_out, __u32 *in_len, __u32 *out_len); + +struct dentry * __tux_lookup (tux_req_t *req, const char *filename, + struct nameidata *base, struct vfsmount **mnt); + +/* error codes for req->error */ +#define TUX_ERROR_REDIRECT 1 +#define TUX_ERROR_UNUSED 2 +#define TUX_ERROR_CONN_CLOSE 3 +#define TUX_ERROR_CONN_TIMEOUT 4 + +extern void __put_data_sock (tux_req_t *req); + +static inline void put_data_sock (tux_req_t *req) +{ + if (req->data_sock) + __put_data_sock(req); +} + +#define socket_input(sock) \ + (!skb_queue_empty(&(sock)->sk->receive_queue) || \ + 
!skb_queue_empty(&(sock)->sk->error_queue)) + +#define tux_kmalloc(size) \ +({ \ + void *__ptr; \ + \ + while (!(__ptr = kmalloc(size, GFP_KERNEL))) { \ + if (net_ratelimit()) \ + printk(KERN_WARNING "tux: OOM at %s:%d.\n", \ + __FILE__, __LINE__); \ + current->state = TASK_UNINTERRUPTIBLE; \ + schedule_timeout(1); \ + } \ + __ptr; \ +}) + + +#endif --- linux/include/net/tux_u.h.orig +++ linux/include/net/tux_u.h @@ -0,0 +1,163 @@ +#ifndef _NET_TUX_U_H +#define _NET_TUX_U_H + +/* + * TUX - Integrated Application Protocols Layer and Object Cache + * + * Copyright (C) 2000, 2001, Ingo Molnar + * + * tux_u.h: HTTP module API - HTTP interface to user-space + */ + +/* + * Different major versions are not compatible. + * Different minor versions are only downward compatible. + * Different patchlevel versions are downward and upward compatible. + */ +#define TUX_MAJOR_VERSION 3 +#define TUX_MINOR_VERSION 0 +#define TUX_PATCHLEVEL_VERSION 0 + +#define __KERNEL_SYSCALLS__ + +typedef enum http_versions { + HTTP_1_0, + HTTP_1_1 +} http_version_t; + +/* + * Request methods known to HTTP: + */ +typedef enum http_methods { + METHOD_NONE, + METHOD_GET, + METHOD_HEAD, + METHOD_POST, + METHOD_PUT, + NR_METHODS +} http_method_t; + +enum user_req { + TUX_ACTION_STARTUP = 1, + TUX_ACTION_SHUTDOWN = 2, + TUX_ACTION_STARTTHREAD = 3, + TUX_ACTION_STOPTHREAD = 4, + TUX_ACTION_EVENTLOOP = 5, + TUX_ACTION_GET_OBJECT = 6, + TUX_ACTION_SEND_OBJECT = 7, + TUX_ACTION_READ_OBJECT = 8, + TUX_ACTION_FINISH_REQ = 9, + TUX_ACTION_FINISH_CLOSE_REQ = 10, + TUX_ACTION_REGISTER_MODULE = 11, + TUX_ACTION_UNREGISTER_MODULE = 12, + TUX_ACTION_CURRENT_DATE = 13, + TUX_ACTION_REGISTER_MIMETYPE = 14, + TUX_ACTION_READ_HEADERS = 15, + TUX_ACTION_POSTPONE_REQ = 16, + TUX_ACTION_CONTINUE_REQ = 17, + TUX_ACTION_REDIRECT_REQ = 18, + TUX_ACTION_READ_POST_DATA = 19, + TUX_ACTION_SEND_BUFFER = 20, + TUX_ACTION_WATCH_PROXY_SOCKET = 21, + TUX_ACTION_WAIT_PROXY_SOCKET = 22, + TUX_ACTION_QUERY_VERSION = 23, + 
MAX_TUX_ACTION +}; + +enum tux_ret { + TUX_ERROR = -1, + TUX_RETURN_USERSPACE_REQUEST = 0, + TUX_RETURN_EXIT = 1, + TUX_RETURN_SIGNAL = 2, + TUX_CONTINUE_EVENTLOOP = 3, +}; + +#define MAX_URI_LEN 256 +#define MAX_COOKIE_LEN 128 +#define MAX_FIELD_LEN 64 +#define DATE_LEN 30 + +typedef struct user_req_s { + u32 version_major; + u32 version_minor; + u32 version_patch; + u32 http_version; + u32 http_method; + u32 http_status; + + u32 sock; + u32 event; + u32 error; + u32 thread_nr; + u32 bytes_sent; + u32 client_host; + u32 objectlen; + u32 module_index; + u32 keep_alive; + u32 cookies_len; + + u64 id; + u64 priv; + u64 object_addr; + + u8 query[MAX_URI_LEN]; + u8 objectname[MAX_URI_LEN]; + u8 cookies[MAX_COOKIE_LEN]; + u8 content_type[MAX_FIELD_LEN]; + u8 user_agent[MAX_FIELD_LEN]; + u8 accept[MAX_FIELD_LEN]; + u8 accept_charset[MAX_FIELD_LEN]; + u8 accept_encoding[MAX_FIELD_LEN]; + u8 accept_language[MAX_FIELD_LEN]; + u8 cache_control[MAX_FIELD_LEN]; + u8 if_modified_since[MAX_FIELD_LEN]; + u8 negotiate[MAX_FIELD_LEN]; + u8 pragma[MAX_FIELD_LEN]; + u8 referer[MAX_FIELD_LEN]; + u8 new_date[DATE_LEN]; + u8 pad[2]; + +} user_req_t; + +typedef enum ftp_commands { + FTP_COMM_NONE, + FTP_COMM_USER, + FTP_COMM_PASS, + FTP_COMM_ACCT, + FTP_COMM_CWD, + FTP_COMM_CDUP, + FTP_COMM_SMNT, + FTP_COMM_QUIT, + FTP_COMM_REIN, + FTP_COMM_PORT, + FTP_COMM_PASV, + FTP_COMM_TYPE, + FTP_COMM_STRU, + FTP_COMM_MODE, + FTP_COMM_RETR, + FTP_COMM_SIZE, + FTP_COMM_MDTM, + FTP_COMM_STOR, + FTP_COMM_STOU, + FTP_COMM_APPE, + FTP_COMM_ALLO, + FTP_COMM_REST, + FTP_COMM_RNFR, + FTP_COMM_RNTO, + FTP_COMM_ABOR, + FTP_COMM_DELE, + FTP_COMM_RMD, + FTP_COMM_MKD, + FTP_COMM_PWD, + FTP_COMM_LIST, + FTP_COMM_NLST, + FTP_COMM_SITE, + FTP_COMM_SYST, + FTP_COMM_STAT, + FTP_COMM_HELP, + FTP_COMM_NOOP, + FTP_COMM_FEAT, + FTP_COMM_CLNT, +} ftp_command_t; + +#endif --- linux/include/asm-sparc64/fcntl.h.orig +++ linux/include/asm-sparc64/fcntl.h @@ -20,6 +20,7 @@ #define O_DIRECTORY 0x10000 /* must be a directory */ 
#define O_NOFOLLOW 0x20000 /* don't follow links */ #define O_LARGEFILE 0x40000 +#define O_ATOMICLOOKUP 0x80000 /* do atomic file lookup */ #define O_DIRECT 0x100000 /* direct disk access hint */ --- linux/include/asm-ia64/fcntl.h.orig 2000-10-10 02:54:58.000000000 +0200 +++ linux/include/asm-ia64/fcntl.h @@ -28,6 +28,7 @@ #define O_LARGEFILE 0100000 #define O_DIRECTORY 0200000 /* must be a directory */ #define O_NOFOLLOW 0400000 /* don't follow links */ +#define O_ATOMICLOOKUP 01000000 /* do atomic file lookup */ #define F_DUPFD 0 /* dup */ #define F_GETFD 1 /* get close_on_exec */ --- linux/include/asm-x86_64/unistd.h.orig +++ linux/include/asm-x86_64/unistd.h @@ -633,7 +633,6 @@ static inline pid_t setsid(void) return sys_setsid(); } -extern ssize_t sys_write(unsigned int, char *, size_t); static inline ssize_t write(unsigned int fd, char * buf, size_t count) { return sys_write(fd, buf, count); --- linux/include/asm-x86_64/fcntl.h.orig +++ linux/include/asm-x86_64/fcntl.h @@ -17,6 +17,7 @@ #define O_SYNC 010000 #define FASYNC 020000 /* fcntl, for BSD compatibility */ #define O_DIRECT 040000 /* direct disk access hint */ +#define O_ATOMICLOOKUP 0x80000 /* do atomic file lookup */ #define O_LARGEFILE 0100000 #define O_DIRECTORY 0200000 /* must be a directory */ #define O_NOFOLLOW 0400000 /* don't follow links */ --- linux/net/ipv4/route.c.orig +++ linux/net/ipv4/route.c @@ -99,6 +99,8 @@ #include #endif +#define CONFIG_TUX_LOOPBACK_PHYSICAL 0 + #define IP_MAX_MTU 0xFFF0 #define RT_GC_TIMEOUT (300*HZ) @@ -1485,6 +1487,7 @@ int ip_route_input_slow(struct sk_buff * goto brd_input; if (res.type == RTN_LOCAL) { +#if !CONFIG_TUX_LOOPBACK_PHYSICAL int result; result = fib_validate_source(saddr, daddr, tos, loopback_dev.ifindex, @@ -1493,6 +1496,7 @@ int ip_route_input_slow(struct sk_buff * goto martian_source; if (result) flags |= RTCF_DIRECTSRC; +#endif spec_dst = daddr; goto local_input; } @@ -1884,7 +1888,11 @@ int ip_route_output_slow(struct rtable * goto make_route; 
} +#if !CONFIG_TUX_LOOPBACK_PHYSICAL if (fib_lookup(&key, &res)) { +#else + if (main_table->tb_lookup(main_table, &key, &res)) { +#endif res.fi = NULL; if (oldkey->oif) { /* Apparently, routing tables are wrong. Assume, @@ -1922,6 +1930,7 @@ int ip_route_output_slow(struct rtable * goto e_inval; if (res.type == RTN_LOCAL) { +#if !CONFIG_TUX_LOOPBACK_PHYSICAL if (!key.src) key.src = key.dst; if (dev_out) @@ -1934,6 +1943,7 @@ int ip_route_output_slow(struct rtable * res.fi = NULL; flags |= RTCF_LOCAL; goto make_route; +#endif } #ifdef CONFIG_IP_ROUTE_MULTIPATH --- linux/net/ipv4/tcp.c.orig +++ linux/net/ipv4/tcp.c @@ -1279,7 +1279,7 @@ static inline void tcp_eat_skb(struct so * calculation of whether or not we must ACK for the sake of * a window update. */ -static void cleanup_rbuf(struct sock *sk, int copied) +void cleanup_rbuf(struct sock *sk, int copied) { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); int time_to_ack = 0; --- linux/net/ipv4/tcp_minisocks.c.orig +++ linux/net/ipv4/tcp_minisocks.c @@ -683,6 +683,8 @@ struct sock *tcp_create_openreq_child(st if ((filter = newsk->filter) != NULL) sk_filter_charge(newsk, filter); #endif + if (sk->create_child) + sk->create_child(sk, newsk); /* Now setup tcp_opt */ newtp = &(newsk->tp_pinfo.af_tcp); --- linux/net/khttpd/datasending.c.orig +++ linux/net/khttpd/datasending.c @@ -127,7 +127,7 @@ int DataSending(const int CPUNR) desc.count = ReadSize; desc.buf = (char *) CurrentRequest->sock; desc.error = 0; - do_generic_file_read(CurrentRequest->filp, ppos, &desc, sock_send_actor); + do_generic_file_read(CurrentRequest->filp, ppos, &desc, sock_send_actor, 0); if (desc.written>0) { CurrentRequest->BytesSent += desc.written; --- linux/net/Config.in.orig +++ linux/net/Config.in @@ -18,6 +18,7 @@ bool 'Socket Filtering' CONFIG_FILTER tristate 'Unix domain sockets' CONFIG_UNIX bool 'TCP/IP networking' CONFIG_INET if [ "$CONFIG_INET" = "y" ]; then + source net/tux/Config.in source net/ipv4/Config.in if [ "$CONFIG_EXPERIMENTAL" = 
"y" ]; then # IPv6 as module will cause a CRASH if you try to unload it --- linux/net/Makefile.orig +++ linux/net/Makefile @@ -27,6 +27,7 @@ subdir-$(CONFIG_NETFILTER) += ipv6/netfi endif endif +subdir-$(CONFIG_TUX) += tux subdir-$(CONFIG_KHTTPD) += khttpd subdir-$(CONFIG_PACKET) += packet subdir-$(CONFIG_NET_SCHED) += sched --- linux/net/netsyms.c.orig +++ linux/net/netsyms.c @@ -25,6 +25,7 @@ #include #include #include +#include #ifdef CONFIG_HIPPI #include #endif @@ -60,7 +61,7 @@ extern __u32 sysctl_rmem_max; extern struct net_proto_family inet_family_ops; -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) || defined (CONFIG_KHTTPD) || defined (CONFIG_KHTTPD_MODULE) || defined (CONFIG_IP_SCTP_MODULE) +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) || defined (CONFIG_KHTTPD) || defined (CONFIG_KHTTPD_MODULE) || defined (CONFIG_IP_SCTP_MODULE) || defined (CONFIG_TUX) || defined (CONFIG_TUX_MODULE) #include #include #include @@ -297,7 +298,7 @@ EXPORT_SYMBOL(dlci_ioctl_hook); #endif -#if defined (CONFIG_IPV6_MODULE) || defined (CONFIG_KHTTPD) || defined (CONFIG_KHTTPD_MODULE) || defined (CONFIG_IP_SCTP_MODULE) +#if defined (CONFIG_IPV6_MODULE) || defined (CONFIG_KHTTPD) || defined (CONFIG_KHTTPD_MODULE) || defined (CONFIG_IP_SCTP_MODULE) || defined (CONFIG_TUX) || defined (CONFIG_TUX_MODULE) /* inet functions common to v4 and v6 */ EXPORT_SYMBOL(inet_release); EXPORT_SYMBOL(inet_stream_connect); @@ -326,6 +327,7 @@ EXPORT_SYMBOL(ip_queue_xmit); EXPORT_SYMBOL(memcpy_fromiovecend); EXPORT_SYMBOL(csum_partial_copy_fromiovecend); EXPORT_SYMBOL(tcp_v4_lookup_listener); +EXPORT_SYMBOL_GPL(cleanup_rbuf); /* UDP/TCP exported functions for TCPv6 */ EXPORT_SYMBOL(udp_ioctl); EXPORT_SYMBOL(udp_connect); @@ -343,6 +345,7 @@ EXPORT_SYMBOL(tcp_setsockopt); EXPORT_SYMBOL(tcp_getsockopt); EXPORT_SYMBOL(tcp_recvmsg); EXPORT_SYMBOL(tcp_send_synack); +EXPORT_SYMBOL_GPL(tcp_send_skb); EXPORT_SYMBOL(tcp_check_req); EXPORT_SYMBOL(tcp_child_process); 
EXPORT_SYMBOL(tcp_parse_options); @@ -625,4 +628,14 @@ EXPORT_SYMBOL(ethtool_op_set_tx_csum); EXPORT_SYMBOL(ethtool_op_get_sg); EXPORT_SYMBOL(ethtool_op_set_sg); +#ifdef CONFIG_TUX_MODULE +EXPORT_SYMBOL_GPL(tux_module_lock); +EXPORT_SYMBOL_GPL(tux_module); +EXPORT_SYMBOL_GPL(sys_tux_ptr); +#endif + #endif /* CONFIG_NET */ + +EXPORT_SYMBOL_GPL(tux_Dprintk); +EXPORT_SYMBOL_GPL(tux_TDprintk); + --- linux/net/socket.c.orig +++ linux/net/socket.c @@ -83,6 +83,7 @@ #include #include +#include #include static int sock_no_open(struct inode *irrelevant, struct file *dontcare); @@ -111,7 +112,7 @@ static ssize_t sock_sendpage(struct file * in the operation structures but are done directly via the socketcall() multiplexor. */ -static struct file_operations socket_file_ops = { +struct file_operations socket_file_ops = { llseek: no_llseek, read: sock_read, write: sock_write, @@ -325,51 +326,62 @@ static struct dentry_operations sockfs_d * but we take care of internal coherence yet. */ -int sock_map_fd(struct socket *sock) +struct file * sock_map_file(struct socket *sock) { - int fd; + struct file *file; struct qstr this; char name[32]; + file = get_empty_filp(); + + if (!file) + return ERR_PTR(-ENFILE); + + sprintf(name, "[%lu]", sock->inode->i_ino); + this.name = name; + this.len = strlen(name); + this.hash = sock->inode->i_ino; + + file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this); + if (!file->f_dentry) { + put_filp(file); + return ERR_PTR(-ENOMEM); + } + file->f_dentry->d_op = &sockfs_dentry_operations; + d_add(file->f_dentry, sock->inode); + file->f_vfsmnt = mntget(sock_mnt); + + if (sock->file) + BUG(); + sock->file = file; + file->f_op = sock->inode->i_fop = &socket_file_ops; + file->f_mode = 3; + file->f_flags = O_RDWR; + file->f_pos = 0; + + return file; +} + +int sock_map_fd(struct socket *sock) +{ + int fd; + struct file *file; + /* * Find a file descriptor suitable for return to the user. 
*/ fd = get_unused_fd(); - if (fd >= 0) { - struct file *file = get_empty_filp(); - - if (!file) { - put_unused_fd(fd); - fd = -ENFILE; - goto out; - } - - sprintf(name, "[%lu]", sock->inode->i_ino); - this.name = name; - this.len = strlen(name); - this.hash = sock->inode->i_ino; - - file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this); - if (!file->f_dentry) { - put_filp(file); - put_unused_fd(fd); - fd = -ENOMEM; - goto out; - } - file->f_dentry->d_op = &sockfs_dentry_operations; - d_add(file->f_dentry, sock->inode); - file->f_vfsmnt = mntget(sock_mnt); + if (fd < 0) + return fd; - sock->file = file; - file->f_op = sock->inode->i_fop = &socket_file_ops; - file->f_mode = 3; - file->f_flags = O_RDWR; - file->f_pos = 0; - fd_install(fd, file); + file = sock_map_file(sock); + if (IS_ERR(file)) { + put_unused_fd(fd); + return PTR_ERR(file); } + fd_install(fd, file); -out: return fd; } @@ -789,6 +801,8 @@ static int sock_fasync(int fd, struct fi } out: + if (sock->sk != sk) + BUG(); release_sock(sock->sk); return 0; } @@ -1754,3 +1768,42 @@ int socket_get_info(char *buffer, char * len = 0; return len; } + +int tux_Dprintk; +int tux_TDprintk; + +#ifdef CONFIG_TUX_MODULE + +int (*sys_tux_ptr) (unsigned int action, user_req_t *u_info) = NULL; + +struct module *tux_module = NULL; +spinlock_t tux_module_lock = SPIN_LOCK_UNLOCKED; + +asmlinkage int sys_tux (unsigned int action, user_req_t *u_info) +{ + int ret; + + if (current->tux_info) + return sys_tux_ptr(action, u_info); + + ret = -ENOSYS; + spin_lock(&tux_module_lock); + if (!tux_module) + goto out_unlock; + __MOD_INC_USE_COUNT(tux_module); + spin_unlock(&tux_module_lock); + + if (!sys_tux_ptr) + TUX_BUG(); + ret = sys_tux_ptr(action, u_info); + + spin_lock(&tux_module_lock); + __MOD_DEC_USE_COUNT(tux_module); +out_unlock: + spin_unlock(&tux_module_lock); + + return ret; +} + +#endif + --- linux/net/tux/abuf.c.orig +++ linux/net/tux/abuf.c @@ -0,0 +1,182 @@ +/* + * TUX - Integrated Application Protocols Layer and 
Object Cache + * + * Copyright (C) 2000, 2001, Ingo Molnar + * + * abuf.c: async buffer-sending + */ + +#include + +/**************************************************************** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + ****************************************************************/ + +char * get_abuf (tux_req_t *req, unsigned int max_size) +{ + threadinfo_t *ti = req->ti; + struct page *page; + char *buf; + unsigned int offset; + unsigned int left; + + if (req->abuf.page || req->abuf.buf || req->abuf.size) + TUX_BUG(); + + if (max_size > PAGE_SIZE) + BUG(); + offset = ti->header_offset; + if (offset > PAGE_SIZE) + TUX_BUG(); + left = PAGE_SIZE - offset; + if (!max_size) + BUG(); + page = ti->header_cache; + if ((left < max_size) || !page) { + while (!(page = alloc_pages(GFP_KERNEL, 0))) { + if (net_ratelimit()) + printk(KERN_WARNING "tux: OOM in get_abuf()!\n"); + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(1); + } + + if (ti->header_cache) + __free_page(ti->header_cache); + ti->header_cache = page; + ti->header_offset = 0; + offset = 0; + } + buf = page_address(page) + offset; + + if (!page) + BUG(); + req->abuf.page = page; + req->abuf.buf = buf; + req->abuf.size = 0; + req->abuf.offset = offset; + req->abuf.flags = 0; + get_page(req->abuf.page); + + return buf; +} + +static void do_send_abuf 
(tux_req_t *req, int cachemiss); + +void send_abuf (tux_req_t *req, unsigned int size, unsigned long flags) +{ + threadinfo_t *ti = req->ti; + + Dprintk("send_abuf(req: %p, sock: %p): %p(%p), size:%d, off:%d, flags:%08lx\n", req, req->sock, req->abuf.page, req->abuf.buf, size, req->abuf.offset, flags); + + ti->header_offset += size; + if (ti->header_offset > PAGE_SIZE) + TUX_BUG(); + if (req->abuf.offset + req->abuf.size > PAGE_SIZE) + TUX_BUG(); + + req->abuf.flags = flags | MSG_NOSIGNAL; + req->abuf.size = size; + + add_tux_atom(req, do_send_abuf); +} + +static void do_send_abuf (tux_req_t *req, int cachemiss) +{ + int ret; + + req->sock->sk->tp_pinfo.af_tcp.nonagle = 2; + +repeat: + Dprintk("do_send_abuf(%p,%d): %p(%p), size:%d, off:%d, flags:%08lx\n", + req, cachemiss, + req->abuf.page, req->abuf.buf, req->abuf.size, + req->abuf.offset, req->abuf.flags); + + if (tux_zerocopy_header) + ret = tcp_sendpage(req->sock, req->abuf.page, + req->abuf.offset, req->abuf.size, req->abuf.flags); + else { + mm_segment_t oldmm; + oldmm = get_fs(); set_fs(KERNEL_DS); + ret = send_sync_buf(req, req->sock, req->abuf.buf, + req->abuf.size, req->abuf.flags); + set_fs(oldmm); + } + + + Dprintk("do_send_abuf: ret: %d\n", ret); + if (!ret) + TUX_BUG(); + + if (ret < 0) { + if (ret != -EAGAIN) { + TDprintk("ret: %d, req->error = TUX_ERROR_CONN_CLOSE.\n", ret); + req->error = TUX_ERROR_CONN_CLOSE; + req->atom_idx = 0; + req->in_file.f_pos = 0; + __free_page(req->abuf.page); + memset(&req->abuf, 0, sizeof(req->abuf)); + zap_request(req, cachemiss); + return; + } + add_tux_atom(req, do_send_abuf); + if (add_output_space_event(req, req->sock)) { + del_tux_atom(req); + goto repeat; + } + return; + } + + req->abuf.buf += ret; + req->abuf.offset += ret; + req->abuf.size -= ret; + + if ((int)req->abuf.size < 0) + TUX_BUG(); + if (req->abuf.size > 0) + goto repeat; + + Dprintk("DONE do_send_abuf: %p(%p), size:%d, off:%d, flags:%08lx\n", + req->abuf.page, req->abuf.buf, req->abuf.size, + 
req->abuf.offset, req->abuf.flags); + + __free_page(req->abuf.page); + + memset(&req->abuf, 0, sizeof(req->abuf)); + + add_req_to_workqueue(req); +} + +void __send_async_message (tux_req_t *req, const char *message, + int status, unsigned int size, int push) +{ + unsigned int flags; + char *buf; + + Dprintk("TUX: sending %d reply (%d bytes)!\n", status, size); + Dprintk("request %p, reply: %s\n", req, message); + if (!size) + TUX_BUG(); + buf = get_abuf(req, size); + memcpy(buf, message, size); + + req->status = status; + flags = MSG_DONTWAIT; + if (!push) + flags |= MSG_MORE; + send_abuf(req, size, flags); + add_req_to_workqueue(req); +} --- linux/net/tux/accept.c.orig +++ linux/net/tux/accept.c @@ -0,0 +1,852 @@ +/* + * TUX - Integrated Application Protocols Layer and Object Cache + * + * Copyright (C) 2000, 2001, Ingo Molnar + * + * accept.c: accept new connections, allocate requests + */ + +#include + +/**************************************************************** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + ****************************************************************/ + +unsigned int tux_ack_pingpong = 1; +unsigned int tux_push_all = 0; +unsigned int tux_zerocopy_parse = 1; + +static int __idle_event (tux_req_t *req); +static int __output_space_event (tux_req_t *req); + +struct socket * start_listening(tux_socket_t *listen, int nr) +{ + struct sockaddr_in sin; + struct socket *sock = NULL; + struct sock *sk; + struct tcp_opt *tp; + int err; + u16 port = listen->port; + u32 addr = listen->ip; + tux_proto_t *proto = listen->proto; + + /* Create a listening socket: */ + + err = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); + if (err < 0) { + printk(KERN_ERR "TUX: error %d creating socket.\n", err); + goto err; + } + + /* Bind the socket: */ + + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(addr); + sin.sin_port = htons(port); + + sk = sock->sk; + sk->reuse = 1; + sk->urginline = 1; + + err = sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin)); + if (err < 0) { + printk(KERN_ERR "TUX: error %d binding socket. 
This means that probably some other process is (or was a short time ago) using addr %s://%d.%d.%d.%d:%d.\n", + err, proto->name, HIPQUAD(addr), port); + goto err; + } + + tp = &sk->tp_pinfo.af_tcp; + tp->ack.pingpong = tux_ack_pingpong; + + sk->linger = 0; + sk->lingertime = 0; + tp->linger2 = tux_keepalive_timeout * HZ; + + if (proto->defer_accept && !tux_keepalive_timeout && tux_defer_accept) + tp->defer_accept = 1; + + /* Now, start listening on the socket */ + + err = sock->ops->listen(sock, tux_max_backlog); + if (err) { + printk(KERN_ERR "TUX: error %d listening on socket.\n", err); + goto err; + } + + printk(KERN_NOTICE "TUX: thread %d listens on %s://%d.%d.%d.%d:%d.\n", + nr, proto->name, HIPQUAD(addr), port); + return sock; + +err: + if (sock) + sock_release(sock); + return NULL; +} + +static inline void __kfree_req (tux_req_t *req, threadinfo_t * ti) +{ + list_del(&req->all); + DEBUG_DEL_LIST(&req->all); + ti->nr_requests--; + kfree(req); +} + +int flush_freequeue (threadinfo_t * ti) +{ + struct list_head *tmp; + unsigned long flags; + tux_req_t *req; + int count = 0; + + spin_lock_irqsave(&ti->free_requests_lock,flags); + while (ti->nr_free_requests) { + ti->nr_free_requests--; + tmp = ti->free_requests.next; + req = list_entry(tmp, tux_req_t, free); + list_del(tmp); + DEBUG_DEL_LIST(tmp); + DEC_STAT(nr_free_pending); + __kfree_req(req, ti); + count++; + } + spin_unlock_irqrestore(&ti->free_requests_lock,flags); + + return count; +} + +static tux_req_t * kmalloc_req (threadinfo_t * ti) +{ + struct list_head *tmp; + unsigned long flags; + tux_req_t *req; + + spin_lock_irqsave(&ti->free_requests_lock, flags); + if (ti->nr_free_requests) { + ti->nr_free_requests--; + tmp = ti->free_requests.next; + req = list_entry(tmp, tux_req_t, free); + list_del(tmp); + DEBUG_DEL_LIST(tmp); + DEC_STAT(nr_free_pending); + req->magic = TUX_MAGIC; + spin_unlock_irqrestore(&ti->free_requests_lock, flags); + } else { + spin_unlock_irqrestore(&ti->free_requests_lock, flags); + 
req = tux_kmalloc(sizeof(*req)); + ti->nr_requests++; + memset (req, 0, sizeof(*req)); + list_add(&req->all, &ti->all_requests); + } + req->magic = TUX_MAGIC; + INC_STAT(nr_allocated); + init_waitqueue_entry(&req->sleep, current); + init_waitqueue_entry(&req->ftp_sleep, current); + INIT_LIST_HEAD(&req->work); + INIT_LIST_HEAD(&req->free); + INIT_LIST_HEAD(&req->lru); + req->ti = ti; + req->total_bytes = 0; + SET_TIMESTAMP(req->accept_timestamp); + req->first_timestamp = jiffies; + req->fd = -1; + init_timer(&req->keepalive_timer); + init_timer(&req->output_timer); + + Dprintk("allocated NEW req %p.\n", req); + return req; +} + +void kfree_req (tux_req_t *req) +{ + threadinfo_t * ti = req->ti; + unsigned long flags; + + Dprintk("freeing req %p.\n", req); + + if (req->magic != TUX_MAGIC) + TUX_BUG(); + spin_lock_irqsave(&ti->free_requests_lock,flags); + req->magic = 0; + DEC_STAT(nr_allocated); + if (req->sock || req->dentry || req->private) + TUX_BUG(); + if (ti->nr_free_requests > tux_max_free_requests) + __kfree_req(req, ti); + else { + req->error = 0; + ti->nr_free_requests++; + + // the free requests queue is LIFO + list_add(&req->free, &ti->free_requests); + INC_STAT(nr_free_pending); + } + spin_unlock_irqrestore(&ti->free_requests_lock,flags); +} + +static void __add_req_to_workqueue (tux_req_t *req) +{ + threadinfo_t *ti = req->ti; + + if (!list_empty(&req->work)) + TUX_BUG(); + Dprintk("work-queueing request %p at %p/%p.\n", req, __builtin_return_address(0), __builtin_return_address(1)); + if (connection_too_fast(req)) + list_add_tail(&req->work, &ti->work_pending); + else + list_add(&req->work, &ti->work_pending); + INC_STAT(nr_work_pending); + wake_up_process(ti->thread); + return; +} + +void add_req_to_workqueue (tux_req_t *req) +{ + unsigned long flags; + threadinfo_t *ti = req->ti; + + spin_lock_irqsave(&ti->work_lock, flags); + __add_req_to_workqueue(req); + spin_unlock_irqrestore(&ti->work_lock, flags); +} + +void del_output_timer (tux_req_t *req) +{ 
+#if CONFIG_SMP + if (!spin_is_locked(&req->ti->work_lock)) + TUX_BUG(); +#endif + if (!list_empty(&req->lru)) { + list_del(&req->lru); + DEBUG_DEL_LIST(&req->lru); + req->ti->nr_lru--; + } + Dprintk("del output timeout for req %p.\n", req); + del_timer(&req->output_timer); +} + +static void output_timeout_fn (unsigned long data); + +#define OUTPUT_TIMEOUT HZ + +static void add_output_timer (tux_req_t *req) +{ + struct timer_list *timer = &req->output_timer; + + timer->data = (unsigned long) req; + timer->function = &output_timeout_fn; + mod_timer(timer, jiffies + OUTPUT_TIMEOUT); +} + +static void output_timeout_fn (unsigned long data) +{ + tux_req_t *req = (tux_req_t *)data; + + if (connection_too_fast(req)) { + add_output_timer(req); +// mod_timer(&req->output_timer, jiffies + OUTPUT_TIMEOUT); + return; + } + output_space_event(req); +} + +void output_timeout (tux_req_t *req) +{ + Dprintk("output timeout for req %p.\n", req); + if (test_and_set_bit(0, &req->wait_output_space)) + TUX_BUG(); + INC_STAT(nr_output_space_pending); + add_output_timer(req); +} + +void __del_keepalive_timer (tux_req_t *req) +{ +#if CONFIG_SMP + if (!spin_is_locked(&req->ti->work_lock)) + TUX_BUG(); +#endif + if (!list_empty(&req->lru)) { + list_del(&req->lru); + DEBUG_DEL_LIST(&req->lru); + req->ti->nr_lru--; + } + Dprintk("del keepalive timeout for req %p.\n", req); + del_timer(&req->keepalive_timer); +} + +static void keepalive_timeout_fn (unsigned long data) +{ + tux_req_t *req = (tux_req_t *)data; + +#if CONFIG_TUX_DEBUG + Dprintk("req %p timed out after %d sec!\n", req, tux_keepalive_timeout); + if (tux_Dprintk) + print_req(req); +#endif + Dprintk("req->error = TUX_ERROR_CONN_TIMEOUT!\n"); + req->error = TUX_ERROR_CONN_TIMEOUT; + if (!idle_event(req)) + output_space_event(req); +} + +void __add_keepalive_timer (tux_req_t *req) +{ + struct timer_list *timer = &req->keepalive_timer; + + if (!tux_keepalive_timeout) + TUX_BUG(); +#if CONFIG_SMP + if 
(!spin_is_locked(&req->ti->work_lock)) + TUX_BUG(); +#endif + + if (!list_empty(&req->lru)) + TUX_BUG(); + if (req->ti->nr_lru > tux_max_keepalives) { + struct list_head *head, *last; + tux_req_t *last_req; + + head = &req->ti->lru; + last = head->prev; + if (last == head) + TUX_BUG(); + last_req = list_entry(last, tux_req_t, lru); + list_del(last); + DEBUG_DEL_LIST(last); + req->ti->nr_lru--; + + Dprintk("LRU-aging req %p!\n", last_req); + last_req->error = TUX_ERROR_CONN_TIMEOUT; + if (!__idle_event(last_req)) + __output_space_event(last_req); + } + list_add(&req->lru, &req->ti->lru); + req->ti->nr_lru++; + + timer->expires = jiffies + tux_keepalive_timeout * HZ; + timer->data = (unsigned long) req; + timer->function = &keepalive_timeout_fn; + add_timer(timer); +} + +static int __output_space_event (tux_req_t *req) +{ + if (!req || (req->magic != TUX_MAGIC)) + TUX_BUG(); + + if (!test_and_clear_bit(0, &req->wait_output_space)) { + Dprintk("output space ready event at <%p>, on non-idle %p.\n", __builtin_return_address(0), req); + return 0; + } + + Dprintk("output space ready event at <%p>, %p was waiting!\n", __builtin_return_address(0), req); + DEC_STAT(nr_output_space_pending); + + del_keepalive_timer(req); + del_output_timer(req); + + __add_req_to_workqueue(req); + return 1; +} + +int output_space_event (tux_req_t *req) +{ + int ret; + unsigned long flags; + + spin_lock_irqsave(&req->ti->work_lock, flags); + ret = __output_space_event(req); + spin_unlock_irqrestore(&req->ti->work_lock, flags); + + return ret; +} + +static int __idle_event (tux_req_t *req) +{ + threadinfo_t *ti; + + if (!req || (req->magic != TUX_MAGIC)) + TUX_BUG(); + ti = req->ti; + + if (!test_and_clear_bit(0, &req->idle_input)) { + Dprintk("data ready event at <%p>, on non-idle %p.\n", __builtin_return_address(0), req); + return 0; + } + + Dprintk("data ready event at <%p>, %p was idle!\n", __builtin_return_address(0), req); + del_keepalive_timer(req); + del_output_timer(req); + 
DEC_STAT(nr_idle_input_pending); + + req->sock->sk->tp_pinfo.af_tcp.ack.pingpong = tux_ack_pingpong; + SET_TIMESTAMP(req->accept_timestamp); + + __add_req_to_workqueue(req); + + return 1; +} + +int idle_event (tux_req_t *req) +{ + int ret; + unsigned long flags; + + spin_lock_irqsave(&req->ti->work_lock, flags); + ret = __idle_event(req); + spin_unlock_irqrestore(&req->ti->work_lock, flags); + + return ret; +} + +#define HANDLE_CALLBACK_1(callback, tux_name, real_name, param...) \ + tux_req_t *req; \ + \ + read_lock(&sk->callback_lock); \ + req = sk->user_data; \ + \ + Dprintk("callback "#callback"(%p) req %p.\n", \ + sk->callback, req); \ + \ + if (!req) { \ + if (sk->callback == tux_name) { \ + printk("BUG: "#callback" "#tux_name" "#real_name" no req!"); \ + TUX_BUG(); \ + } \ + read_unlock(&sk->callback_lock); \ + if (sk->callback) \ + sk->callback(param); \ + return; \ + } \ + +#define HANDLE_CALLBACK_2(callback, tux_name, real_name, param...) \ + Dprintk(#tux_name"() on %p.\n", req); \ + if (req->magic != TUX_MAGIC) \ + TUX_BUG(); \ + if (req->real_name) \ + req->real_name(param); + +#define HANDLE_CALLBACK(callback, tux_name, real_name, param...) 
\ + HANDLE_CALLBACK_1(callback,tux_name,real_name,param) \ + HANDLE_CALLBACK_2(callback,tux_name,real_name,param) + +static void tux_data_ready (struct sock *sk, int len) +{ + HANDLE_CALLBACK_1(data_ready, tux_data_ready, real_data_ready, sk, len); + + if (!idle_event(req)) + output_space_event(req); + read_unlock(&sk->callback_lock); +} + +static void tux_write_space (struct sock *sk) +{ + HANDLE_CALLBACK(write_space, tux_write_space, real_write_space, sk); + + Dprintk("sk->wmem_queued: %d, sk->sndbuf: %d.\n", + sk->wmem_queued, sk->sndbuf); + + if (tcp_wspace(sk) >= tcp_min_write_space(sk)) { + clear_bit(SOCK_NOSPACE, &sk->socket->flags); + if (!idle_event(req)) + output_space_event(req); + } + read_unlock(&sk->callback_lock); +} + +static void tux_error_report (struct sock *sk) +{ + HANDLE_CALLBACK(error_report, tux_error_report, real_error_report, sk); + + req->error = TUX_ERROR_CONN_CLOSE; + if (!idle_event(req)) + output_space_event(req); + read_unlock(&sk->callback_lock); +} + +static void tux_state_change (struct sock *sk) +{ + HANDLE_CALLBACK(state_change, tux_state_change, real_state_change, sk); + + if (req->sock && req->sock->sk && + (req->sock->sk->state > TCP_ESTABLISHED)) { + Dprintk("req %p changed to TCP non-established!\n", req); + Dprintk("req->sock: %p\n", req->sock); + if (req->sock) + Dprintk("req->sock->sk: %p\n", req->sock->sk); + if (req->sock && req->sock->sk) + Dprintk("TCP state: %d\n", req->sock->sk->state); + Dprintk("req->error = TUX_ERROR_CONN_CLOSE!\n"); + req->error = TUX_ERROR_CONN_CLOSE; + } + if (!idle_event(req)) + output_space_event(req); + read_unlock(&sk->callback_lock); +} + +static void tux_destruct (struct sock *sk) +{ + BUG(); +} + +static void tux_ftp_data_ready (struct sock *sk, int len) +{ + HANDLE_CALLBACK_1(data_ready, tux_ftp_data_ready, + ftp_real_data_ready, sk, len); + if (!idle_event(req)) + output_space_event(req); + read_unlock(&sk->callback_lock); +} + +static void tux_ftp_write_space (struct sock *sk) +{ + 
HANDLE_CALLBACK_1(write_space, tux_ftp_write_space, + ftp_real_write_space, sk); + + Dprintk("sk->wmem_queued: %d, sk->sndbuf: %d.\n", + sk->wmem_queued, sk->sndbuf); + + if (tcp_wspace(sk) >= sk->sndbuf/10*8) { + clear_bit(SOCK_NOSPACE, &sk->socket->flags); + if (!idle_event(req)) + output_space_event(req); + } + read_unlock(&sk->callback_lock); +} + +static void tux_ftp_error_report (struct sock *sk) +{ + HANDLE_CALLBACK(error_report, tux_ftp_error_report, + ftp_real_error_report, sk); + + TDprintk("req %p sock %p got TCP errors on FTP data connection!\n", req, sk); + TDprintk("req->error = TUX_ERROR_CONN_CLOSE!\n"); + req->error = TUX_ERROR_CONN_CLOSE; + if (!idle_event(req)) + output_space_event(req); + read_unlock(&sk->callback_lock); +} + +static void tux_ftp_state_change (struct sock *sk) +{ + HANDLE_CALLBACK(state_change, tux_ftp_state_change, + ftp_real_state_change, sk); + + if (req->sock && req->sock->sk && + (req->sock->sk->state > TCP_ESTABLISHED)) { + Dprintk("req %p FTP control sock changed to TCP non-established!\n", req); + Dprintk("req->sock: %p\n", req->sock); + TDprintk("req->error = TUX_ERROR_CONN_CLOSE!\n"); + + req->error = TUX_ERROR_CONN_CLOSE; + } + if (!idle_event(req)) + output_space_event(req); + read_unlock(&sk->callback_lock); +} + +static void tux_ftp_create_child (struct sock *sk, struct sock *newsk) +{ + HANDLE_CALLBACK(create_child, tux_ftp_create_child, + ftp_real_create_child, sk, newsk); + + newsk->user_data = NULL; + newsk->data_ready = req->ftp_real_data_ready; + newsk->state_change = req->ftp_real_state_change; + newsk->write_space = req->ftp_real_write_space; + newsk->error_report = req->ftp_real_error_report; + newsk->create_child = req->ftp_real_create_child; + newsk->destruct = req->ftp_real_destruct; + + if (!idle_event(req)) + output_space_event(req); + read_unlock(&sk->callback_lock); +} + +static void tux_ftp_destruct (struct sock *sk) +{ + BUG(); +} + +static void link_tux_socket (tux_req_t *req, struct socket *sock) 
+{ + struct sock *sk = sock->sk; + + if (req->sock) + TUX_BUG(); + if (sk->destruct == tux_destruct) + TUX_BUG(); + /* + * (No need to lock the socket, we just want to + * make sure that events from now on go through + * tux_data_ready()) + */ + write_lock_irq(&sk->callback_lock); + + req->sock = sock; + sk->user_data = req; + + req->real_data_ready = sk->data_ready; + req->real_state_change = sk->state_change; + req->real_write_space = sk->write_space; + req->real_error_report = sk->error_report; + req->real_destruct = sk->destruct; + + sk->data_ready = tux_data_ready; + sk->state_change = tux_state_change; + sk->write_space = tux_write_space; + sk->error_report = tux_error_report; + sk->destruct = tux_destruct; + + write_unlock_irq(&sk->callback_lock); + + if (req->real_destruct == tux_destruct) + TUX_BUG(); + req->client_addr = sk->daddr; + req->client_port = sk->dport; + + add_wait_queue(sk->sleep, &req->sleep); +} + +void __link_data_socket (tux_req_t *req, struct socket *sock, + struct sock *sk) +{ + /* + * (No need to lock the socket, we just want to + * make sure that events from now on go through + * tux_data_ready()) + */ + write_lock_irq(&sk->callback_lock); + + req->data_sock = sock; + sk->user_data = req; + + req->ftp_real_data_ready = sk->data_ready; + req->ftp_real_state_change = sk->state_change; + req->ftp_real_write_space = sk->write_space; + req->ftp_real_error_report = sk->error_report; + req->ftp_real_create_child = sk->create_child; + req->ftp_real_destruct = sk->destruct; + + sk->data_ready = tux_ftp_data_ready; + sk->state_change = tux_ftp_state_change; + sk->write_space = tux_ftp_write_space; + sk->error_report = tux_ftp_error_report; + sk->create_child = tux_ftp_create_child; + sk->destruct = tux_ftp_destruct; + + if (req->ftp_real_destruct == tux_ftp_destruct) + TUX_BUG(); + + write_unlock_irq(&sk->callback_lock); + + add_wait_queue(sk->sleep, &req->ftp_sleep); +} + +void link_tux_data_socket (tux_req_t *req, struct socket *sock) +{ + 
struct sock *sk = sock->sk; + + if (req->data_sock) + TUX_BUG(); + if (sk->destruct == tux_ftp_destruct) + TUX_BUG(); + __link_data_socket(req, sock, sk); +} + +void unlink_tux_socket (tux_req_t *req) +{ + struct sock *sk; + + if (!req->sock || !req->sock->sk) + return; + sk = req->sock->sk; + + write_lock_irq(&sk->callback_lock); + if (!sk->user_data) + TUX_BUG(); + if (req->real_destruct == tux_destruct) + TUX_BUG(); + + sk->user_data = NULL; + + sk->data_ready = req->real_data_ready; + sk->state_change = req->real_state_change; + sk->write_space = req->real_write_space; + sk->error_report = req->real_error_report; + sk->destruct = req->real_destruct; + + if (sk->destruct == tux_destruct) + TUX_BUG(); + + req->real_data_ready = NULL; + req->real_state_change = NULL; + req->real_write_space = NULL; + req->real_error_report = NULL; + req->real_destruct = NULL; + + write_unlock_irq(&sk->callback_lock); + + remove_wait_queue(sk->sleep, &req->sleep); +} + +void unlink_tux_data_socket (tux_req_t *req) +{ + struct sock *sk; + + if (!req->data_sock || !req->data_sock->sk) + return; + sk = req->data_sock->sk; + + write_lock_irq(&sk->callback_lock); + + if (req->real_destruct == tux_ftp_destruct) + TUX_BUG(); + + sk->user_data = NULL; + sk->data_ready = req->ftp_real_data_ready; + sk->state_change = req->ftp_real_state_change; + sk->write_space = req->ftp_real_write_space; + sk->error_report = req->ftp_real_error_report; + sk->create_child = req->ftp_real_create_child; + sk->destruct = req->ftp_real_destruct; + + req->ftp_real_data_ready = NULL; + req->ftp_real_state_change = NULL; + req->ftp_real_write_space = NULL; + req->ftp_real_error_report = NULL; + req->ftp_real_create_child = NULL; + req->ftp_real_destruct = NULL; + + write_unlock_irq(&sk->callback_lock); + + if (sk->destruct == tux_ftp_destruct) + TUX_BUG(); + + remove_wait_queue(sk->sleep, &req->ftp_sleep); +} + +void add_tux_atom (tux_req_t *req, atom_func_t *atom) +{ + Dprintk("adding TUX atom %p to req %p, 
atom_idx: %d, at %p/%p.\n", + atom, req, req->atom_idx, __builtin_return_address(0), __builtin_return_address(1)); + if (req->atom_idx == MAX_TUX_ATOMS) + TUX_BUG(); + req->atoms[req->atom_idx] = atom; + req->atom_idx++; +} + +void del_tux_atom (tux_req_t *req) +{ + if (!req->atom_idx) + TUX_BUG(); + req->atom_idx--; + Dprintk("removing TUX atom %p to req %p, atom_idx: %d, at %p.\n", + req->atoms[req->atom_idx], req, req->atom_idx, __builtin_return_address(0)); +} + +void tux_schedule_atom (tux_req_t *req, int cachemiss) +{ + if (!list_empty(&req->work)) + TUX_BUG(); + if (!req->atom_idx) + TUX_BUG(); + req->atom_idx--; + Dprintk("DOING TUX atom %p, req %p, atom_idx: %d, at %p.\n", + req->atoms[req->atom_idx], req, req->atom_idx, __builtin_return_address(0)); + req->atoms[req->atom_idx](req, cachemiss); + Dprintk("DONE TUX atom %p, req %p, atom_idx: %d, at %p.\n", + req->atoms[req->atom_idx], req, req->atom_idx, __builtin_return_address(0)); +} + +/* + * Puts newly accepted connections into the inputqueue. This is the + * first step in the life of a TUX request. + */ +int accept_requests (threadinfo_t *ti) +{ + int count = 0, last_count = 0, error, socknr = 0; + struct socket *sock, *new_sock; + struct tcp_opt *tp1, *tp2; + tux_req_t *req; + + if (ti->nr_requests > tux_max_connect) + goto out; + +repeat: + for (socknr = 0; socknr < CONFIG_TUX_NUMSOCKETS; socknr++) { + tux_listen_t *tux_listen; + + tux_listen = ti->listen + socknr; + sock = tux_listen->sock; + if (!sock) + break; + if (current->need_resched) + break; + + tp1 = &sock->sk->tp_pinfo.af_tcp; + /* + * Quick test to see if there are connections on the queue. + * This is cheaper than accept() itself because this saves us + * the allocation of a new socket. 
(Which doesn't seem to be + * used anyway) + */ + if (tp1->accept_queue) { + tux_proto_t *proto; + + if (!count++) + __set_task_state(current, TASK_RUNNING); + + new_sock = sock_alloc(); + if (!new_sock) + goto out; + + new_sock->type = sock->type; + new_sock->ops = sock->ops; + + error = sock->ops->accept(sock, new_sock, O_NONBLOCK); + if (error < 0) + goto err; + if (new_sock->sk->state != TCP_ESTABLISHED) + goto err; + + tp2 = &new_sock->sk->tp_pinfo.af_tcp; + tp2->nonagle = 2; + tp2->ack.pingpong = tux_ack_pingpong; + new_sock->sk->reuse = 1; + new_sock->sk->urginline = 1; + + /* Allocate a request-entry for the connection */ + req = kmalloc_req(ti); + if (!req) + BUG(); + link_tux_socket(req, new_sock); + + proto = req->proto = tux_listen->proto; + + proto->got_request(req); + } + } + if (count != last_count) { + last_count = count; + goto repeat; + } +out: + return count; +err: + sock_release(new_sock); + goto out; +} + --- linux/net/tux/cachemiss.c.orig +++ linux/net/tux/cachemiss.c @@ -0,0 +1,261 @@ +/* + * TUX - Integrated Application Protocols Layer and Object Cache + * + * Copyright (C) 2000, 2001, Ingo Molnar + * + * cachemiss.c: handle the 'slow IO path' by queueing not-yet-cached + * requests to the IO-thread pool. Dynamic load balancing is done + * between IO threads, based on the number of requests they have pending. + */ + +#include +#include + +/**************************************************************** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ ****************************************************************/
+
+void queue_cachemiss (tux_req_t *req)
+{
+	iothread_t *iot = req->ti->iot;
+
+	Dprintk("queueing_cachemiss(req:%p) (req->cwd_dentry: %p) at %p:%p.\n",
+		req, req->cwd_dentry, __builtin_return_address(0), __builtin_return_address(1));
+	if (req->idle_input || req->wait_output_space)
+		TUX_BUG();
+	req->had_cachemiss = 1;
+	if (!list_empty(&req->work))
+		TUX_BUG();
+	spin_lock(&iot->async_lock);
+	if (connection_too_fast(req))
+		list_add_tail(&req->work, &iot->async_queue);
+	else
+		list_add(&req->work, &iot->async_queue);
+	iot->nr_async_pending++;
+	INC_STAT(nr_cachemiss_pending);
+	spin_unlock(&iot->async_lock);
+
+	wake_up(&iot->async_sleep);
+}
+
+static tux_req_t * get_cachemiss (iothread_t *iot)
+{
+	struct list_head *tmp;
+	tux_req_t *req = NULL;
+
+	spin_lock(&iot->async_lock);
+	if (!list_empty(&iot->async_queue)) {
+
+		tmp = iot->async_queue.next;
+		req = list_entry(tmp, tux_req_t, work);
+
+		Dprintk("get_cachemiss(%p): got req %p.\n", iot, req);
+		list_del(tmp);
+		DEBUG_DEL_LIST(tmp);
+		iot->nr_async_pending--;
+		DEC_STAT(nr_cachemiss_pending);
+
+		if (req->ti->iot != iot)
+			TUX_BUG();
+	}
+	spin_unlock(&iot->async_lock);
+	return req;
+}
+
+struct file * tux_open_file (char *filename, int mode)
+{
+	struct file *filp;
+
+	if (!filename)
+		TUX_BUG();
+
+	/* Rule no. 3 -- Does the file exist ?
*/ + + filp = filp_open(filename, mode, 0600); + + if (IS_ERR(filp) || !filp || !filp->f_dentry) + goto err; + +out: + return filp; +err: + Dprintk("filp_open() error: %d.\n", (int)filp); + filp = NULL; + goto out; +} + +static int cachemiss_thread (void *data) +{ + tux_req_t *req; + struct k_sigaction *ka; + DECLARE_WAITQUEUE(wait, current); + iothread_t *iot = data; + int nr = iot->ti->cpu, wake_up; + + Dprintk("iot %p/%p got started.\n", iot, current); + drop_permissions(); + + spin_lock(&iot->async_lock); + iot->threads++; + sprintf(current->comm, "async IO %d/%d", nr, iot->threads); + + + spin_lock_irq(¤t->sigmask_lock); + ka = current->sig->action + SIGCHLD-1; + ka->sa.sa_handler = SIG_IGN; + siginitsetinv(¤t->blocked, sigmask(SIGCHLD)); + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + spin_unlock(&iot->async_lock); +#if CONFIG_SMP + { + unsigned int mask; + + mask = 1 << nr; + if (cpu_online_map & mask) + set_cpus_allowed(current, mask); + } +#endif + + add_wait_queue_exclusive(&iot->async_sleep, &wait); + + for (;;) { + while (!list_empty(&iot->async_queue) && + (req = get_cachemiss(iot))) { + + if (!req->atom_idx) { + add_tux_atom(req, flush_request); + add_req_to_workqueue(req); + continue; + } + tux_schedule_atom(req, 1); + if (signal_pending(current)) + flush_all_signals(); + } + if (signal_pending(current)) + flush_all_signals(); + if (!list_empty(&iot->async_queue)) + continue; + if (iot->shutdown) { + Dprintk("iot %p/%p got shutdown!\n", iot, current); + break; + } + __set_current_state(TASK_INTERRUPTIBLE); + if (list_empty(&iot->async_queue)) { + Dprintk("iot %p/%p going to sleep.\n", iot, current); + schedule(); + Dprintk("iot %p/%p got woken up.\n", iot, current); + } + __set_current_state(TASK_RUNNING); + } + + remove_wait_queue(&iot->async_sleep, &wait); + + wake_up = 0; + spin_lock(&iot->async_lock); + if (!--iot->threads) + wake_up = 1; + spin_unlock(&iot->async_lock); + Dprintk("iot %p/%p has finished shutdown!\n", iot, 
current); + if (wake_up) { + Dprintk("iot %p/%p waking up master.\n", iot, current); + wake_up(&iot->wait_shutdown); + } + + return 0; +} + +static void __stop_cachemiss_threads (iothread_t *iot) +{ + DECLARE_WAITQUEUE(wait, current); + + Dprintk("stopping async IO threads %p.\n", iot); + add_wait_queue(&iot->wait_shutdown, &wait); + + spin_lock(&iot->async_lock); + if (iot->shutdown) + TUX_BUG(); + if (!iot->threads) + TUX_BUG(); + iot->shutdown = 1; + wake_up_all(&iot->async_sleep); + spin_unlock(&iot->async_lock); + + __set_current_state(TASK_UNINTERRUPTIBLE); + Dprintk("waiting for async IO threads %p to exit.\n", iot); + schedule(); + remove_wait_queue(&iot->wait_shutdown, &wait); + + if (iot->threads) + TUX_BUG(); + if (iot->nr_async_pending) + TUX_BUG(); + Dprintk("stopped async IO threads %p.\n", iot); +} + +void stop_cachemiss_threads (threadinfo_t *ti) +{ + iothread_t *iot = ti->iot; + + if (!iot) + TUX_BUG(); + if (iot->nr_async_pending) + TUX_BUG(); + __stop_cachemiss_threads(iot); + ti->iot = NULL; + kfree(iot); +} + +int start_cachemiss_threads (threadinfo_t *ti) +{ + int i, pid; + + iothread_t *iot; + + iot = kmalloc(sizeof(*iot), GFP_KERNEL); + if (!iot) + return -ENOMEM; + memset(iot, 0, sizeof(*iot)); + + iot->ti = ti; + iot->async_lock = SPIN_LOCK_UNLOCKED; + iot->nr_async_pending = 0; + INIT_LIST_HEAD(&iot->async_queue); + init_waitqueue_head(&iot->async_sleep); + init_waitqueue_head(&iot->wait_shutdown); + + for (i = 0; i < NR_IO_THREADS; i++) { + pid = kernel_thread(cachemiss_thread, (void *)iot, 0); + if (pid < 0) { + printk(KERN_ERR "TUX: error %d creating IO thread!\n", + pid); + __stop_cachemiss_threads(iot); + kfree(iot); + return pid; + } + } + ti->iot = iot; + /* + * Wait for all cachemiss threads to start up: + */ + while (iot->threads != NR_IO_THREADS) { + __set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ/10); + } + return 0; +} + --- linux/net/tux/cgi.c.orig +++ linux/net/tux/cgi.c @@ -0,0 +1,248 @@ +/* + * TUX - 
Integrated Application Protocols Layer and Object Cache + * + * Copyright (C) 2000, 2001, Ingo Molnar + * + * cgi.c: user-space CGI (and other) code execution. + */ + +#define __KERNEL_SYSCALLS__ + +#include + +/**************************************************************** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + ****************************************************************/ + +/* + * Define our own execve() syscall - the unistd.h one uses + * errno which is not an exported symbol. (but removing it + * breaks old userspace tools.) 
+ */ +#ifdef CONFIG_X86 + +static int tux_execve (const char *file, char **argv, char **envp) +{ + int ret; + __asm__ volatile ("int $0x80" + : "=a" (ret) + : "0" (__NR_execve), + "b" ((int)file), + "c" ((int)argv), + "d" ((long)envp)); + return ret; +} + +#else +#ifdef CONFIG_ALPHA + +static int tux_execve (const char *arg1, char **arg2, char **arg3) +{ + long _sc_ret, _sc_err; + { + register long _sc_0 __asm__("$0"); + register long _sc_16 __asm__("$16"); + register long _sc_17 __asm__("$17"); + register long _sc_18 __asm__("$18"); + register long _sc_19 __asm__("$19"); + + _sc_0 = __NR_execve; + _sc_16 = (long) (arg1); + _sc_17 = (long) (arg2); + _sc_18 = (long) (arg3); + __asm__("callsys # %0 %1 %2 %3 %4 %5" + : "=r"(_sc_0), "=r"(_sc_19) + : "0"(_sc_0), "r"(_sc_16), "r"(_sc_17), + "r"(_sc_18) + : _syscall_clobbers); + _sc_ret = _sc_0, _sc_err = _sc_19; + } + return _sc_err; +} +#else +#ifdef CONFIG_PPC +static int tux_execve (const char *arg1, char **arg2, char **arg3) +{ + unsigned long __sc_ret, __sc_err; + { + register unsigned long __sc_0 __asm__ ("r0"); + register unsigned long __sc_3 __asm__ ("r3"); + register unsigned long __sc_4 __asm__ ("r4"); + register unsigned long __sc_5 __asm__ ("r5"); + + __sc_3 = (unsigned long) (arg1); + __sc_4 = (unsigned long) (arg2); + __sc_5 = (unsigned long) (arg3); + __sc_0 = __NR_execve; + __asm__ __volatile__ + ("sc \n\t" + "mfcr %1 " + : "=&r" (__sc_3), "=&r" (__sc_0) + : "0" (__sc_3), "1" (__sc_0), + "r" (__sc_4), + "r" (__sc_5) + : __syscall_clobbers); + __sc_ret = __sc_3; + __sc_err = __sc_0; + } + return __sc_err; +} +#else +#ifdef CONFIG_ARCH_S390 /* OK for both 31 and 64 bit mode */ +static int tux_execve(const char *file, char **argv, char **envp) +{ + register const char * __arg1 asm("2") = file; + register char ** __arg2 asm("3") = argv; + register char ** __arg3 asm("4") = envp; + register long __svcres asm("2"); + __asm__ __volatile__ ( + " svc %b1\n" + : "=d" (__svcres) + : "i" (__NR_execve), + "0" 
(__arg1), + "d" (__arg2), + "d" (__arg3) + : "cc", "memory"); + return __svcres; +} +#else +# define tux_execve execve +#endif +#endif +#endif +#endif + +static int exec_usermode(char *program_path, char *argv[], char *envp[]) +{ + int i, err; + + err = tux_chroot(tux_cgiroot); + if (err) { + printk(KERN_ERR "TUX: CGI chroot returned %d, /proc/sys/net/tux/cgiroot is probably set up incorrectly! Aborting CGI execution.\n", err); + return err; + } + + /* Allow execve args to be in kernel space. */ + set_fs(KERNEL_DS); + + spin_lock_irq(¤t->sigmask_lock); + flush_signals(current); + flush_signal_handlers(current); + spin_unlock_irq(¤t->sigmask_lock); + + for (i = 3; i < current->files->max_fds; i++ ) + if (current->files->fd[i]) + sys_close(i); + + err = tux_execve(program_path, argv, envp); + if (err < 0) + return err; + return 0; +} + +static int exec_helper (void * data) +{ + exec_param_t *param = data; + char **tmp; + int ret; + + sprintf(current->comm,"doexec - %d", current->pid); +#if CONFIG_SMP + if (!tux_cgi_inherit_cpu) { + unsigned int mask = cpu_online_map & tux_cgi_cpu_mask; + + if (mask) + set_cpus_allowed(current, mask); + else + set_cpus_allowed(current, cpu_online_map); + } +#endif + + if (!param) + TUX_BUG(); + Dprintk("doing exec(%s).\n", param->command); + + Dprintk("argv: "); + tmp = param->argv; + while (*tmp) { + Dprintk("{%s} ", *tmp); + tmp++; + } + Dprintk("\n"); + Dprintk("envp: "); + tmp = param->envp; + while (*tmp) { + Dprintk("{%s} ", *tmp); + tmp++; + } + Dprintk("\n"); + /* + * Set up stdin, stdout and stderr of the external + * CGI application. + */ + if (param->pipe_fds) { + sys_close(1); + sys_close(2); + sys_close(4); + if (sys_dup(3) != 1) + TUX_BUG(); + if (sys_dup(5) != 2) + TUX_BUG(); + sys_close(3); + sys_close(5); + // do not close on exec. 
+		sys_fcntl(0, F_SETFD, 0);
+		sys_fcntl(1, F_SETFD, 0);
+		sys_fcntl(2, F_SETFD, 0);
+	}
+	ret = exec_usermode(param->command, param->argv, param->envp);
+	if (ret < 0)
+		Dprintk("bug: exec() returned %d.\n", ret);
+	else
+		Dprintk("exec()-ed successfully!\n");
+	return 0;
+}
+
+pid_t tux_exec_process (char *command, char **argv,
+			char **envp, int pipe_fds,
+			exec_param_t *param, int wait)
+{
+	exec_param_t param_local;
+	pid_t pid;
+	struct k_sigaction *ka;
+
+	ka = current->sig->action + SIGCHLD-1;
+	ka->sa.sa_handler = SIG_IGN;
+
+	if (!param && wait)
+		param = &param_local;
+
+	param->command = command;
+	param->argv = argv;
+	param->envp = envp;
+	param->pipe_fds = pipe_fds;
+
+repeat_fork:
+	pid = kernel_thread(exec_helper, (void*) param, CLONE_SIGHAND|SIGCHLD);
+	Dprintk("kernel thread created PID %d.\n", pid);
+	if (pid < 0) {
+		printk(KERN_ERR "TUX: could not create new CGI kernel thread due to %d... retrying.\n", pid);
+		current->state = TASK_UNINTERRUPTIBLE;
+		schedule_timeout(HZ);
+		goto repeat_fork;
+	}
+	return pid;
+}
--- linux/net/tux/Config.in.orig
+++ linux/net/tux/Config.in
@@ -0,0 +1,7 @@
+tristate ' Threaded linUX application protocol accelerator layer (TUX)' CONFIG_TUX
+if [ "$CONFIG_TUX" = "y" -o "$CONFIG_TUX" = "m" ]; then
+	bool ' External CGI module' CONFIG_TUX_EXTCGI
+	bool ' extended TUX logging format' CONFIG_TUX_EXTENDED_LOG
+	bool ' debug TUX' CONFIG_TUX_DEBUG
+fi
+
--- linux/net/tux/directory.c.orig
+++ linux/net/tux/directory.c
@@ -0,0 +1,297 @@
+/*
+ * TUX - Integrated Application Protocols Layer and Object Cache
+ *
+ * Copyright (C) 2000, 2001, Ingo Molnar
+ *
+ * directory.c: directory listing support
+ */
+
+#define __KERNEL_SYSCALLS__
+#include <net/tux.h>
+
+/****************************************************************
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ *
any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + ****************************************************************/ + +char * tux_print_path (tux_req_t *req, struct dentry *dentry, struct vfsmount *mnt, char *buf, unsigned int max_len) +{ + char *res; + struct dentry *cwd, *root; + struct vfsmount *cwd_mnt, *rootmnt; + + cwd = dget(dentry); + cwd_mnt = mntget(mnt); + root = dget(req->docroot_dentry); + rootmnt = mntget(req->docroot_mnt); + + spin_lock(&dcache_lock); + res = __d_path(cwd, cwd_mnt, root, rootmnt, buf, max_len); + spin_unlock(&dcache_lock); + + dput(cwd); + mntput(cwd_mnt); + dput(root); + mntput(rootmnt); + + return res; +} + +/* + * There are filesystems that do not fill in ->d_type correctly. + * Determine file-type. + */ +static int get_d_type (struct dentry *dentry) +{ + unsigned int mode = dentry->d_inode->i_mode; + + if (S_ISREG(mode)) + return DT_REG; + if (S_ISDIR(mode)) + return DT_DIR; + if (S_ISLNK(mode)) + return DT_LNK; + if (S_ISFIFO(mode)) + return DT_FIFO; + if (S_ISSOCK(mode)) + return DT_SOCK; + if (S_ISCHR(mode)) + return DT_CHR; + if (S_ISBLK(mode)) + return DT_BLK; + return 0; +} + +static void do_dir_line (tux_req_t *req, int cachemiss) +{ + struct linux_dirent64 *dirp, *dirp0; + char string0[MAX_OBJECTNAME_LEN+200], *tmp; + int len, curroff, total, str_len = 0; + int err, flag = cachemiss ? 
0 : LOOKUP_ATOMIC; + struct nameidata base; + struct dentry *dentry = NULL; + struct inode *inode = NULL; + struct vfsmount *mnt = NULL; + + if (req->proto->check_req_err(req, cachemiss)) + return; + + tmp = NULL; + dirp0 = req->dirp0; + curroff = req->curroff; + total = req->total; + + dirp = (struct linux_dirent64 *)((char *)dirp0 + curroff); + if (!dirp->d_name || !dirp->d_name[0]) + goto next_dir; + /* + * Hide .xxxxx files: + */ + if (dirp->d_name[0] == '.') + goto next_dir; + Dprintk("<%s T:%d (off:%Ld) (len:%d)>\n", dirp->d_name, dirp->d_type, dirp->d_off, dirp->d_reclen); + if (tux_hide_unreadable) { + switch (dirp->d_type) { + default: + goto next_dir; + case DT_UNKNOWN: + case DT_REG: + case DT_DIR: + case DT_LNK: + /* valid entries - fall through. */ + ; + } + } + len = strlen(dirp->d_name); + if (len >= MAX_OBJECTNAME_LEN) { + dirp->d_name[MAX_OBJECTNAME_LEN] = 0; + len = MAX_OBJECTNAME_LEN-1; + } + + if (!req->dentry) + TUX_BUG(); + + base.flags = LOOKUP_POSITIVE|flag; + base.last_type = LAST_ROOT; + base.dentry = dget(req->dentry); + base.mnt = mntget(req->cwd_mnt); + + switch_docroot(req); + err = path_walk(dirp->d_name, &base); + + Dprintk("path_walk() returned %d.\n", err); + + if (err) { + if (err == -EWOULDBLOCKIO) { + add_tux_atom(req, do_dir_line); + queue_cachemiss(req); + return; + } + goto next_dir; + } + + dentry = base.dentry; + mnt = base.mnt; + if (!dentry) + TUX_BUG(); + if (IS_ERR(dentry)) + TUX_BUG(); + inode = dentry->d_inode; + if (!inode) + TUX_BUG(); + if (!dirp->d_type) + dirp->d_type = get_d_type(dentry); + if (tux_hide_unreadable) { + umode_t mode; + + mode = inode->i_mode; + if (mode & tux_mode_forbidden) + goto out_dput; + if (!(mode & tux_mode_allowed)) + goto out_dput; + + err = permission(inode, MAY_READ); + if (err) + goto out_dput; + if (dirp->d_type == DT_DIR) { + err = permission(inode, MAY_EXEC); + if (err) + goto out_dput; + } + } + + tmp = req->proto->print_dir_line(req, string0, dirp->d_name, len, dirp->d_type, 
dentry, inode); + if (tmp) + str_len = tmp-string0; +out_dput: + dput(dentry); + mntput(mnt); +next_dir: + curroff += dirp->d_reclen; + + if (tmp && (tmp != string0)) + Dprintk("writing line (len: %d): <%s>\n", strlen(string0), string0); + + if (curroff < total) { + req->dirp0 = dirp0; + req->curroff = curroff; + add_tux_atom(req, do_dir_line); + } else { + kfree(dirp0); + req->dirp0 = NULL; + req->curroff = 0; + // falls back to the list_directory atom + } + if (tmp && (tmp != string0)) + __send_async_message(req, string0, 200, str_len, 0); + else + add_req_to_workqueue(req); +} + +#define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de))) +#define ROUND_UP(x) (((x)+sizeof(long)-1) & ~(sizeof(long)-1)) +#define ROUND_UP64(x) (((x)+sizeof(u64)-1) & ~(sizeof(u64)-1)) + +static int filldir64(void * __buf, const char * name, int namlen, loff_t offset, + ino_t ino, unsigned int d_type) +{ + struct linux_dirent64 * dirent, d; + struct getdents_callback64 * buf = (struct getdents_callback64 *) __buf; + int reclen = ROUND_UP64(NAME_OFFSET(dirent) + namlen + 1); + + buf->error = -EINVAL; /* only used if we fail.. 
*/ + if (reclen > buf->count) + return -EINVAL; + dirent = buf->previous; + if (dirent) { + d.d_off = offset; + copy_to_user(&dirent->d_off, &d.d_off, sizeof(d.d_off)); + } + dirent = buf->current_dir; + buf->previous = dirent; + memset(&d, 0, NAME_OFFSET(&d)); + d.d_ino = ino; + d.d_reclen = reclen; + d.d_type = d_type; + copy_to_user(dirent, &d, NAME_OFFSET(&d)); + copy_to_user(dirent->d_name, name, namlen); + put_user(0, dirent->d_name + namlen); + ((char *) dirent) += reclen; + buf->current_dir = dirent; + buf->count -= reclen; + return 0; +} +#define DIRENT_SIZE 3000 + +void list_directory (tux_req_t *req, int cachemiss) +{ + struct getdents_callback64 buf; + struct linux_dirent64 *dirp0; + mm_segment_t oldmm; + int total; + + Dprintk("list_directory(%p, %d), dentry: %p.\n", req, cachemiss, req->dentry); + if (!req->cwd_dentry) + TUX_BUG(); + + if (!cachemiss) { + add_tux_atom(req, list_directory); + queue_cachemiss(req); + return; + } + + dirp0 = tux_kmalloc(DIRENT_SIZE); + + buf.current_dir = dirp0; + buf.previous = NULL; + buf.count = DIRENT_SIZE; + buf.error = 0; + + oldmm = get_fs(); set_fs(KERNEL_DS); + set_fs(KERNEL_DS); + total = vfs_readdir(&req->in_file, filldir64, &buf); + set_fs(oldmm); + + if (buf.previous) + total = DIRENT_SIZE - buf.count; + + Dprintk("total: %d (buf.error: %d, buf.previous %p)\n", + total, buf.error, buf.previous); + + if (total < 0) { + kfree(dirp0); + req_err(req); + add_req_to_workqueue(req); + return; + } + if (!total) { + kfree(dirp0); + req->in_file.f_pos = 0; + add_req_to_workqueue(req); + return; + } + + if (!req->cwd_dentry) + TUX_BUG(); + add_tux_atom(req, list_directory); + + req->dirp0 = dirp0; + req->curroff = 0; + req->total = total; + add_tux_atom(req, do_dir_line); + + add_req_to_workqueue(req); +} + --- linux/net/tux/extcgi.c.orig +++ linux/net/tux/extcgi.c @@ -0,0 +1,315 @@ +/* + * TUX - Integrated Application Protocols Layer and Object Cache + * + * Copyright (C) 2000, 2001, Ingo Molnar + * + * extcgi.c: 
dynamic TUX module which forks and starts an external CGI + */ + +#define __KERNEL_SYSCALLS__ + +#include +#include "parser.h" + +/**************************************************************** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + ****************************************************************/ + +#define MAX_ENVLEN 1000 +#define MAX_CGI_METAVARIABLES 32 +#define CGI_CHUNK_SIZE 1024 +#define MAX_CGI_COMMAND_LEN 256 + +#if CONFIG_TUX_DEBUG +#define PRINT_MESSAGE_LEFT \ + Dprintk("CGI message left at %s:%d:\n--->{%s}<---\n", \ + __FILE__, __LINE__, curr) +#else +#define PRINT_MESSAGE_LEFT do {} while(0) +#endif + +#define GOTO_INCOMPLETE do { Dprintk("invalid CGI reply at %s:%d.\n", __FILE__, __LINE__); goto invalid; } while (0) + +/* + * Please acknowledge our hard work by not changing this define, or + * at least please acknowledge us by leaving "TUX/2.0 (Linux)" in + * the ID string. Thanks! 
:-) + */ +#define CGI_SUCCESS2 "HTTP/1.1 200 OK\r\nConnection: close\r\nServer: TUX/2.0 (Linux)\r\n" + +static int handle_cgi_reply (tux_req_t *req) +{ + int first = 1; + int len, left, total; + char buf [CGI_CHUNK_SIZE+1], *tmp; + mm_segment_t oldmm; + + sys_close(3); + sys_close(4); + sys_close(5); + oldmm = get_fs(); set_fs(KERNEL_DS); + send_sync_buf(NULL, req->sock, CGI_SUCCESS2, sizeof(CGI_SUCCESS2)-1, MSG_MORE); + set_fs(oldmm); + + req->bytes_sent = 0; + /* + * The new process is the new owner of the socket, it will + * close it. + */ +repeat: + left = CGI_CHUNK_SIZE; + len = 0; + total = 0; + tmp = buf; + do { + mm_segment_t oldmm; + + tmp += len; + total += len; + left -= len; + if (!left) + break; +repeat_read: + Dprintk("reading %d bytes via sys_read().\n", left); + oldmm = get_fs(); set_fs(KERNEL_DS); + len = sys_read(2, tmp, left); + set_fs(oldmm); + Dprintk("got %d bytes from sys_read() (total: %d).\n", len, total); + if (len > 0) + tmp[len] = 0; + Dprintk("CGI reply: (%d bytes, total %d).\n", len, total); + if (len == -ERESTARTSYS) { + flush_all_signals(); + goto repeat_read; + } + } while (len > 0); + if (total > CGI_CHUNK_SIZE) { + printk(KERN_ERR "TUX: CGI weirdness. 
total: %d, len: %d, left: %d.\n", total, len, left); + TUX_BUG(); + } + Dprintk("CGI done reply chunk: (%d bytes last, total %d).\n", len, total); + if (total) { + mm_segment_t oldmm; + + oldmm = get_fs(); set_fs(KERNEL_DS); + if (!len) + send_sync_buf(NULL, req->sock, buf, total, 0); + else + send_sync_buf(NULL, req->sock, buf, total, MSG_MORE); + set_fs(oldmm); + req->bytes_sent += total; + } + + Dprintk("bytes_sent: %d\n", req->bytes_sent); + if ((total > 0) && first) { + first = 0; + + if (buf[total]) + TUX_BUG(); + tmp = strstr(buf, "\n\n"); + if (tmp) { + req->bytes_sent -= (tmp-buf) + 2; + Dprintk("new bytes_sent: %d\n", req->bytes_sent); + } else { + req->bytes_sent = 0; + req_err(req); + } + } + if (len < 0) + Dprintk("sys_read returned with %d.\n", len); + else { + if (total > 0) + goto repeat; + } + sys_close(2); + + req->status = 200; + add_req_to_workqueue(req); + return -1; +} + +static int exec_external_cgi (void *data) +{ + exec_param_t param; + tux_req_t *req = data; + char *envp[MAX_CGI_METAVARIABLES+1], **envp_p; + char *argv[] = { "extcgi", NULL}; + char envstr[MAX_ENVLEN], *tmp; + unsigned int host; + struct k_sigaction *ka; + int in_pipe_fds[2], out_pipe_fds[2], err_pipe_fds[2], len; + char command [MAX_CGI_COMMAND_LEN]; + pid_t pid; + + len = strlen(tux_common_docroot); + if (req->objectname_len + len + 12 > MAX_CGI_COMMAND_LEN) + return -ENOMEM; + sprintf(current->comm,"cgimain - %d", current->pid); + host = req->sock->sk->daddr; + + tmp = envstr; + envp_p = envp; + +#define WRITE_ENV(str...) 
\ + if (envp_p >= envp + MAX_CGI_METAVARIABLES) \ + TUX_BUG(); \ + len = sprintf(tmp, str); \ + *envp_p++ = tmp; \ + tmp += len + 1; \ + if (tmp >= envstr + MAX_ENVLEN) \ + TUX_BUG(); + + #define WRITE_ENV_STR(str,field,len) \ + do { \ + int offset; \ + \ + offset = sizeof(str)-1; \ + if (tmp - envstr + offset + len >= MAX_ENVLEN) \ + return -EFAULT; \ + if (envp_p >= envp + MAX_CGI_METAVARIABLES) \ + TUX_BUG(); \ + memcpy(tmp, str, offset); \ + memcpy(tmp + offset, field, len); \ + offset += len; \ + tmp[offset] = 0; \ + *envp_p++ = tmp; \ + tmp += offset + 1; \ + } while (0) + + WRITE_ENV("GATEWAY_INTERFACE=CGI/1.1"); + WRITE_ENV("CONTENT_LENGTH=%d", req->post_data_len); + WRITE_ENV("REMOTE_ADDR=%d.%d.%d.%d", NIPQUAD(host)); + WRITE_ENV("SERVER_PORT=%d", 80); + WRITE_ENV("SERVER_SOFTWARE=TUX/2.0 (Linux)"); + +#if 1 + WRITE_ENV("DOCUMENT_ROOT=/"); + WRITE_ENV("PATH_INFO=/"); +#else + WRITE_ENV_STR("DOCUMENT_ROOT=", tux_common_docroot, len); + WRITE_ENV_STR("PATH_INFO=", tux_common_docroot, len); +#endif + WRITE_ENV_STR("QUERY_STRING=", req->query_str, req->query_len); + WRITE_ENV_STR("REQUEST_METHOD=", req->method_str, req->method_len); + WRITE_ENV_STR("SCRIPT_NAME=", req->objectname, req->objectname_len); + WRITE_ENV_STR("SERVER_PROTOCOL=", req->version_str, req->version_len); + + if (req->content_type_len) + WRITE_ENV_STR("CONTENT_TYPE=", + req->content_type_str, req->content_type_len); + if (req->cookies_len) + WRITE_ENV_STR("HTTP_COOKIE=", + req->cookies_str, req->cookies_len); + + if (req->host_len) + WRITE_ENV_STR("SERVER_NAME=", req->host, req->host_len); + else { + const char *host = "localhost"; + WRITE_ENV_STR("SERVER_NAME=", host, strlen(host)); + } + + *envp_p = NULL; + + spin_lock_irq(¤t->sigmask_lock); + ka = current->sig->action + SIGPIPE-1; + ka->sa.sa_handler = SIG_IGN; + siginitsetinv(¤t->blocked, sigmask(SIGCHLD)); + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + sys_close(0); sys_close(1); + sys_close(2); sys_close(3); + 
sys_close(4); sys_close(5); + + in_pipe_fds[0] = in_pipe_fds[1] = -1; + out_pipe_fds[0] = out_pipe_fds[1] = -1; + err_pipe_fds[0] = err_pipe_fds[1] = -1; + + if (do_pipe(in_pipe_fds)) + return -ENFILE; + if (do_pipe(out_pipe_fds)) + return -ENFILE; + if (do_pipe(err_pipe_fds)) + return -ENFILE; + + if (in_pipe_fds[0] != 0) TUX_BUG(); + if (in_pipe_fds[1] != 1) TUX_BUG(); + if (out_pipe_fds[0] != 2) TUX_BUG(); + if (out_pipe_fds[1] != 3) TUX_BUG(); + if (err_pipe_fds[0] != 4) TUX_BUG(); + if (err_pipe_fds[1] != 5) TUX_BUG(); + + if (req->virtual && req->host_len) + sprintf(command, "/%s/cgi-bin/%s", req->host, req->objectname); + else + sprintf(command, "/cgi-bin/%s", req->objectname); + Dprintk("before CGI exec.\n"); + pid = tux_exec_process(command, argv, envp, 1, ¶m, 0); + Dprintk("after CGI exec.\n"); + + if (req->post_data_len) { + mm_segment_t oldmm; + int ret; + + Dprintk("POST data to CGI:\n"); + oldmm = get_fs(); set_fs(KERNEL_DS); + ret = sys_write(1, req->post_data_str, req->post_data_len); + set_fs(oldmm); + Dprintk("sys_write() returned: %d.\n", ret); + if (ret != req->post_data_len) + Dprintk("sys_write() returned: %d.\n", ret); + } + + sys_close(0); + sys_close(1); + + handle_cgi_reply(req); + + return 0; +} + +void start_external_cgi (tux_req_t *req) +{ + int pid; + +repeat: + pid = kernel_thread(exec_external_cgi, (void*) req, SIGCHLD); + if (pid == -1) + return; + if (pid < 0) { + printk(KERN_INFO "TUX: Could not fork external CGI process due to %d, retrying!\n", pid); + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ); + goto repeat; + } +} + +int query_extcgi (tux_req_t *req) +{ + clear_keepalive(req); + start_external_cgi(req); + return -1; +} + +#define EXTCGI_INVALID_HEADER \ + "HTTP/1.1 503 Service Unavailable\r\n" \ + "Content-Length: 23\r\n\r\n" + +#define EXTCGI_INVALID_BODY \ + "TUX: invalid CGI reply." 
+ +#define EXTCGI_INVALID EXTCGI_INVALID_HEADER EXTCGI_INVALID_BODY + --- linux/net/tux/gzip.c.orig +++ linux/net/tux/gzip.c @@ -0,0 +1,3347 @@ +/* $Id: zlib.h,v 1.2 1997/12/23 10:47:44 paulus Exp $ */ + +#include +#include +#include + +/* + * This file is derived from zlib.h and zconf.h from the zlib-1.0.4 + * distribution by Jean-loup Gailly and Mark Adler, with some additions + * by Paul Mackerras to aid in implementing Deflate compression and + * decompression for PPP packets. + */ + +/* + * ==FILEVERSION 971127== + * + * This marker is used by the Linux installation script to determine + * whether an up-to-date version of this file is already installed. + */ + + +/* +++ zlib.h */ +/* zlib.h -- interface of the 'zlib' general purpose compression library + version 1.0.4, Jul 24th, 1996. + + Copyright (C) 1995-1996 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. 
+ + Jean-loup Gailly Mark Adler + gzip@prep.ai.mit.edu madler@alumni.caltech.edu + + + The data format used by the zlib library is described by RFCs (Request for + Comments) 1950 to 1952 in the files ftp://ds.internic.net/rfc/rfc1950.txt + (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format). +*/ + +/* +++ zconf.h */ +/* zconf.h -- configuration of the zlib compression library + * Copyright (C) 1995-1996 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* From: zconf.h,v 1.20 1996/07/02 15:09:28 me Exp $ */ + +/* + * Compile with -DMAXSEG_64K if the alloc function cannot allocate more + * than 64k bytes at a time (needed on systems with 16-bit int). + */ +#if defined(MSDOS) && !defined(__32BIT__) +# define MAXSEG_64K +#endif +#ifdef MSDOS +# define UNALIGNED_OK +#endif + +/* Maximum value for memLevel in deflateInit2 */ +#ifndef MAX_MEM_LEVEL +# ifdef MAXSEG_64K +# define MAX_MEM_LEVEL 8 +# else +# define MAX_MEM_LEVEL 9 +# endif +#endif + +/* Maximum value for windowBits in deflateInit2 and inflateInit2 */ +#ifndef MAX_WBITS +# define MAX_WBITS 15 /* 32K LZ77 window */ +#endif + +/* The memory requirements for deflate are (in bytes): + 1 << (windowBits+2) + 1 << (memLevel+9) + that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) + plus a few kilobytes for small objects. For example, if you want to reduce + the default memory requirements from 256K to 128K, compile with + make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" + Of course this will generally degrade compression (there's no free lunch). + + The memory requirements for inflate are (in bytes) 1 << windowBits + that is, 32K for windowBits=15 (default value) plus a few kilobytes + for small objects. 
+*/ + + /* Type declarations */ + +#define OF(args) args + +#ifndef FAR +# define FAR +#endif + +typedef unsigned char Byte; /* 8 bits */ +typedef unsigned int uInt; /* 16 bits or more */ +typedef unsigned long uLong; /* 32 bits or more */ + +#if defined(__BORLANDC__) && defined(SMALL_MEDIUM) + /* Borland C/C++ ignores FAR inside typedef */ +# define Bytef Byte FAR +#else + typedef Byte FAR Bytef; +#endif +typedef char FAR charf; +typedef int FAR intf; +typedef uInt FAR uIntf; +typedef uLong FAR uLongf; + +typedef void FAR *voidpf; +typedef void *voidp; + +#define ZLIB_VERSION "1.0.4P" + +/* + The 'zlib' compression library provides in-memory compression and + decompression functions, including integrity checks of the uncompressed + data. This version of the library supports only one compression method + (deflation) but other algorithms may be added later and will have the same + stream interface. + + For compression the application must provide the output buffer and + may optionally provide the input buffer for optimization. For decompression, + the application must provide the input buffer and may optionally provide + the output buffer for optimization. + + Compression can be done in a single step if the buffers are large + enough (for example if an input file is mmap'ed), or can be done by + repeated calls of the compression function. In the latter case, the + application must provide more input and/or consume the output + (providing more output space) before each call. + + The library does not install any signal handler. It is recommended to + add at least a handler for SIGSEGV when decompressing; the library checks + the consistency of the input data whenever possible but may go nuts + for some forms of corrupted input. 
+*/ + +typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size)); +typedef void (*free_func) OF((voidpf opaque, voidpf address)); + +struct internal_state; + +typedef struct z_stream_s { + Bytef *next_in; /* next input byte */ + uInt avail_in; /* number of bytes available at next_in */ + uLong total_in; /* total nb of input bytes read so far */ + + Bytef *next_out; /* next output byte should be put there */ + uInt avail_out; /* remaining free space at next_out */ + uLong total_out; /* total nb of bytes output so far */ + + char *msg; /* last error message, NULL if no error */ + struct internal_state FAR *state; /* not visible by applications */ + + alloc_func zalloc; /* used to allocate the internal state */ + free_func zfree; /* used to free the internal state */ + voidpf opaque; /* private data object passed to zalloc and zfree */ + + int data_type; /* best guess about the data type: ascii or binary */ + uLong adler; /* adler32 value of the uncompressed data */ + uLong reserved; /* reserved for future use */ +} z_stream; + +typedef z_stream FAR *z_streamp; + +/* + The application must update next_in and avail_in when avail_in has + dropped to zero. It must update next_out and avail_out when avail_out + has dropped to zero. The application must initialize zalloc, zfree and + opaque before calling the init function. All other fields are set by the + compression library and must not be updated by the application. + + The opaque value provided by the application will be passed as the first + parameter for calls of zalloc and zfree. This can be useful for custom + memory management. The compression library attaches no meaning to the + opaque value. + + zalloc must return Z_NULL if there is not enough memory for the object. + On 16-bit systems, the functions zalloc and zfree must be able to allocate + exactly 65536 bytes, but will not be required to allocate more than this + if the symbol MAXSEG_64K is defined (see zconf.h). 
WARNING: On MSDOS, + pointers returned by zalloc for objects of exactly 65536 bytes *must* + have their offset normalized to zero. The default allocation function + provided by this library ensures this (see zutil.c). To reduce memory + requirements and avoid any allocation of 64K objects, at the expense of + compression ratio, compile the library with -DMAX_WBITS=14 (see zconf.h). + + The fields total_in and total_out can be used for statistics or + progress reports. After compression, total_in holds the total size of + the uncompressed data and may be saved for use in the decompressor + (particularly if the decompressor wants to decompress everything in + a single step). +*/ + + /* constants */ + +#define Z_NO_FLUSH 0 +#define Z_PARTIAL_FLUSH 1 +#define Z_PACKET_FLUSH 2 +#define Z_SYNC_FLUSH 3 +#define Z_FULL_FLUSH 4 +#define Z_FINISH 5 +/* Allowed flush values; see deflate() below for details */ + +#define Z_OK 0 +#define Z_STREAM_END 1 +#define Z_NEED_DICT 2 +#define Z_ERRNO (-1) +#define Z_STREAM_ERROR (-2) +#define Z_DATA_ERROR (-3) +#define Z_MEM_ERROR (-4) +#define Z_BUF_ERROR (-5) +#define Z_VERSION_ERROR (-6) +/* Return codes for the compression/decompression functions. Negative + * values are errors, positive values are used for special but normal events. 
+ */ + +#define Z_NO_COMPRESSION 0 +#define Z_BEST_SPEED 1 +#define Z_BEST_COMPRESSION 9 +#define Z_DEFAULT_COMPRESSION (-1) +/* compression levels */ + +#define Z_FILTERED 1 +#define Z_HUFFMAN_ONLY 2 +#define Z_DEFAULT_STRATEGY 0 +/* compression strategy; see deflateInit2() below for details */ + +#define Z_BINARY 0 +#define Z_ASCII 1 +#define Z_UNKNOWN 2 +/* Possible values of the data_type field */ + +#define Z_DEFLATED 8 +/* The deflate compression method (the only one supported in this version) */ + +#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */ + +static int deflate OF((z_streamp strm, int flush)); +/* + Performs one or both of the following actions: + + - Compress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in and avail_in are updated and + processing will resume at this point for the next call of deflate(). + + - Provide more output starting at next_out and update next_out and avail_out + accordingly. This action is forced if the parameter flush is non zero. + Forcing flush frequently degrades the compression ratio, so this parameter + should be set only when necessary (in interactive applications). + Some output may be provided even if flush is not set. + + Before the call of deflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming + more output, and updating avail_in or avail_out accordingly; avail_out + should never be zero before the call. The application can consume the + compressed output when it wants, for example when the output buffer is full + (avail_out == 0), or after each call of deflate(). If deflate returns Z_OK + and with zero avail_out, it must be called again after making room in the + output buffer because there might be more output pending. 
+ + If the parameter flush is set to Z_PARTIAL_FLUSH, the current compression + block is terminated and flushed to the output buffer so that the + decompressor can get all input data available so far. For method 9, a future + variant on method 8, the current block will be flushed but not terminated. + Z_SYNC_FLUSH has the same effect as partial flush except that the compressed + output is byte aligned (the compressor can clear its internal bit buffer) + and the current block is always terminated; this can be useful if the + compressor has to be restarted from scratch after an interruption (in which + case the internal state of the compressor may be lost). + If flush is set to Z_FULL_FLUSH, the compression block is terminated, a + special marker is output and the compression dictionary is discarded; this + is useful to allow the decompressor to synchronize if one compressed block + has been damaged (see inflateSync below). Flushing degrades compression and + so should be used only when necessary. Using Z_FULL_FLUSH too often can + seriously degrade the compression. If deflate returns with avail_out == 0, + this function must be called again with the same value of the flush + parameter and more output space (updated avail_out), until the flush is + complete (deflate returns with non-zero avail_out). + + If the parameter flush is set to Z_PACKET_FLUSH, the compression + block is terminated, and a zero-length stored block is output, + omitting the length bytes (the effect of this is that the 3-bit type + code 000 for a stored block is output, and the output is then + byte-aligned). This is designed for use at the end of a PPP packet. 
+ + If the parameter flush is set to Z_FINISH, pending input is processed, + pending output is flushed and deflate returns with Z_STREAM_END if there + was enough output space; if deflate returns with Z_OK, this function must be + called again with Z_FINISH and more output space (updated avail_out) but no + more input data, until it returns with Z_STREAM_END or an error. After + deflate has returned Z_STREAM_END, the only possible operations on the + stream are deflateReset or deflateEnd. + + Z_FINISH can be used immediately after deflateInit if all the compression + is to be done in a single step. In this case, avail_out must be at least + 0.1% larger than avail_in plus 12 bytes. If deflate does not return + Z_STREAM_END, then it must be called again as described above. + + deflate() may update data_type if it can make a good guess about + the input data type (Z_ASCII or Z_BINARY). In doubt, the data is considered + binary. This field is only for information purposes and does not affect + the compression algorithm in any manner. + + deflate() returns Z_OK if some progress has been made (more input + processed or more output produced), Z_STREAM_END if all input has been + consumed and all output has been produced (only when flush is set to + Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example + if next_in or next_out was NULL), Z_BUF_ERROR if no progress is possible. +*/ + + +static int deflateEnd OF((z_streamp strm)); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any + pending output. + + deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the + stream state was inconsistent, Z_DATA_ERROR if the stream was freed + prematurely (some input or output was discarded). In the error case, + msg may be set but then points to a static string (which must not be + deallocated). 
+*/ + + /* Advanced functions */ + +/* + The following functions are needed only in some special applications. +*/ + +/* +static int deflateInit2 OF((z_streamp strm, + int level, + int method, + int windowBits, + int memLevel, + int strategy)); + + This is another version of deflateInit with more compression options. The + fields next_in, zalloc, zfree and opaque must be initialized before by + the caller. + + The method parameter is the compression method. It must be Z_DEFLATED in + this version of the library. (Method 9 will allow a 64K history buffer and + partial block flushes.) + + The windowBits parameter is the base two logarithm of the window size + (the size of the history buffer). It should be in the range 8..15 for this + version of the library (the value 16 will be allowed for method 9). Larger + values of this parameter result in better compression at the expense of + memory usage. The default value is 15 if deflateInit is used instead. + + The memLevel parameter specifies how much memory should be allocated + for the internal compression state. memLevel=1 uses minimum memory but + is slow and reduces compression ratio; memLevel=9 uses maximum memory + for optimal speed. The default value is 8. See zconf.h for total memory + usage as a function of windowBits and memLevel. + + The strategy parameter is used to tune the compression algorithm. Use the + value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a + filter (or predictor), or Z_HUFFMAN_ONLY to force Huffman encoding only (no + string match). Filtered data consists mostly of small values with a + somewhat random distribution. In this case, the compression algorithm is + tuned to compress them better. The effect of Z_FILTERED is to force more + Huffman coding and less string matching; it is somewhat intermediate + between Z_DEFAULT and Z_HUFFMAN_ONLY. 
The strategy parameter only affects + the compression ratio but not the correctness of the compressed output even + if it is not set appropriately. + + If next_in is not null, the library will use this buffer to hold also + some history information; the buffer must either hold the entire input + data, or have at least 1<<(windowBits+1) bytes and be writable. If next_in + is null, the library will allocate its own history buffer (and leave next_in + null). next_out need not be provided here but must be provided by the + application for the next call of deflate(). + + If the history buffer is provided by the application, next_in must + must never be changed by the application since the compressor maintains + information inside this buffer from call to call; the application + must provide more input only by increasing avail_in. next_in is always + reset by the library in this case. + + deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was + not enough memory, Z_STREAM_ERROR if a parameter is invalid (such as + an invalid method). msg is set to null if there is no error message. + deflateInit2 does not perform any compression: this will be done by + deflate(). +*/ + +static int deflateReset OF((z_streamp strm)); +/* + This function is equivalent to deflateEnd followed by deflateInit, + but does not free and reallocate all the internal compression state. + The stream will keep the same compression level and any other attributes + that may have been set by deflateInit2. + + deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being NULL). +*/ + +static uLong adler32 OF((uLong adler, const Bytef *buf, uInt len)); + +/* + Update a running Adler-32 checksum with the bytes buf[0..len-1] and + return the updated checksum. If buf is NULL, this function returns + the required initial value for the checksum. + An Adler-32 checksum is almost as reliable as a CRC32 but can be computed + much faster. 
Usage example: + + uLong adler = adler32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + adler = adler32(adler, buffer, length); + } + if (adler != original_adler) error(); +*/ + + /* various hacks, don't look :) */ + +/* deflateInit and inflateInit are macros to allow checking the zlib version + * and the compiler's view of z_stream: + */ +static int deflateInit_ OF((z_streamp strm, int level, + const char *version, int stream_size)); +static int deflateInit2_ OF((z_streamp strm, int level, int method, + int windowBits, int memLevel, int strategy, + const char *version, int stream_size)); +#define deflateInit(strm, level) \ + deflateInit_((strm), (level), ZLIB_VERSION, sizeof(z_stream)) +#define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ + deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ + (strategy), ZLIB_VERSION, sizeof(z_stream)) + +uLongf *get_crc_table OF((void)); /* can be used by asm versions of crc32() */ + +#ifdef __cplusplus +} +#endif + +/* --- zlib.h */ +/* + * This file is derived from various .h and .c files from the zlib-1.0.4 + * distribution by Jean-loup Gailly and Mark Adler, with some additions + * by Paul Mackerras to aid in implementing Deflate compression and + * decompression for PPP packets. See zlib.h for conditions of + * distribution and use. + * + * Changes that have been made include: + * - added Z_PACKET_FLUSH (see zlib.h for details) + * - added inflateIncomp and deflateOutputPending + * - allow strm->next_out to be NULL, meaning discard the output + * + * $Id: zlib.c,v 1.3 1997/12/23 10:47:42 paulus Exp $ + */ + +/* + * ==FILEVERSION 971210== + * + * This marker is used by the Linux installation script to determine + * whether an up-to-date version of this file is already installed. 
+ */ + +#define NO_DUMMY_DECL +#define NO_ZCFUNCS +#define MY_ZCALLOC + +/* +++ zutil.h */ +/* zutil.h -- internal interface and configuration of the compression library + * Copyright (C) 1995-1996 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* From: zutil.h,v 1.16 1996/07/24 13:41:13 me Exp $ */ + +/* compile with -Dlocal if your debugger can't find static symbols */ + +typedef unsigned char uch; +typedef uch FAR uchf; +typedef unsigned short ush; +typedef ush FAR ushf; +typedef unsigned long ulg; + +static const char *z_errmsg[10]; /* indexed by 2-zlib_error */ +/* (size given to avoid silly warnings with Visual C++) */ + +#define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)] + +#define ERR_RETURN(strm,err) \ + return (strm->msg = (char*)ERR_MSG(err), (err)) +/* To be used only when the state is known to be valid */ + + /* common constants */ + +#ifndef DEF_WBITS +# define DEF_WBITS MAX_WBITS +#endif +/* default windowBits for decompression. 
MAX_WBITS is for compression only */ + +#if MAX_MEM_LEVEL >= 8 +# define DEF_MEM_LEVEL 8 +#else +# define DEF_MEM_LEVEL MAX_MEM_LEVEL +#endif +/* default memLevel */ + +#define STORED_BLOCK 0 +#define STATIC_TREES 1 +#define DYN_TREES 2 +/* The three kinds of block type */ + +#define MIN_MATCH 3 +#define MAX_MATCH 258 +/* The minimum and maximum match lengths */ + +#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */ + + /* target dependencies */ + + /* Common defaults */ + +#ifndef OS_CODE +# define OS_CODE 0x03 /* assume Unix */ +#endif + +#ifndef FOPEN +# define FOPEN(name, mode) fopen((name), (mode)) +#endif + + /* functions */ + +#ifdef HAVE_STRERROR + extern char *strerror OF((int)); +# define zstrerror(errnum) strerror(errnum) +#else +# define zstrerror(errnum) "" +#endif + +/* Diagnostic functions */ +#ifdef DEBUG_ZLIB +# include +# ifndef verbose +# define verbose 0 +# endif + extern void z_error OF((char *m)); +# define Assert(cond,msg) {if(!(cond)) z_error(msg);} +# define Trace(x) fprintf x +# define Tracev(x) {if (verbose) fprintf x ;} +# define Tracevv(x) {if (verbose>1) fprintf x ;} +# define Tracec(c,x) {if (verbose && (c)) fprintf x ;} +# define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;} +#else +# define Assert(cond,msg) +# define Trace(x) +# define Tracev(x) +# define Tracevv(x) +# define Tracec(c,x) +# define Tracecv(c,x) +#endif + + +typedef uLong (*check_func) OF((uLong check, const Bytef *buf, uInt len)); + +voidpf zcalloc OF((voidpf opaque, unsigned items, unsigned size)); +void zcfree OF((voidpf opaque, voidpf ptr)); + +#define ZALLOC(strm, items, size) \ + (*((strm)->zalloc))((strm)->opaque, (items), (size)) +#define ZFREE(strm, addr) (*((strm)->zfree))((strm)->opaque, (voidpf)(addr)) +#define TRY_FREE(s, p) {if (p) ZFREE(s, p);} + +/* --- zutil.h */ + +/* +++ deflate.h */ +/* deflate.h -- internal compression state + * Copyright (C) 1995-1996 Jean-loup Gailly + * For conditions of distribution and use, see copyright 
notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* From: deflate.h,v 1.10 1996/07/02 12:41:00 me Exp $ */ + +#ifndef _DEFLATE_H +#define _DEFLATE_H + +/* #include "zutil.h" */ + +/* =========================================================================== + * Internal compression state. + */ + +#define LENGTH_CODES 29 +/* number of length codes, not counting the special END_BLOCK code */ + +#define LITERALS 256 +/* number of literal bytes 0..255 */ + +#define L_CODES (LITERALS+1+LENGTH_CODES) +/* number of Literal or Length codes, including the END_BLOCK code */ + +#define D_CODES 30 +/* number of distance codes */ + +#define BL_CODES 19 +/* number of codes used to transfer the bit lengths */ + +#define HEAP_SIZE (2*L_CODES+1) +/* maximum heap size */ + +#define MAX_BITS 15 +/* All codes must not exceed MAX_BITS bits */ + +#define INIT_STATE 42 +#define BUSY_STATE 113 +#define FINISH_STATE 666 +/* Stream status */ + + +/* Data structure describing a single value and its code string. */ +typedef struct ct_data_s { + union { + ush freq; /* frequency count */ + ush code; /* bit string */ + } fc; + union { + ush dad; /* father node in Huffman tree */ + ush len; /* length of bit string */ + } dl; +} FAR ct_data; + +#define Freq fc.freq +#define Code fc.code +#define Dad dl.dad +#define Len dl.len + +typedef struct static_tree_desc_s static_tree_desc; + +typedef struct tree_desc_s { + ct_data *dyn_tree; /* the dynamic tree */ + int max_code; /* largest code with non zero frequency */ + static_tree_desc *stat_desc; /* the corresponding static tree */ +} FAR tree_desc; + +typedef ush Pos; +typedef Pos FAR Posf; +typedef unsigned IPos; + +/* A Pos is an index in the character window. We use short instead of int to + * save space in the various tables. IPos is used only for parameter passing. 
+ */ + +typedef struct deflate_state { + z_streamp strm; /* pointer back to this zlib stream */ + int status; /* as the name implies */ + Bytef *pending_buf; /* output still pending */ + ulg pending_buf_size; /* size of pending_buf */ + Bytef *pending_out; /* next pending byte to output to the stream */ + int pending; /* nb of bytes in the pending buffer */ + int noheader; /* suppress zlib header and adler32 */ + Byte data_type; /* UNKNOWN, BINARY or ASCII */ + Byte method; /* STORED (for zip only) or DEFLATED */ + int last_flush; /* value of flush param for previous deflate call */ + + /* used by deflate.c: */ + + uInt w_size; /* LZ77 window size (32K by default) */ + uInt w_bits; /* log2(w_size) (8..16) */ + uInt w_mask; /* w_size - 1 */ + + Bytef *window; + /* Sliding window. Input bytes are read into the second half of the window, + * and move to the first half later to keep a dictionary of at least wSize + * bytes. With this organization, matches are limited to a distance of + * wSize-MAX_MATCH bytes, but this ensures that IO is always + * performed with a length multiple of the block size. Also, it limits + * the window size to 64K, which is quite useful on MSDOS. + * To do: use the user input buffer as sliding window. + */ + + ulg window_size; + /* Actual size of window: 2*wSize, except when the user input buffer + * is directly used as sliding window. + */ + + Posf *prev; + /* Link to older string with same hash index. To limit the size of this + * array to 64K, this link is maintained only for the last 32K strings. + * An index in this array is thus a window index modulo 32K. + */ + + Posf *head; /* Heads of the hash chains or NIL. */ + + uInt ins_h; /* hash index of string to be inserted */ + uInt hash_size; /* number of elements in hash table */ + uInt hash_bits; /* log2(hash_size) */ + uInt hash_mask; /* hash_size-1 */ + + uInt hash_shift; + /* Number of bits by which ins_h must be shifted at each input + * step. 
It must be such that after MIN_MATCH steps, the oldest + * byte no longer takes part in the hash key, that is: + * hash_shift * MIN_MATCH >= hash_bits + */ + + long block_start; + /* Window position at the beginning of the current output block. Gets + * negative when the window is moved backwards. + */ + + uInt match_length; /* length of best match */ + IPos prev_match; /* previous match */ + int match_available; /* set if previous match exists */ + uInt strstart; /* start of string to insert */ + uInt match_start; /* start of matching string */ + uInt lookahead; /* number of valid bytes ahead in window */ + + uInt prev_length; + /* Length of the best match at previous step. Matches not greater than this + * are discarded. This is used in the lazy match evaluation. + */ + + uInt max_chain_length; + /* To speed up deflation, hash chains are never searched beyond this + * length. A higher limit improves compression ratio but degrades the + * speed. + */ + + uInt max_lazy_match; + /* Attempt to find a better match only when the current match is strictly + * smaller than this value. This mechanism is used only for compression + * levels >= 4. + */ +# define max_insert_length max_lazy_match + /* Insert new strings in the hash table only if the match length is not + * greater than this length. This saves time but degrades compression. + * max_insert_length is used only for compression levels <= 3. 
+ */ + + int level; /* compression level (1..9) */ + int strategy; /* favor or force Huffman coding*/ + + uInt good_match; + /* Use a faster search when the previous match is longer than this */ + + int nice_match; /* Stop searching when current match exceeds this */ + + /* used by trees.c: */ + /* Didn't use ct_data typedef below to supress compiler warning */ + struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ + struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ + struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ + + struct tree_desc_s l_desc; /* desc. for literal tree */ + struct tree_desc_s d_desc; /* desc. for distance tree */ + struct tree_desc_s bl_desc; /* desc. for bit length tree */ + + ush bl_count[MAX_BITS+1]; + /* number of codes at each bit length for an optimal tree */ + + int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ + int heap_len; /* number of elements in the heap */ + int heap_max; /* element of largest frequency */ + /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. + * The same heap array is used to build all trees. + */ + + uch depth[2*L_CODES+1]; + /* Depth of each subtree used as tie breaker for trees of equal frequency + */ + + uchf *l_buf; /* buffer for literals or lengths */ + + uInt lit_bufsize; + /* Size of match buffer for literals/lengths. There are 4 reasons for + * limiting lit_bufsize to 64K: + * - frequencies can be kept in 16 bit counters + * - if compression is not successful for the first block, all input + * data is still in the window so we can still emit a stored block even + * when input comes from standard input. (This can also be done for + * all blocks if lit_bufsize is not greater than 32K.) + * - if compression is not successful for a file smaller than 64K, we can + * even emit a stored file instead of a stored block (saving 5 bytes). + * This is applicable only for zip (not gzip or zlib). 
+ * - creating new Huffman trees less frequently may not provide fast + * adaptation to changes in the input data statistics. (Take for + * example a binary file with poorly compressible code followed by + * a highly compressible string table.) Smaller buffer sizes give + * fast adaptation but have of course the overhead of transmitting + * trees more frequently. + * - I can't count above 4 + */ + + uInt last_lit; /* running index in l_buf */ + + ushf *d_buf; + /* Buffer for distances. To simplify the code, d_buf and l_buf have + * the same number of elements. To use different lengths, an extra flag + * array would be necessary. + */ + + ulg opt_len; /* bit length of current block with optimal trees */ + ulg static_len; /* bit length of current block with static trees */ + ulg compressed_len; /* total bit length of compressed file */ + uInt matches; /* number of string matches in current block */ + int last_eob_len; /* bit length of EOB code for last block */ + +#ifdef DEBUG_ZLIB + ulg bits_sent; /* bit length of the compressed data */ +#endif + + ush bi_buf; + /* Output buffer. bits are inserted starting at the bottom (least + * significant bits). + */ + int bi_valid; + /* Number of valid bits in bi_buf. All bits above the last valid bit + * are always zero. + */ + +} FAR deflate_state; + +/* Output a byte on the stream. + * IN assertion: there is enough room in pending_buf. + */ +#define put_byte(s, c) {s->pending_buf[s->pending++] = (c);} + + +#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) +/* Minimum amount of lookahead, except at the end of the input file. + * See deflate.c for comments about the MIN_MATCH+1. + */ + +#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD) +/* In order to simplify the code, particularly on 16 bit machines, match + * distances are limited to MAX_DIST instead of WSIZE. 
+ */ + + /* in trees.c */ +static void _tr_init OF((deflate_state *s)); +static int _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc)); +static ulg _tr_flush_block OF((deflate_state *s, charf *buf, ulg stored_len, + int eof)); +static void _tr_align OF((deflate_state *s)); +static void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len, + int eof)); +static void _tr_stored_type_only OF((deflate_state *)); + +#endif +/* --- deflate.h */ + +/* +++ deflate.c */ +/* deflate.c -- compress data using the deflation algorithm + * Copyright (C) 1995-1996 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * ALGORITHM + * + * The "deflation" process depends on being able to identify portions + * of the input text which are identical to earlier input (within a + * sliding window trailing behind the input currently being processed). + * + * The most straightforward technique turns out to be the fastest for + * most input files: try all possible matches and select the longest. + * The key feature of this algorithm is that insertions into the string + * dictionary are very simple and thus fast, and deletions are avoided + * completely. Insertions are performed at each input character, whereas + * string matches are performed only when the previous match ends. So it + * is preferable to spend more time in matches to allow very fast string + * insertions and avoid deletions. The matching algorithm for small + * strings is inspired from that of Rabin & Karp. A brute force approach + * is used to find longer strings when a small match has been found. + * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze + * (by Leonid Broukhis). + * A previous version of this file used a more sophisticated algorithm + * (by Fiala and Greene) which is guaranteed to run in linear amortized + * time, but has a larger average cost, uses more memory and is patented. 
+ * However the F&G algorithm may be faster for some highly redundant + * files if the parameter max_chain_length (described below) is too large. + * + * ACKNOWLEDGEMENTS + * + * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and + * I found it in 'freeze' written by Leonid Broukhis. + * Thanks to many people for bug reports and testing. + * + * REFERENCES + * + * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification". + * Available in ftp://ds.internic.net/rfc/rfc1951.txt + * + * A description of the Rabin and Karp algorithm is given in the book + * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. + * + * Fiala,E.R., and Greene,D.H. + * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595 + * + */ + +/* From: deflate.c,v 1.15 1996/07/24 13:40:58 me Exp $ */ + +/* #include "deflate.h" */ + +// static char deflate_copyright[] = " deflate 1.0.4 Copyright 1995-1996 Jean-loup Gailly "; +/* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot + include such an acknowledgment, I would appreciate that you keep this + copyright string in the executable of your product. + */ + +/* =========================================================================== + * Function prototypes. + */ +typedef enum { + need_more, /* block not completed, need more input or more output */ + block_done, /* block flush performed */ + finish_started, /* finish started, need only more output at next deflate */ + finish_done /* finish done, accept no more input or output */ +} block_state; + +typedef block_state (*compress_func) OF((deflate_state *s, int flush)); +/* Compression function. Returns the block state after the call. 
*/ + +static void fill_window OF((deflate_state *s)); +static block_state deflate_stored OF((deflate_state *s, int flush)); +static block_state deflate_fast OF((deflate_state *s, int flush)); +static block_state deflate_slow OF((deflate_state *s, int flush)); +static void lm_init OF((deflate_state *s)); +static void putShortMSB OF((deflate_state *s, uInt b)); +static void flush_pending OF((z_streamp strm)); +static int read_buf OF((z_streamp strm, charf *buf, unsigned size)); +#ifdef ASMV + void match_init OF((void)); /* asm code initialization */ + uInt longest_match OF((deflate_state *s, IPos cur_match)); +#else +static uInt longest_match OF((deflate_state *s, IPos cur_match)); +#endif + +#ifdef DEBUG_ZLIB +static void check_match OF((deflate_state *s, IPos start, IPos match, + int length)); +#endif + +/* =========================================================================== + * Local data + */ + +#define NIL 0 +/* Tail of hash chains */ + +#ifndef TOO_FAR +# define TOO_FAR 4096 +#endif +/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ + +#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) +/* Minimum amount of lookahead, except at the end of the input file. + * See deflate.c for comments about the MIN_MATCH+1. + */ + +/* Values for max_lazy_match, good_match and max_chain_length, depending on + * the desired pack level (0..9). The values given below have been tuned to + * exclude worst case performance for pathological files. Better values may be + * found for specific files. 
 + */ +typedef struct config_s { + ush good_length; /* reduce lazy search above this match length */ + ush max_lazy; /* do not perform lazy search above this match length */ + ush nice_length; /* quit search above this match length */ + ush max_chain; + compress_func func; +} config; + +static config configuration_table[10] = { +/* good lazy nice chain */ +/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ +/* 1 */ {4, 4, 8, 4, deflate_fast}, /* maximum speed, no lazy matches */ +/* 2 */ {4, 5, 16, 8, deflate_fast}, +/* 3 */ {4, 6, 32, 32, deflate_fast}, + +/* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */ +/* 5 */ {8, 16, 32, 32, deflate_slow}, +/* 6 */ {8, 16, 128, 128, deflate_slow}, +/* 7 */ {8, 32, 128, 256, deflate_slow}, +/* 8 */ {32, 128, 258, 1024, deflate_slow}, +/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* maximum compression */ + +/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 + * For deflate_fast() (levels <= 3) good is ignored and lazy has a different + * meaning. + */ + +#define EQUAL 0 +/* result of memcmp for equal strings */ + +#ifndef NO_DUMMY_DECL +struct static_tree_desc_s {int dummy;}; /* for buggy compilers */ +#endif + +/* =========================================================================== + * Update a hash value with the given input byte + * IN assertion: all calls to to UPDATE_HASH are made with consecutive + * input characters, so that a running hash key can be computed from the + * previous key instead of complete recalculation each time. + */ +#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask) + + +/* =========================================================================== + * Insert string str in the dictionary and set match_head to the previous head + * of the hash chain (the most recent string with same hash key). Return + * the previous length of the hash chain. 
+ * IN assertion: all calls to to INSERT_STRING are made with consecutive + * input characters and the first MIN_MATCH bytes of str are valid + * (except for the last MIN_MATCH-1 bytes of the input file). + */ +#define INSERT_STRING(s, str, match_head) \ + (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ + s->prev[(str) & s->w_mask] = match_head = s->head[s->ins_h], \ + s->head[s->ins_h] = (Pos)(str)) + +/* =========================================================================== + * Initialize the hash table (avoiding 64K overflow for 16 bit systems). + * prev[] will be initialized on the fly. + */ +#define CLEAR_HASH(s) \ + s->head[s->hash_size-1] = NIL; \ + memset((charf *)s->head, 0, (unsigned)(s->hash_size-1)*sizeof(*s->head)); + +/* ========================================================================= */ +int deflateInit_(strm, level, version, stream_size) + z_streamp strm; + int level; + const char *version; + int stream_size; +{ + return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL, + Z_DEFAULT_STRATEGY, version, stream_size); + /* To do: ignore strm->next_in if we use it as window */ +} + +/* ========================================================================= */ +int deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + version, stream_size) + z_streamp strm; + int level; + int method; + int windowBits; + int memLevel; + int strategy; + const char *version; + int stream_size; +{ + deflate_state *s; + int noheader = 0; + static char* my_version = ZLIB_VERSION; + + ushf *overlay; + /* We overlay pending_buf and d_buf+l_buf. This works since the average + * output size for (length,distance) codes is <= 24 bits. 
+ */ + + if (version == Z_NULL || version[0] != my_version[0] || + stream_size != sizeof(z_stream)) { + return Z_VERSION_ERROR; + } + if (strm == Z_NULL) return Z_STREAM_ERROR; + + strm->msg = Z_NULL; +#ifndef NO_ZCFUNCS + if (strm->zalloc == Z_NULL) { + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; + } + if (strm->zfree == Z_NULL) strm->zfree = zcfree; +#endif + + if (level == Z_DEFAULT_COMPRESSION) level = 6; + + if (windowBits < 0) { /* undocumented feature: suppress zlib header */ + noheader = 1; + windowBits = -windowBits; + } + if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED || + windowBits < 8 || windowBits > 15 || level < 0 || level > 9 || + strategy < 0 || strategy > Z_HUFFMAN_ONLY) { + return Z_STREAM_ERROR; + } + s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state)); + if (s == Z_NULL) return Z_MEM_ERROR; + strm->state = (struct internal_state FAR *)s; + s->strm = strm; + + s->noheader = noheader; + s->w_bits = windowBits; + s->w_size = 1 << s->w_bits; + s->w_mask = s->w_size - 1; + + s->hash_bits = memLevel + 7; + s->hash_size = 1 << s->hash_bits; + s->hash_mask = s->hash_size - 1; + s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); + + s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); + s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); + s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); + + s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ + + overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); + s->pending_buf = (uchf *) overlay; + s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); + + if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || + s->pending_buf == Z_NULL) { + strm->msg = (char*)ERR_MSG(Z_MEM_ERROR); + deflateEnd (strm); + return Z_MEM_ERROR; + } + s->d_buf = overlay + s->lit_bufsize/sizeof(ush); + s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; + + s->level = level; + s->strategy = strategy; + s->method = 
(Byte)method; + + return deflateReset(strm); +} + +/* ========================================================================= */ +int deflateReset (strm) + z_streamp strm; +{ + deflate_state *s; + + if (strm == Z_NULL || strm->state == Z_NULL || + strm->zalloc == Z_NULL || strm->zfree == Z_NULL) return Z_STREAM_ERROR; + + strm->total_in = strm->total_out = 0; + strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */ + strm->data_type = Z_UNKNOWN; + + s = (deflate_state *)strm->state; + s->pending = 0; + s->pending_out = s->pending_buf; + + if (s->noheader < 0) { + s->noheader = 0; /* was set to -1 by deflate(..., Z_FINISH); */ + } + s->status = s->noheader ? BUSY_STATE : INIT_STATE; + strm->adler = 1; + s->last_flush = Z_NO_FLUSH; + + _tr_init(s); + lm_init(s); + + return Z_OK; +} + +/* ========================================================================= + * Put a short in the pending buffer. The 16-bit value is put in MSB order. + * IN assertion: the stream state is correct and there is enough room in + * pending_buf. + */ +static void putShortMSB (s, b) + deflate_state *s; + uInt b; +{ + put_byte(s, (Byte)(b >> 8)); + put_byte(s, (Byte)(b & 0xff)); +} + +/* ========================================================================= + * Flush as much pending output as possible. All deflate() output goes + * through this function so some applications may wish to modify it + * to avoid allocating a large strm->next_out buffer and copying into it. + * (See also read_buf()). 
+ */ +static void flush_pending(strm) + z_streamp strm; +{ + deflate_state *s = (deflate_state *) strm->state; + unsigned len = s->pending; + + if (len > strm->avail_out) len = strm->avail_out; + if (len == 0) return; + + if (strm->next_out != Z_NULL) { + memcpy(strm->next_out, s->pending_out, len); + strm->next_out += len; + } + s->pending_out += len; + strm->total_out += len; + strm->avail_out -= len; + s->pending -= len; + if (s->pending == 0) { + s->pending_out = s->pending_buf; + } +} + +/* ========================================================================= */ +int deflate (strm, flush) + z_streamp strm; + int flush; +{ + int old_flush; /* value of flush param for previous deflate call */ + deflate_state *s; + + if (strm == Z_NULL || strm->state == Z_NULL || + flush > Z_FINISH || flush < 0) { + return Z_STREAM_ERROR; + } + s = (deflate_state *) strm->state; + + if ((strm->next_in == Z_NULL && strm->avail_in != 0) || + (s->status == FINISH_STATE && flush != Z_FINISH)) { + ERR_RETURN(strm, Z_STREAM_ERROR); + } + if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR); + + s->strm = strm; /* just in case */ + old_flush = s->last_flush; + s->last_flush = flush; + + /* Write the zlib header */ + if (s->status == INIT_STATE) { + + uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8; + uInt level_flags = (s->level-1) >> 1; + + if (level_flags > 3) level_flags = 3; + header |= (level_flags << 6); + if (s->strstart != 0) header |= PRESET_DICT; + header += 31 - (header % 31); + + s->status = BUSY_STATE; + putShortMSB(s, header); + + /* Save the adler32 of the preset dictionary: */ + if (s->strstart != 0) { + putShortMSB(s, (uInt)(strm->adler >> 16)); + putShortMSB(s, (uInt)(strm->adler & 0xffff)); + } + strm->adler = 1L; + } + + /* Flush as much pending output as possible */ + if (s->pending != 0) { + flush_pending(strm); + if (strm->avail_out == 0) { + /* Since avail_out is 0, deflate will be called again with + * more output space, but possibly with both 
pending and + * avail_in equal to zero. There won't be anything to do, + * but this is not an error situation so make sure we + * return OK instead of BUF_ERROR at next call of deflate: + */ + s->last_flush = -1; + return Z_OK; + } + + /* Make sure there is something to do and avoid duplicate consecutive + * flushes. For repeated and useless calls with Z_FINISH, we keep + * returning Z_STREAM_END instead of Z_BUFF_ERROR. + */ + } else if (strm->avail_in == 0 && flush <= old_flush && + flush != Z_FINISH) { + ERR_RETURN(strm, Z_BUF_ERROR); + } + + /* User must not provide more input after the first FINISH: */ + if (s->status == FINISH_STATE && strm->avail_in != 0) { + ERR_RETURN(strm, Z_BUF_ERROR); + } + + /* Start a new block or continue the current one. + */ + if (strm->avail_in != 0 || s->lookahead != 0 || + (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { + block_state bstate; + + bstate = (*(configuration_table[s->level].func))(s, flush); + + if (bstate == finish_started || bstate == finish_done) { + s->status = FINISH_STATE; + } + if (bstate == need_more || bstate == finish_started) { + if (strm->avail_out == 0) { + s->last_flush = -1; /* avoid BUF_ERROR next call, see above */ + } + return Z_OK; + /* If flush != Z_NO_FLUSH && avail_out == 0, the next call + * of deflate should use the same flush parameter to make sure + * that the flush is complete. So we don't have to output an + * empty block here, this will be done at next call. This also + * ensures that for a very small output buffer, we emit at most + * one empty block. + */ + } + if (bstate == block_done) { + if (flush == Z_PARTIAL_FLUSH) { + _tr_align(s); + } else if (flush == Z_PACKET_FLUSH) { + /* Output just the 3-bit `stored' block type value, + but not a zero length. */ + _tr_stored_type_only(s); + } else { /* FULL_FLUSH or SYNC_FLUSH */ + _tr_stored_block(s, (char*)0, 0L, 0); + /* For a full flush, this empty block will be recognized + * as a special marker by inflate_sync(). 
+ */ + if (flush == Z_FULL_FLUSH) { + CLEAR_HASH(s); /* forget history */ + } + } + flush_pending(strm); + if (strm->avail_out == 0) { + s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */ + return Z_OK; + } + } + } + Assert(strm->avail_out > 0, "bug2"); + + if (flush != Z_FINISH) return Z_OK; + if (s->noheader) return Z_STREAM_END; + + /* Write the zlib trailer (adler32) */ + putShortMSB(s, (uInt)(strm->adler >> 16)); + putShortMSB(s, (uInt)(strm->adler & 0xffff)); + flush_pending(strm); + /* If avail_out is zero, the application will call deflate again + * to flush the rest. + */ + s->noheader = -1; /* write the trailer only once! */ + return s->pending != 0 ? Z_OK : Z_STREAM_END; +} + +/* ========================================================================= */ +int deflateEnd (strm) + z_streamp strm; +{ + int status; + deflate_state *s; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + s = (deflate_state *) strm->state; + + status = s->status; + if (status != INIT_STATE && status != BUSY_STATE && + status != FINISH_STATE) { + return Z_STREAM_ERROR; + } + + /* Deallocate in reverse order of allocations: */ + TRY_FREE(strm, s->pending_buf); + TRY_FREE(strm, s->head); + TRY_FREE(strm, s->prev); + TRY_FREE(strm, s->window); + + ZFREE(strm, s); + strm->state = Z_NULL; + + return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; +} + +/* =========================================================================== + * Read a new buffer from the current input stream, update the adler32 + * and total number of bytes read. All deflate() input goes through + * this function so some applications may wish to modify it to avoid + * allocating a large strm->next_in buffer and copying from it. + * (See also flush_pending()). 
+ */ +static int read_buf(strm, buf, size) + z_streamp strm; + charf *buf; + unsigned size; +{ + unsigned len = strm->avail_in; + + if (len > size) len = size; + if (len == 0) return 0; + + strm->avail_in -= len; + + if (!((deflate_state *)(strm->state))->noheader) { + strm->adler = adler32(strm->adler, strm->next_in, len); + } + memcpy(buf, strm->next_in, len); + strm->next_in += len; + strm->total_in += len; + + return (int)len; +} + +/* =========================================================================== + * Initialize the "longest match" routines for a new zlib stream + */ +static void lm_init (s) + deflate_state *s; +{ + s->window_size = (ulg)2L*s->w_size; + + CLEAR_HASH(s); + + /* Set the default configuration parameters: + */ + s->max_lazy_match = configuration_table[s->level].max_lazy; + s->good_match = configuration_table[s->level].good_length; + s->nice_match = configuration_table[s->level].nice_length; + s->max_chain_length = configuration_table[s->level].max_chain; + + s->strstart = 0; + s->block_start = 0L; + s->lookahead = 0; + s->match_length = s->prev_length = MIN_MATCH-1; + s->match_available = 0; + s->ins_h = 0; +#ifdef ASMV + match_init(); /* initialize the asm code */ +#endif +} + +/* =========================================================================== + * Set match_start to the longest match starting at the given string and + * return its length. Matches shorter or equal to prev_length are discarded, + * in which case the result is equal to prev_length and match_start is + * garbage. + * IN assertions: cur_match is the head of the hash chain for the current + * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1 + * OUT assertion: the match length is not greater than s->lookahead. + */ +#ifndef ASMV +/* For 80x86 and 680x0, an optimized version will be provided in match.asm or + * match.S. The code will be functionally equivalent. 
+ */ +static uInt longest_match(s, cur_match) + deflate_state *s; + IPos cur_match; /* current match */ +{ + unsigned chain_length = s->max_chain_length;/* max hash chain length */ + register Bytef *scan = s->window + s->strstart; /* current string */ + register Bytef *match; /* matched string */ + register int len; /* length of current match */ + int best_len = s->prev_length; /* best match length so far */ + int nice_match = s->nice_match; /* stop if match long enough */ + IPos limit = s->strstart > (IPos)MAX_DIST(s) ? + s->strstart - (IPos)MAX_DIST(s) : NIL; + /* Stop when cur_match becomes <= limit. To simplify the code, + * we prevent matches with the string of window index 0. + */ + Posf *prev = s->prev; + uInt wmask = s->w_mask; + +#ifdef UNALIGNED_OK + /* Compare two bytes at a time. Note: this is not always beneficial. + * Try with and without -DUNALIGNED_OK to check. + */ + register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1; + register ush scan_start = *(ushf*)scan; + register ush scan_end = *(ushf*)(scan+best_len-1); +#else + register Bytef *strend = s->window + s->strstart + MAX_MATCH; + register Byte scan_end1 = scan[best_len-1]; + register Byte scan_end = scan[best_len]; +#endif + + /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. + * It is easy to get rid of this optimization if necessary. + */ + Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); + + /* Do not waste too much time if we already have a good match: */ + if (s->prev_length >= s->good_match) { + chain_length >>= 2; + } + /* Do not look for matches beyond the end of the input. This is necessary + * to make deflate deterministic. 
+ */ + if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; + + Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); + + do { + Assert(cur_match < s->strstart, "no future"); + match = s->window + cur_match; + + /* Skip to next match if the match length cannot increase + * or if the match length is less than 2: + */ +#if (defined(UNALIGNED_OK) && MAX_MATCH == 258) + /* This code assumes sizeof(unsigned short) == 2. Do not use + * UNALIGNED_OK if your compiler uses a different size. + */ + if (*(ushf*)(match+best_len-1) != scan_end || + *(ushf*)match != scan_start) continue; + + /* It is not necessary to compare scan[2] and match[2] since they are + * always equal when the other bytes match, given that the hash keys + * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at + * strstart+3, +5, ... up to strstart+257. We check for insufficient + * lookahead only every 4th comparison; the 128th check will be made + * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is + * necessary to put more guard bytes at the end of the window, or + * to check more often for insufficient lookahead. + */ + Assert(scan[2] == match[2], "scan[2]?"); + scan++, match++; + do { + } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + scan < strend); + /* The funny "do {}" generates better code on most compilers */ + + /* Here, scan <= window+strstart+257 */ + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + if (*scan == *match) scan++; + + len = (MAX_MATCH - 1) - (int)(strend-scan); + scan = strend - (MAX_MATCH-1); + +#else /* UNALIGNED_OK */ + + if (match[best_len] != scan_end || + match[best_len-1] != scan_end1 || + *match != *scan || + *++match != scan[1]) continue; + + /* The check at best_len-1 can be removed because it will be made + * again later. 
(This heuristic is not always a win.) + * It is not necessary to compare scan[2] and match[2] since they + * are always equal when the other bytes match, given that + * the hash keys are equal and that HASH_BITS >= 8. + */ + scan += 2, match++; + Assert(*scan == *match, "match[2]?"); + + /* We check for insufficient lookahead only every 8th comparison; + * the 256th check will be made at strstart+258. + */ + do { + } while (*++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + scan < strend); + + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + + len = MAX_MATCH - (int)(strend - scan); + scan = strend - MAX_MATCH; + +#endif /* UNALIGNED_OK */ + + if (len > best_len) { + s->match_start = cur_match; + best_len = len; + if (len >= nice_match) break; +#ifdef UNALIGNED_OK + scan_end = *(ushf*)(scan+best_len-1); +#else + scan_end1 = scan[best_len-1]; + scan_end = scan[best_len]; +#endif + } + } while ((cur_match = prev[cur_match & wmask]) > limit + && --chain_length != 0); + + if ((uInt)best_len <= s->lookahead) return best_len; + return s->lookahead; +} +#endif /* ASMV */ + +#ifdef DEBUG_ZLIB +/* =========================================================================== + * Check that the match at match_start is indeed a match. 
+ */ +static void check_match(s, start, match, length) + deflate_state *s; + IPos start, match; + int length; +{ + /* check that the match is indeed a match */ + if (zmemcmp((charf *)s->window + match, + (charf *)s->window + start, length) != EQUAL) { + fprintf(stderr, " start %u, match %u, length %d\n", + start, match, length); + do { + fprintf(stderr, "%c%c", s->window[match++], s->window[start++]); + } while (--length != 0); + z_error("invalid match"); + } + if (z_verbose > 1) { + fprintf(stderr,"\\[%d,%d]", start-match, length); + do { putc(s->window[start++], stderr); } while (--length != 0); + } +} +#else +# define check_match(s, start, match, length) +#endif + +/* =========================================================================== + * Fill the window when the lookahead becomes insufficient. + * Updates strstart and lookahead. + * + * IN assertion: lookahead < MIN_LOOKAHEAD + * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD + * At least one byte has been read, or avail_in == 0; reads are + * performed for at least two bytes (required for the zip translate_eol + * option -- not supported here). + */ +static void fill_window(s) + deflate_state *s; +{ + register unsigned n, m; + register Posf *p; + unsigned more; /* Amount of free space at the end of the window. */ + uInt wsize = s->w_size; + + do { + more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart); + + /* Deal with !@#$% 64K limit: */ + if (more == 0 && s->strstart == 0 && s->lookahead == 0) { + more = wsize; + + } else if (more == (unsigned)(-1)) { + /* Very unlikely, but possible on 16 bit machine if strstart == 0 + * and lookahead == 1 (input done one byte at time) + */ + more--; + + /* If the window is almost full and there is insufficient lookahead, + * move the upper half to the lower one to make room in the upper half. 
+ */ + } else if (s->strstart >= wsize+MAX_DIST(s)) { + + memcpy((charf *)s->window, (charf *)s->window+wsize, + (unsigned)wsize); + s->match_start -= wsize; + s->strstart -= wsize; /* we now have strstart >= MAX_DIST */ + s->block_start -= (long) wsize; + + /* Slide the hash table (could be avoided with 32 bit values + at the expense of memory usage). We slide even when level == 0 + to keep the hash table consistent if we switch back to level > 0 + later. (Using level 0 permanently is not an optimal usage of + zlib, so we don't care about this pathological case.) + */ + n = s->hash_size; + p = &s->head[n]; + do { + m = *--p; + *p = (Pos)(m >= wsize ? m-wsize : NIL); + } while (--n); + + n = wsize; + p = &s->prev[n]; + do { + m = *--p; + *p = (Pos)(m >= wsize ? m-wsize : NIL); + /* If n is not on any hash chain, prev[n] is garbage but + * its value will never be used. + */ + } while (--n); + more += wsize; + } + if (s->strm->avail_in == 0) return; + + /* If there was no sliding: + * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 && + * more == window_size - lookahead - strstart + * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1) + * => more >= window_size - 2*WSIZE + 2 + * In the BIG_MEM or MMAP case (not yet supported), + * window_size == input_size + MIN_LOOKAHEAD && + * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD. + * Otherwise, window_size == 2*WSIZE so more >= 2. + * If there was sliding, more >= WSIZE. So in all cases, more >= 2. 
+ */ + Assert(more >= 2, "more < 2"); + + n = read_buf(s->strm, (charf *)s->window + s->strstart + s->lookahead, + more); + s->lookahead += n; + + /* Initialize the hash value now that we have some input: */ + if (s->lookahead >= MIN_MATCH) { + s->ins_h = s->window[s->strstart]; + UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); +#if MIN_MATCH != 3 + Call UPDATE_HASH() MIN_MATCH-3 more times +#endif + } + /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage, + * but this is not important since only literal bytes will be emitted. + */ + + } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0); +} + +/* =========================================================================== + * Flush the current block, with given end-of-file flag. + * IN assertion: strstart is set to the end of the current match. + */ +#define FLUSH_BLOCK_ONLY(s, eof) { \ + _tr_flush_block(s, (s->block_start >= 0L ? \ + (charf *)&s->window[(unsigned)s->block_start] : \ + (charf *)Z_NULL), \ + (ulg)((long)s->strstart - s->block_start), \ + (eof)); \ + s->block_start = s->strstart; \ + flush_pending(s->strm); \ + Tracev((stderr,"[FLUSH]")); \ +} + +/* Same but force premature exit if necessary. */ +#define FLUSH_BLOCK(s, eof) { \ + FLUSH_BLOCK_ONLY(s, eof); \ + if (s->strm->avail_out == 0) return (eof) ? finish_started : need_more; \ +} + +/* =========================================================================== + * Copy without compression as much as possible from the input stream, return + * the current block state. + * This function does not insert new strings in the dictionary since + * uncompressible data is probably not useful. This function is used + * only for the level=0 compression option. + * NOTE: this function should be optimized to avoid extra copying from + * window to pending_buf. 
+ */ +static block_state deflate_stored(s, flush) + deflate_state *s; + int flush; +{ + /* Stored blocks are limited to 0xffff bytes, pending_buf is limited + * to pending_buf_size, and each stored block has a 5 byte header: + */ + ulg max_block_size = 0xffff; + ulg max_start; + + if (max_block_size > s->pending_buf_size - 5) { + max_block_size = s->pending_buf_size - 5; + } + + /* Copy as much as possible from input to output: */ + for (;;) { + /* Fill the window as much as possible: */ + if (s->lookahead <= 1) { + + Assert(s->strstart < s->w_size+MAX_DIST(s) || + s->block_start >= (long)s->w_size, "slide too late"); + + fill_window(s); + if (s->lookahead == 0 && flush == Z_NO_FLUSH) return need_more; + + if (s->lookahead == 0) break; /* flush the current block */ + } + Assert(s->block_start >= 0L, "block gone"); + + s->strstart += s->lookahead; + s->lookahead = 0; + + /* Emit a stored block if pending_buf will be full: */ + max_start = s->block_start + max_block_size; + if (s->strstart == 0 || (ulg)s->strstart >= max_start) { + /* strstart == 0 is possible when wraparound on 16-bit machine */ + s->lookahead = (uInt)(s->strstart - max_start); + s->strstart = (uInt)max_start; + FLUSH_BLOCK(s, 0); + } + /* Flush if we may have to slide, otherwise block_start may become + * negative and the data will be gone: + */ + if (s->strstart - (uInt)s->block_start >= MAX_DIST(s)) { + FLUSH_BLOCK(s, 0); + } + } + FLUSH_BLOCK(s, flush == Z_FINISH); + return flush == Z_FINISH ? finish_done : block_done; +} + +/* =========================================================================== + * Compress as much as possible from the input stream, return the current + * block state. + * This function does not perform lazy evaluation of matches and inserts + * new strings in the dictionary only for unmatched strings or for short + * matches. It is used only for the fast compression options. 
+ */ +static block_state deflate_fast(s, flush) + deflate_state *s; + int flush; +{ + IPos hash_head = NIL; /* head of the hash chain */ + int bflush; /* set if current block must be flushed */ + + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. + */ + if (s->lookahead < MIN_LOOKAHEAD) { + fill_window(s); + if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + if (s->lookahead >= MIN_MATCH) { + INSERT_STRING(s, s->strstart, hash_head); + } + + /* Find the longest match, discarding those <= prev_length. + * At this point we have always match_length < MIN_MATCH + */ + if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). + */ + if (s->strategy != Z_HUFFMAN_ONLY) { + s->match_length = longest_match (s, hash_head); + } + /* longest_match() sets match_start */ + } + if (s->match_length >= MIN_MATCH) { + check_match(s, s->strstart, s->match_start, s->match_length); + + bflush = _tr_tally(s, s->strstart - s->match_start, + s->match_length - MIN_MATCH); + + s->lookahead -= s->match_length; + + /* Insert new strings in the hash table only if the match length + * is not too large. This saves time but degrades compression. 
+ */ + if (s->match_length <= s->max_insert_length && + s->lookahead >= MIN_MATCH) { + s->match_length--; /* string at strstart already in hash table */ + do { + s->strstart++; + INSERT_STRING(s, s->strstart, hash_head); + /* strstart never exceeds WSIZE-MAX_MATCH, so there are + * always MIN_MATCH bytes ahead. + */ + } while (--s->match_length != 0); + s->strstart++; + } else { + s->strstart += s->match_length; + s->match_length = 0; + s->ins_h = s->window[s->strstart]; + UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); +#if MIN_MATCH != 3 + Call UPDATE_HASH() MIN_MATCH-3 more times +#endif + /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not + * matter since it will be recomputed at next deflate call. + */ + } + } else { + /* No match, output a literal byte */ + Tracevv((stderr,"%c", s->window[s->strstart])); + bflush = _tr_tally (s, 0, s->window[s->strstart]); + s->lookahead--; + s->strstart++; + } + if (bflush) FLUSH_BLOCK(s, 0); + } + FLUSH_BLOCK(s, flush == Z_FINISH); + return flush == Z_FINISH ? finish_done : block_done; +} + +/* =========================================================================== + * Same as above, but achieves better compression. We use a lazy + * evaluation for matches: a match is finally adopted only if there is + * no better match at the next window position. + */ +static block_state deflate_slow(s, flush) + deflate_state *s; + int flush; +{ + IPos hash_head = NIL; /* head of hash chain */ + int bflush; /* set if current block must be flushed */ + + /* Process the input block. */ + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. 
+ */ + if (s->lookahead < MIN_LOOKAHEAD) { + fill_window(s); + if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + if (s->lookahead >= MIN_MATCH) { + INSERT_STRING(s, s->strstart, hash_head); + } + + /* Find the longest match, discarding those <= prev_length. + */ + s->prev_length = s->match_length, s->prev_match = s->match_start; + s->match_length = MIN_MATCH-1; + + if (hash_head != NIL && s->prev_length < s->max_lazy_match && + s->strstart - hash_head <= MAX_DIST(s)) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). + */ + if (s->strategy != Z_HUFFMAN_ONLY) { + s->match_length = longest_match (s, hash_head); + } + /* longest_match() sets match_start */ + + if (s->match_length <= 5 && (s->strategy == Z_FILTERED || + (s->match_length == MIN_MATCH && + s->strstart - s->match_start > TOO_FAR))) { + + /* If prev_match is also MIN_MATCH, match_start is garbage + * but we will ignore the current match anyway. + */ + s->match_length = MIN_MATCH-1; + } + } + /* If there was a match at the previous step and the current + * match is not better, output the previous match: + */ + if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) { + uInt max_insert = s->strstart + s->lookahead - MIN_MATCH; + /* Do not insert strings in hash table beyond this. */ + + check_match(s, s->strstart-1, s->prev_match, s->prev_length); + + bflush = _tr_tally(s, s->strstart -1 - s->prev_match, + s->prev_length - MIN_MATCH); + + /* Insert in hash table all strings up to the end of the match. + * strstart-1 and strstart are already inserted. 
If there is not + * enough lookahead, the last two strings are not inserted in + * the hash table. + */ + s->lookahead -= s->prev_length-1; + s->prev_length -= 2; + do { + if (++s->strstart <= max_insert) { + INSERT_STRING(s, s->strstart, hash_head); + } + } while (--s->prev_length != 0); + s->match_available = 0; + s->match_length = MIN_MATCH-1; + s->strstart++; + + if (bflush) FLUSH_BLOCK(s, 0); + + } else if (s->match_available) { + /* If there was no match at the previous position, output a + * single literal. If there was a match but the current match + * is longer, truncate the previous match to a single literal. + */ + Tracevv((stderr,"%c", s->window[s->strstart-1])); + if (_tr_tally (s, 0, s->window[s->strstart-1])) { + FLUSH_BLOCK_ONLY(s, 0); + } + s->strstart++; + s->lookahead--; + if (s->strm->avail_out == 0) return need_more; + } else { + /* There is no previous match to compare with, wait for + * the next step to decide. + */ + s->match_available = 1; + s->strstart++; + s->lookahead--; + } + } + Assert (flush != Z_NO_FLUSH, "no flush?"); + if (s->match_available) { + Tracevv((stderr,"%c", s->window[s->strstart-1])); + _tr_tally (s, 0, s->window[s->strstart-1]); + s->match_available = 0; + } + FLUSH_BLOCK(s, flush == Z_FINISH); + return flush == Z_FINISH ? finish_done : block_done; +} +/* --- deflate.c */ + +/* +++ trees.c */ +/* trees.c -- output deflated data using Huffman coding + * Copyright (C) 1995-1996 Jean-loup Gailly + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * ALGORITHM + * + * The "deflation" process uses several Huffman trees. The more + * common source values are represented by shorter bit sequences. + * + * Each code tree is stored in a compressed form which is itself + * a Huffman encoding of the lengths of all the code strings (in + * ascending order by source values). 
The actual code strings are
+ * reconstructed from the lengths in the inflate process, as described
+ * in the deflate specification.
+ *
+ *  REFERENCES
+ *
+ *      Deutsch, L.P.,"'Deflate' Compressed Data Format Specification".
+ *      Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc
+ *
+ *      Storer, James A.
+ *          Data Compression:  Methods and Theory, pp. 49-50.
+ *          Computer Science Press, 1988.  ISBN 0-7167-8156-5.
+ *
+ *      Sedgewick, R.
+ *          Algorithms, p290.
+ *          Addison-Wesley, 1983. ISBN 0-201-06672-6.
+ */
+
+/* From: trees.c,v 1.11 1996/07/24 13:41:06 me Exp $ */
+
+/* #include "deflate.h" */
+
+#ifdef DEBUG_ZLIB
+# include <ctype.h>
+#endif
+
+/* ===========================================================================
+ * Constants
+ */
+
+#define MAX_BL_BITS 7
+/* Bit length codes must not exceed MAX_BL_BITS bits */
+
+#define END_BLOCK 256
+/* end of block literal code */
+
+#define REP_3_6 16
+/* repeat previous bit length 3-6 times (2 bits of repeat count) */
+
+#define REPZ_3_10 17
+/* repeat a zero length 3-10 times (3 bits of repeat count) */
+
+#define REPZ_11_138 18
+/* repeat a zero length 11-138 times (7 bits of repeat count) */
+
+static int extra_lbits[LENGTH_CODES] /* extra bits for each length code */
+   = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
+
+static int extra_dbits[D_CODES] /* extra bits for each distance code */
+   = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
+
+static int extra_blbits[BL_CODES]/* extra bits for each bit length code */
+   = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};
+
+static uch bl_order[BL_CODES]
+   = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
+/* The lengths of the bit length codes are sent in order of decreasing
+ * probability, to avoid transmitting the lengths for unused bit length codes.
+ */
+
+#define Buf_size (8 * 2*sizeof(char))
+/* Number of bits used within bi_buf. (bi_buf might be implemented on
+ * more than 16 bits on some systems.)
+ */ + +/* =========================================================================== + * Local data. These are initialized only once. + */ + +static ct_data static_ltree[L_CODES+2]; +/* The static literal tree. Since the bit lengths are imposed, there is no + * need for the L_CODES extra codes used during heap construction. However + * The codes 286 and 287 are needed to build a canonical tree (see _tr_init + * below). + */ + +static ct_data static_dtree[D_CODES]; +/* The static distance tree. (Actually a trivial tree since all codes use + * 5 bits.) + */ + +static uch dist_code[512]; +/* distance codes. The first 256 values correspond to the distances + * 3 .. 258, the last 256 values correspond to the top 8 bits of + * the 15 bit distances. + */ + +static uch length_code[MAX_MATCH-MIN_MATCH+1]; +/* length code for each normalized match length (0 == MIN_MATCH) */ + +static int base_length[LENGTH_CODES]; +/* First normalized length for each code (0 = MIN_MATCH) */ + +static int base_dist[D_CODES]; +/* First normalized distance for each code (0 = distance of 1) */ + +struct static_tree_desc_s { + ct_data *static_tree; /* static tree or NULL */ + intf *extra_bits; /* extra bits for each code or NULL */ + int extra_base; /* base index for extra_bits */ + int elems; /* max number of elements in the tree */ + int max_length; /* max bit length for the codes */ +}; + +static static_tree_desc static_l_desc = +{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS}; + +static static_tree_desc static_d_desc = +{static_dtree, extra_dbits, 0, D_CODES, MAX_BITS}; + +static static_tree_desc static_bl_desc = +{(ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS}; + +/* =========================================================================== + * Local (static) routines in this file. 
+ */ + +static void tr_static_init OF((void)); +static void init_block OF((deflate_state *s)); +static void pqdownheap OF((deflate_state *s, ct_data *tree, int k)); +static void gen_bitlen OF((deflate_state *s, tree_desc *desc)); +static void gen_codes OF((ct_data *tree, int max_code, ushf *bl_count)); +static void build_tree OF((deflate_state *s, tree_desc *desc)); +static void scan_tree OF((deflate_state *s, ct_data *tree, int max_code)); +static void send_tree OF((deflate_state *s, ct_data *tree, int max_code)); +static int build_bl_tree OF((deflate_state *s)); +static void send_all_trees OF((deflate_state *s, int lcodes, int dcodes, + int blcodes)); +static void compress_block OF((deflate_state *s, ct_data *ltree, + ct_data *dtree)); +static void set_data_type OF((deflate_state *s)); +static unsigned bi_reverse OF((unsigned value, int length)); +static void bi_windup OF((deflate_state *s)); +static void bi_flush OF((deflate_state *s)); +static void copy_block OF((deflate_state *s, charf *buf, unsigned len, + int header)); + +#ifndef DEBUG_ZLIB +# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len) + /* Send a code of the given tree. c and tree must not have side effects */ + +#else /* DEBUG_ZLIB */ +# define send_code(s, c, tree) \ + { if (verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \ + send_bits(s, tree[c].Code, tree[c].Len); } +#endif + +#define d_code(dist) \ + ((dist) < 256 ? dist_code[dist] : dist_code[256+((dist)>>7)]) +/* Mapping from a distance to a distance code. dist is the distance - 1 and + * must not have side effects. dist_code[256] and dist_code[257] are never + * used. + */ + +/* =========================================================================== + * Output a short LSB first on the stream. + * IN assertion: there is enough room in pendingBuf. 
+ */ +#define put_short(s, w) { \ + put_byte(s, (uch)((w) & 0xff)); \ + put_byte(s, (uch)((ush)(w) >> 8)); \ +} + +/* =========================================================================== + * Send a value on a given number of bits. + * IN assertion: length <= 16 and value fits in length bits. + */ +#ifdef DEBUG_ZLIB +static void send_bits OF((deflate_state *s, int value, int length)); + +static void send_bits(s, value, length) + deflate_state *s; + int value; /* value to send */ + int length; /* number of bits */ +{ + Tracevv((stderr," l %2d v %4x ", length, value)); + Assert(length > 0 && length <= 15, "invalid length"); + s->bits_sent += (ulg)length; + + /* If not enough room in bi_buf, use (valid) bits from bi_buf and + * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) + * unused bits in value. + */ + if (s->bi_valid > (int)Buf_size - length) { + s->bi_buf |= (value << s->bi_valid); + put_short(s, s->bi_buf); + s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); + s->bi_valid += length - Buf_size; + } else { + s->bi_buf |= value << s->bi_valid; + s->bi_valid += length; + } +} +#else /* !DEBUG_ZLIB */ + +#define send_bits(s, value, length) \ +{ int len = length;\ + if (s->bi_valid > (int)Buf_size - len) {\ + int val = value;\ + s->bi_buf |= (val << s->bi_valid);\ + put_short(s, s->bi_buf);\ + s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\ + s->bi_valid += len - Buf_size;\ + } else {\ + s->bi_buf |= (value) << s->bi_valid;\ + s->bi_valid += len;\ + }\ +} +#endif /* DEBUG_ZLIB */ + + +#define MAX(a,b) (a >= b ? a : b) +/* the arguments must not have side effects */ + +/* =========================================================================== + * Initialize the various 'constant' tables. In a multi-threaded environment, + * this function may be called by two threads concurrently, but this is + * harmless since both invocations do exactly the same thing. 
+ */
+static void tr_static_init()
+{
+    static int static_init_done = 0;
+    int n;        /* iterates over tree elements */
+    int bits;     /* bit counter */
+    int length;   /* length value */
+    int code;     /* code value */
+    int dist;     /* distance index */
+    ush bl_count[MAX_BITS+1];
+    /* number of codes at each bit length for an optimal tree */
+
+    if (static_init_done) return;
+
+    /* Initialize the mapping length (0..255) -> length code (0..28) */
+    length = 0;
+    for (code = 0; code < LENGTH_CODES-1; code++) {
+        base_length[code] = length;
+        for (n = 0; n < (1<<extra_lbits[code]); n++) {
+            length_code[length++] = (uch)code;
+        }
+    }
+    Assert (length == 256, "tr_static_init: length != 256");
+    /* Note that the length 255 (match length 258) can be represented
+     * in two different ways: code 284 + 5 bits or code 285, so we
+     * overwrite length_code[255] to use the best encoding:
+     */
+    length_code[length-1] = (uch)code;
+
+    /* Initialize the mapping dist (0..32K) -> dist code (0..29) */
+    dist = 0;
+    for (code = 0 ; code < 16; code++) {
+        base_dist[code] = dist;
+        for (n = 0; n < (1<<extra_dbits[code]); n++) {
+            dist_code[dist++] = (uch)code;
+        }
+    }
+    Assert (dist == 256, "tr_static_init: dist != 256");
+    dist >>= 7; /* from now on, all distances are divided by 128 */
+    for ( ; code < D_CODES; code++) {
+        base_dist[code] = dist << 7;
+        for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) {
+            dist_code[256 + dist++] = (uch)code;
+        }
+    }
+    Assert (dist == 256, "tr_static_init: 256+dist != 512");
+
+    /* Construct the codes of the static literal tree */
+    for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0;
+    n = 0;
+    while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++;
+    while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++;
+    while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++;
+    while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++;
+    /* Codes 286 and 287 do not exist, but we must include them in the
+     * tree construction to get a canonical Huffman tree (longest code
+     * all ones)
+     */
+    gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count);
+
+    /* The static distance tree is trivial: */
+    for (n = 0; n < D_CODES; n++) {
+        static_dtree[n].Len = 5;
+        static_dtree[n].Code = bi_reverse((unsigned)n, 5);
+    }
+    static_init_done = 1;
+}
+
+/* ===========================================================================
+ * Initialize the tree data structures for a new zlib stream.
+ */ +static void _tr_init(s) + deflate_state *s; +{ + tr_static_init(); + + s->compressed_len = 0L; + + s->l_desc.dyn_tree = s->dyn_ltree; + s->l_desc.stat_desc = &static_l_desc; + + s->d_desc.dyn_tree = s->dyn_dtree; + s->d_desc.stat_desc = &static_d_desc; + + s->bl_desc.dyn_tree = s->bl_tree; + s->bl_desc.stat_desc = &static_bl_desc; + + s->bi_buf = 0; + s->bi_valid = 0; + s->last_eob_len = 8; /* enough lookahead for inflate */ +#ifdef DEBUG_ZLIB + s->bits_sent = 0L; +#endif + + /* Initialize the first block of the first file: */ + init_block(s); +} + +/* =========================================================================== + * Initialize a new block. + */ +static void init_block(s) + deflate_state *s; +{ + int n; /* iterates over tree elements */ + + /* Initialize the trees. */ + for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0; + for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0; + for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0; + + s->dyn_ltree[END_BLOCK].Freq = 1; + s->opt_len = s->static_len = 0L; + s->last_lit = s->matches = 0; +} + +#define SMALLEST 1 +/* Index within the heap array of least frequent node in the Huffman tree */ + + +/* =========================================================================== + * Remove the smallest element from the heap and recreate the heap with + * one less element. Updates heap and heap_len. + */ +#define pqremove(s, tree, top) \ +{\ + top = s->heap[SMALLEST]; \ + s->heap[SMALLEST] = s->heap[s->heap_len--]; \ + pqdownheap(s, tree, SMALLEST); \ +} + +/* =========================================================================== + * Compares to subtrees, using the tree depth as tie breaker when + * the subtrees have equal frequency. This minimizes the worst case length. 
+ */ +#define smaller(tree, n, m, depth) \ + (tree[n].Freq < tree[m].Freq || \ + (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m])) + +/* =========================================================================== + * Restore the heap property by moving down the tree starting at node k, + * exchanging a node with the smallest of its two sons if necessary, stopping + * when the heap property is re-established (each father smaller than its + * two sons). + */ +static void pqdownheap(s, tree, k) + deflate_state *s; + ct_data *tree; /* the tree to restore */ + int k; /* node to move down */ +{ + int v = s->heap[k]; + int j = k << 1; /* left son of k */ + while (j <= s->heap_len) { + /* Set j to the smallest of the two sons: */ + if (j < s->heap_len && + smaller(tree, s->heap[j+1], s->heap[j], s->depth)) { + j++; + } + /* Exit if v is smaller than both sons */ + if (smaller(tree, v, s->heap[j], s->depth)) break; + + /* Exchange v with the smallest son */ + s->heap[k] = s->heap[j]; k = j; + + /* And continue down the tree, setting j to the left son of k */ + j <<= 1; + } + s->heap[k] = v; +} + +/* =========================================================================== + * Compute the optimal bit lengths for a tree and update the total bit length + * for the current block. + * IN assertion: the fields freq and dad are set, heap[heap_max] and + * above are the tree nodes sorted by increasing frequency. + * OUT assertions: the field len is set to the optimal bit length, the + * array bl_count contains the frequencies for each bit length. + * The length opt_len is updated; static_len is also updated if stree is + * not null. 
+ */ +static void gen_bitlen(s, desc) + deflate_state *s; + tree_desc *desc; /* the tree descriptor */ +{ + ct_data *tree = desc->dyn_tree; + int max_code = desc->max_code; + ct_data *stree = desc->stat_desc->static_tree; + intf *extra = desc->stat_desc->extra_bits; + int base = desc->stat_desc->extra_base; + int max_length = desc->stat_desc->max_length; + int h; /* heap index */ + int n, m; /* iterate over the tree elements */ + int bits; /* bit length */ + int xbits; /* extra bits */ + ush f; /* frequency */ + int overflow = 0; /* number of elements with bit length too large */ + + for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0; + + /* In a first pass, compute the optimal bit lengths (which may + * overflow in the case of the bit length tree). + */ + tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */ + + for (h = s->heap_max+1; h < HEAP_SIZE; h++) { + n = s->heap[h]; + bits = tree[tree[n].Dad].Len + 1; + if (bits > max_length) bits = max_length, overflow++; + tree[n].Len = (ush)bits; + /* We overwrite tree[n].Dad which is no longer needed */ + + if (n > max_code) continue; /* not a leaf node */ + + s->bl_count[bits]++; + xbits = 0; + if (n >= base) xbits = extra[n-base]; + f = tree[n].Freq; + s->opt_len += (ulg)f * (bits + xbits); + if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits); + } + if (overflow == 0) return; + + Trace((stderr,"\nbit length overflow\n")); + /* This happens for example on obj2 and pic of the Calgary corpus */ + + /* Find the first bit length which could increase: */ + do { + bits = max_length-1; + while (s->bl_count[bits] == 0) bits--; + s->bl_count[bits]--; /* move one leaf down the tree */ + s->bl_count[bits+1] += 2; /* move one overflow item as its brother */ + s->bl_count[max_length]--; + /* The brother of the overflow item also moves one step up, + * but this does not affect bl_count[max_length] + */ + overflow -= 2; + } while (overflow > 0); + + /* Now recompute all bit lengths, scanning in increasing 
frequency. + * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all + * lengths instead of fixing only the wrong ones. This idea is taken + * from 'ar' written by Haruhiko Okumura.) + */ + for (bits = max_length; bits != 0; bits--) { + n = s->bl_count[bits]; + while (n != 0) { + m = s->heap[--h]; + if (m > max_code) continue; + if (tree[m].Len != (unsigned) bits) { + Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits)); + s->opt_len += ((long)bits - (long)tree[m].Len) + *(long)tree[m].Freq; + tree[m].Len = (ush)bits; + } + n--; + } + } +} + +/* =========================================================================== + * Generate the codes for a given tree and bit counts (which need not be + * optimal). + * IN assertion: the array bl_count contains the bit length statistics for + * the given tree and the field len is set for all tree elements. + * OUT assertion: the field code is set for all tree elements of non + * zero code length. + */ +static void gen_codes (tree, max_code, bl_count) + ct_data *tree; /* the tree to decorate */ + int max_code; /* largest code with non zero frequency */ + ushf *bl_count; /* number of codes at each bit length */ +{ + ush next_code[MAX_BITS+1]; /* next code value for each bit length */ + ush code = 0; /* running code value */ + int bits; /* bit index */ + int n; /* code index */ + + /* The distribution counts are first used to generate the code values + * without bit reversal. + */ + for (bits = 1; bits <= MAX_BITS; bits++) { + next_code[bits] = code = (code + bl_count[bits-1]) << 1; + } + /* Check that the bit counts in bl_count are consistent. The last code + * must be all ones. 
+ */ + Assert (code + bl_count[MAX_BITS]-1 == (1<dyn_tree; + ct_data *stree = desc->stat_desc->static_tree; + int elems = desc->stat_desc->elems; + int n, m; /* iterate over heap elements */ + int max_code = -1; /* largest code with non zero frequency */ + int node; /* new node being created */ + + /* Construct the initial heap, with least frequent element in + * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1]. + * heap[0] is not used. + */ + s->heap_len = 0, s->heap_max = HEAP_SIZE; + + for (n = 0; n < elems; n++) { + if (tree[n].Freq != 0) { + s->heap[++(s->heap_len)] = max_code = n; + s->depth[n] = 0; + } else { + tree[n].Len = 0; + } + } + + /* The pkzip format requires that at least one distance code exists, + * and that at least one bit should be sent even if there is only one + * possible code. So to avoid special checks later on we force at least + * two codes of non zero frequency. + */ + while (s->heap_len < 2) { + node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0); + tree[node].Freq = 1; + s->depth[node] = 0; + s->opt_len--; if (stree) s->static_len -= stree[node].Len; + /* node is 0 or 1 so it does not have extra bits */ + } + desc->max_code = max_code; + + /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree, + * establish sub-heaps of increasing lengths: + */ + for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n); + + /* Construct the Huffman tree by repeatedly combining the least two + * frequent nodes. 
+ */ + node = elems; /* next internal node of the tree */ + do { + pqremove(s, tree, n); /* n = node of least frequency */ + m = s->heap[SMALLEST]; /* m = node of next least frequency */ + + s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */ + s->heap[--(s->heap_max)] = m; + + /* Create a new node father of n and m */ + tree[node].Freq = tree[n].Freq + tree[m].Freq; + s->depth[node] = (uch) (MAX(s->depth[n], s->depth[m]) + 1); + tree[n].Dad = tree[m].Dad = (ush)node; +#ifdef DUMP_BL_TREE + if (tree == s->bl_tree) { + fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)", + node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq); + } +#endif + /* and insert the new node in the heap */ + s->heap[SMALLEST] = node++; + pqdownheap(s, tree, SMALLEST); + + } while (s->heap_len >= 2); + + s->heap[--(s->heap_max)] = s->heap[SMALLEST]; + + /* At this point, the fields freq and dad are set. We can now + * generate the bit lengths. + */ + gen_bitlen(s, (tree_desc *)desc); + + /* The field len is now set, we can generate the bit codes */ + gen_codes ((ct_data *)tree, max_code, s->bl_count); +} + +/* =========================================================================== + * Scan a literal or distance tree to determine the frequencies of the codes + * in the bit length tree. 
+ */ +static void scan_tree (s, tree, max_code) + deflate_state *s; + ct_data *tree; /* the tree to be scanned */ + int max_code; /* and its largest code of non zero frequency */ +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + if (nextlen == 0) max_count = 138, min_count = 3; + tree[max_code+1].Len = (ush)0xffff; /* guard */ + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n+1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + s->bl_tree[curlen].Freq += count; + } else if (curlen != 0) { + if (curlen != prevlen) s->bl_tree[curlen].Freq++; + s->bl_tree[REP_3_6].Freq++; + } else if (count <= 10) { + s->bl_tree[REPZ_3_10].Freq++; + } else { + s->bl_tree[REPZ_11_138].Freq++; + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } + } +} + +/* =========================================================================== + * Send a literal or distance tree in compressed form, using the codes in + * bl_tree. 
+ */ +static void send_tree (s, tree, max_code) + deflate_state *s; + ct_data *tree; /* the tree to be scanned */ + int max_code; /* and its largest code of non zero frequency */ +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + /* tree[max_code+1].Len = -1; */ /* guard already set */ + if (nextlen == 0) max_count = 138, min_count = 3; + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n+1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + do { send_code(s, curlen, s->bl_tree); } while (--count != 0); + + } else if (curlen != 0) { + if (curlen != prevlen) { + send_code(s, curlen, s->bl_tree); count--; + } + Assert(count >= 3 && count <= 6, " 3_6?"); + send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2); + + } else if (count <= 10) { + send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3); + + } else { + send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7); + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } + } +} + +/* =========================================================================== + * Construct the Huffman tree for the bit lengths and return the index in + * bl_order of the last bit length code to send. 
+ */ +static int build_bl_tree(s) + deflate_state *s; +{ + int max_blindex; /* index of last bit length code of non zero freq */ + + /* Determine the bit length frequencies for literal and distance trees */ + scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code); + scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code); + + /* Build the bit length tree: */ + build_tree(s, (tree_desc *)(&(s->bl_desc))); + /* opt_len now includes the length of the tree representations, except + * the lengths of the bit lengths codes and the 5+5+4 bits for the counts. + */ + + /* Determine the number of bit length codes to send. The pkzip format + * requires that at least 4 bit length codes be sent. (appnote.txt says + * 3 but the actual value used is 4.) + */ + for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) { + if (s->bl_tree[bl_order[max_blindex]].Len != 0) break; + } + /* Update opt_len to include the bit length tree and counts */ + s->opt_len += 3*(max_blindex+1) + 5+5+4; + Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld", + s->opt_len, s->static_len)); + + return max_blindex; +} + +/* =========================================================================== + * Send the header for a block using dynamic Huffman trees: the counts, the + * lengths of the bit length codes, the literal tree and the distance tree. + * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. 
+ */ +static void send_all_trees(s, lcodes, dcodes, blcodes) + deflate_state *s; + int lcodes, dcodes, blcodes; /* number of codes for each tree */ +{ + int rank; /* index in bl_order */ + + Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); + Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, + "too many codes"); + Tracev((stderr, "\nbl counts: ")); + send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */ + send_bits(s, dcodes-1, 5); + send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */ + for (rank = 0; rank < blcodes; rank++) { + Tracev((stderr, "\nbl code %2d ", bl_order[rank])); + send_bits(s, s->bl_tree[bl_order[rank]].Len, 3); + } + Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent)); + + send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */ + Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent)); + + send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */ + Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent)); +} + +/* =========================================================================== + * Send a stored block + */ +static void _tr_stored_block(s, buf, stored_len, eof) + deflate_state *s; + charf *buf; /* input block */ + ulg stored_len; /* length of input block */ + int eof; /* true if this is the last block for a file */ +{ + send_bits(s, (STORED_BLOCK<<1)+eof, 3); /* send block type */ + s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; + s->compressed_len += (stored_len + 4) << 3; + + copy_block(s, buf, (unsigned)stored_len, 1); /* with header */ +} + +/* Send just the `stored block' type code without any length bytes or data. 
+ */ +static void _tr_stored_type_only(s) + deflate_state *s; +{ + send_bits(s, (STORED_BLOCK << 1), 3); + bi_windup(s); + s->compressed_len = (s->compressed_len + 3) & ~7L; +} + + +/* =========================================================================== + * Send one empty static block to give enough lookahead for inflate. + * This takes 10 bits, of which 7 may remain in the bit buffer. + * The current inflate code requires 9 bits of lookahead. If the + * last two codes for the previous block (real code plus EOB) were coded + * on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode + * the last real code. In this case we send two empty static blocks instead + * of one. (There are no problems if the previous block is stored or fixed.) + * To simplify the code, we assume the worst case of last real code encoded + * on one bit only. + */ +static void _tr_align(s) + deflate_state *s; +{ + send_bits(s, STATIC_TREES<<1, 3); + send_code(s, END_BLOCK, static_ltree); + s->compressed_len += 10L; /* 3 for block type, 7 for EOB */ + bi_flush(s); + /* Of the 10 bits for the empty block, we have already sent + * (10 - bi_valid) bits. The lookahead for the last real code (before + * the EOB of the previous block) was thus at least one plus the length + * of the EOB plus what we have just sent of the empty static block. + */ + if (1 + s->last_eob_len + 10 - s->bi_valid < 9) { + send_bits(s, STATIC_TREES<<1, 3); + send_code(s, END_BLOCK, static_ltree); + s->compressed_len += 10L; + bi_flush(s); + } + s->last_eob_len = 7; +} + +/* =========================================================================== + * Determine the best encoding for the current block: dynamic trees, static + * trees or store, and output the encoded block to the zip file. This function + * returns the total compressed length for the file so far. 
+ */ +static ulg _tr_flush_block(s, buf, stored_len, eof) + deflate_state *s; + charf *buf; /* input block, or NULL if too old */ + ulg stored_len; /* length of input block */ + int eof; /* true if this is the last block for a file */ +{ + ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ + int max_blindex = 0; /* index of last bit length code of non zero freq */ + + /* Build the Huffman trees unless a stored block is forced */ + if (s->level > 0) { + + /* Check if the file is ascii or binary */ + if (s->data_type == Z_UNKNOWN) set_data_type(s); + + /* Construct the literal and distance trees */ + build_tree(s, (tree_desc *)(&(s->l_desc))); + Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); + + build_tree(s, (tree_desc *)(&(s->d_desc))); + Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); + /* At this point, opt_len and static_len are the total bit lengths of + * the compressed block data, excluding the tree representations. + */ + + /* Build the bit length tree for the above two trees, and get the index + * in bl_order of the last bit length code to send. + */ + max_blindex = build_bl_tree(s); + + /* Determine the best encoding. 
Compute first the block length in bytes*/ + opt_lenb = (s->opt_len+3+7)>>3; + static_lenb = (s->static_len+3+7)>>3; + + Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", + opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, + s->last_lit)); + + if (static_lenb <= opt_lenb) opt_lenb = static_lenb; + + } else { + Assert(buf != (char*)0, "lost buf"); + opt_lenb = static_lenb = stored_len + 5; /* force a stored block */ + } + + /* If compression failed and this is the first and last block, + * and if the .zip file can be seeked (to rewrite the local header), + * the whole file is transformed into a stored file: + */ +#ifdef STORED_FILE_OK +# ifdef FORCE_STORED_FILE + if (eof && s->compressed_len == 0L) { /* force stored file */ +# else + if (stored_len <= opt_lenb && eof && s->compressed_len==0L && seekable()) { +# endif + /* Since LIT_BUFSIZE <= 2*WSIZE, the input data must be there: */ + if (buf == (charf*)0) error ("block vanished"); + + copy_block(s, buf, (unsigned)stored_len, 0); /* without header */ + s->compressed_len = stored_len << 3; + s->method = STORED; + } else +#endif /* STORED_FILE_OK */ + +#ifdef FORCE_STORED + if (buf != (char*)0) { /* force stored block */ +#else + if (stored_len+4 <= opt_lenb && buf != (char*)0) { + /* 4: two words for the lengths */ +#endif + /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. + * Otherwise we can't have processed more than WSIZE input bytes since + * the last block flush, because compression would have been + * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to + * transform a block into a stored block. 
+ */ + _tr_stored_block(s, buf, stored_len, eof); + +#ifdef FORCE_STATIC + } else if (static_lenb >= 0) { /* force static trees */ +#else + } else if (static_lenb == opt_lenb) { +#endif + send_bits(s, (STATIC_TREES<<1)+eof, 3); + compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree); + s->compressed_len += 3 + s->static_len; + } else { + send_bits(s, (DYN_TREES<<1)+eof, 3); + send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1, + max_blindex+1); + compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree); + s->compressed_len += 3 + s->opt_len; + } + Assert (s->compressed_len == s->bits_sent, "bad compressed size"); + init_block(s); + + if (eof) { + bi_windup(s); + s->compressed_len += 7; /* align on byte boundary */ + } + Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3, + s->compressed_len-7*eof)); + + return s->compressed_len >> 3; +} + +/* =========================================================================== + * Save the match info and tally the frequency counts. Return true if + * the current block must be flushed. 
+ */ +static int _tr_tally (s, dist, lc) + deflate_state *s; + unsigned dist; /* distance of matched string */ + unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ +{ + s->d_buf[s->last_lit] = (ush)dist; + s->l_buf[s->last_lit++] = (uch)lc; + if (dist == 0) { + /* lc is the unmatched char */ + s->dyn_ltree[lc].Freq++; + } else { + s->matches++; + /* Here, lc is the match length - MIN_MATCH */ + dist--; /* dist = match distance - 1 */ + Assert((ush)dist < (ush)MAX_DIST(s) && + (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) && + (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match"); + + s->dyn_ltree[length_code[lc]+LITERALS+1].Freq++; + s->dyn_dtree[d_code(dist)].Freq++; + } + + /* Try to guess if it is profitable to stop the current block here */ + if (s->level > 2 && (s->last_lit & 0xfff) == 0) { + /* Compute an upper bound for the compressed length */ + ulg out_length = (ulg)s->last_lit*8L; + ulg in_length = (ulg)((long)s->strstart - s->block_start); + int dcode; + for (dcode = 0; dcode < D_CODES; dcode++) { + out_length += (ulg)s->dyn_dtree[dcode].Freq * + (5L+extra_dbits[dcode]); + } + out_length >>= 3; + Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", + s->last_lit, in_length, out_length, + 100L - out_length*100L/in_length)); + if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; + } + return (s->last_lit == s->lit_bufsize-1); + /* We avoid equality with lit_bufsize because of wraparound at 64K + * on 16 bit machines and because stored blocks are restricted to + * 64K-1 bytes. 
+ */ +} + +/* =========================================================================== + * Send the block data compressed using the given Huffman trees + */ +static void compress_block(s, ltree, dtree) + deflate_state *s; + ct_data *ltree; /* literal tree */ + ct_data *dtree; /* distance tree */ +{ + unsigned dist; /* distance of matched string */ + int lc; /* match length or unmatched char (if dist == 0) */ + unsigned lx = 0; /* running index in l_buf */ + unsigned code; /* the code to send */ + int extra; /* number of extra bits to send */ + + if (s->last_lit != 0) do { + dist = s->d_buf[lx]; + lc = s->l_buf[lx++]; + if (dist == 0) { + send_code(s, lc, ltree); /* send a literal byte */ + Tracecv(isgraph(lc), (stderr," '%c' ", lc)); + } else { + /* Here, lc is the match length - MIN_MATCH */ + code = length_code[lc]; + send_code(s, code+LITERALS+1, ltree); /* send the length code */ + extra = extra_lbits[code]; + if (extra != 0) { + lc -= base_length[code]; + send_bits(s, lc, extra); /* send the extra length bits */ + } + dist--; /* dist is now the match distance - 1 */ + code = d_code(dist); + Assert (code < D_CODES, "bad d_code"); + + send_code(s, code, dtree); /* send the distance code */ + extra = extra_dbits[code]; + if (extra != 0) { + dist -= base_dist[code]; + send_bits(s, dist, extra); /* send the extra distance bits */ + } + } /* literal or match pair ? */ + + /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ + Assert(s->pending < s->lit_bufsize + 2*lx, "pendingBuf overflow"); + + } while (lx < s->last_lit); + + send_code(s, END_BLOCK, ltree); + s->last_eob_len = ltree[END_BLOCK].Len; +} + +/* =========================================================================== + * Set the data type to ASCII or BINARY, using a crude approximation: + * binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise. 
+ * IN assertion: the fields freq of dyn_ltree are set and the total of all + * frequencies does not exceed 64K (to fit in an int on 16 bit machines). + */ +static void set_data_type(s) + deflate_state *s; +{ + int n = 0; + unsigned ascii_freq = 0; + unsigned bin_freq = 0; + while (n < 7) bin_freq += s->dyn_ltree[n++].Freq; + while (n < 128) ascii_freq += s->dyn_ltree[n++].Freq; + while (n < LITERALS) bin_freq += s->dyn_ltree[n++].Freq; + s->data_type = (Byte)(bin_freq > (ascii_freq >> 2) ? Z_BINARY : Z_ASCII); +} + +/* =========================================================================== + * Reverse the first len bits of a code, using straightforward code (a faster + * method would use a table) + * IN assertion: 1 <= len <= 15 + */ +static unsigned bi_reverse(code, len) + unsigned code; /* the value to invert */ + int len; /* its bit length */ +{ + register unsigned res = 0; + do { + res |= code & 1; + code >>= 1, res <<= 1; + } while (--len > 0); + return res >> 1; +} + +/* =========================================================================== + * Flush the bit buffer, keeping at most 7 bits in it. + */ +static void bi_flush(s) + deflate_state *s; +{ + if (s->bi_valid == 16) { + put_short(s, s->bi_buf); + s->bi_buf = 0; + s->bi_valid = 0; + } else if (s->bi_valid >= 8) { + put_byte(s, (Byte)s->bi_buf); + s->bi_buf >>= 8; + s->bi_valid -= 8; + } +} + +/* =========================================================================== + * Flush the bit buffer and align the output on a byte boundary + */ +static void bi_windup(s) + deflate_state *s; +{ + if (s->bi_valid > 8) { + put_short(s, s->bi_buf); + } else if (s->bi_valid > 0) { + put_byte(s, (Byte)s->bi_buf); + } + s->bi_buf = 0; + s->bi_valid = 0; +#ifdef DEBUG_ZLIB + s->bits_sent = (s->bits_sent+7) & ~7; +#endif +} + +/* =========================================================================== + * Copy a stored block, storing first the length and its + * one's complement if requested. 
+ */ +static void copy_block(s, buf, len, header) + deflate_state *s; + charf *buf; /* the input data */ + unsigned len; /* its length */ + int header; /* true if block header must be written */ +{ + bi_windup(s); /* align on byte boundary */ + s->last_eob_len = 8; /* enough lookahead for inflate */ + + if (header) { + put_short(s, (ush)len); + put_short(s, (ush)~len); +#ifdef DEBUG_ZLIB + s->bits_sent += 2*16; +#endif + } +#ifdef DEBUG_ZLIB + s->bits_sent += (ulg)len<<3; +#endif + /* bundle up the put_byte(s, *buf++) calls */ + memcpy(&s->pending_buf[s->pending], buf, len); + s->pending += len; +} +/* --- trees.c */ + +/* From: adler32.c,v 1.10 1996/05/22 11:52:18 me Exp $ */ + +/* #include "zlib.h" */ + +#define BASE 65521L /* largest prime smaller than 65536 */ +#define NMAX 5552 +/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ + +#define DO1(buf,i) {s1 += buf[i]; s2 += s1;} +#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); +#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); +#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); +#define DO16(buf) DO8(buf,0); DO8(buf,8); + +/* ========================================================================= */ +uLong adler32(adler, buf, len) + uLong adler; + const Bytef *buf; + uInt len; +{ + unsigned long s1 = adler & 0xffff; + unsigned long s2 = (adler >> 16) & 0xffff; + int k; + + if (buf == Z_NULL) return 1L; + + while (len > 0) { + k = len < NMAX ? len : NMAX; + len -= k; + while (k >= 16) { + DO16(buf); + buf += 16; + k -= 16; + } + if (k != 0) do { + s1 += *buf++; + s2 += s1; + } while (--k); + s1 %= BASE; + s2 %= BASE; + } + return (s2 << 16) | s1; +} +/* --- adler32.c */ +static void *zalloc(void *opaque, unsigned nr, unsigned size) +{ + /* How much does it request? Should we use vmalloc? Or be dynamic? 
*/ + return kmalloc(nr * size, GFP_KERNEL); +} + +static void zfree(void *opaque, void *addr) +{ + kfree(addr); +} + + /* Plan: call deflate() with avail_in == *sourcelen, + avail_out = *dstlen - 12 and flush == Z_FINISH. + If it doesn't manage to finish, call it again with + avail_in == 0 and avail_out set to the remaining 12 + bytes for it to clean up. + Q: Is 12 bytes sufficient? + */ +#define STREAM_END_SPACE 12 + +int zlib_compress(unsigned char *data_in, unsigned char *cpage_out, + __u32 *sourcelen, __u32 *dstlen) +{ + z_stream strm; + int ret; + + if (*dstlen <= STREAM_END_SPACE) + return -1; + + strm.zalloc = zalloc; + strm.zfree = zfree; + + if (Z_OK != deflateInit(&strm, 3)) { + printk(KERN_WARNING "deflateInit failed\n"); + return -1; + } + strm.next_in = data_in; + strm.total_in = 0; + + strm.next_out = cpage_out; + strm.total_out = 0; + + while (strm.total_out < *dstlen - STREAM_END_SPACE && strm.total_in < *sourcelen) { + strm.avail_out = *dstlen - (strm.total_out + STREAM_END_SPACE); + strm.avail_in = min((unsigned)(*sourcelen-strm.total_in), strm.avail_out); + Dprintk("calling deflate with avail_in %d, avail_out %d\n", strm.avail_in, strm.avail_out); + ret = deflate(&strm, Z_PARTIAL_FLUSH); + Dprintk("deflate returned with avail_in %d, avail_out %d, total_in %ld, total_out %ld\n", strm.avail_in, strm.avail_out, strm.total_in, strm.total_out); + if (ret != Z_OK) { + Dprintk("deflate in loop returned %d\n", ret); + deflateEnd(&strm); + return -1; + } + } + strm.avail_out += STREAM_END_SPACE; + strm.avail_in = 0; + ret = deflate(&strm, Z_FINISH); + if (ret != Z_STREAM_END) { + Dprintk("final deflate returned %d\n", ret); + deflateEnd(&strm); + return -1; + } + deflateEnd(&strm); + + Dprintk("zlib compressed %ld bytes into %ld\n", strm.total_in, strm.total_out); + + if (strm.total_out >= strm.total_in) + return -1; + + + *dstlen = strm.total_out; + *sourcelen = strm.total_in; + return 0; +} + +void tux_gzip_start (tux_req_t *req) +{ + z_stream *strm; + 
+ strm = kmalloc(sizeof(*strm), GFP_KERNEL); + if (!strm) + BUG(); + + strm->zalloc = zalloc; + strm->zfree = zfree; + + if (Z_OK != deflateInit(strm, 6)) + BUG(); + req->gzip_state = strm; +} + +int tux_gzip_compress (void *state, unsigned char *data_in, unsigned char *data_out, __u32 *in_len, __u32 *out_len) +{ + z_stream *s = state; + int ret; + +// int zlib_compress(unsigned char *data_in, unsigned char *cpage_out, __u32 *sourcelen, __u32 *dstlen) + + s->next_in = data_in; + s->total_in = 0; + s->next_out = data_out; + s->total_out = 0; + s->avail_out = *out_len - (s->total_out + STREAM_END_SPACE); + s->avail_in = min((unsigned)(*in_len-s->total_in), s->avail_out); + Dprintk("calling deflate with avail_in %d, avail_out %d\n", s->avail_in, s->avail_out); + ret = deflate(s, Z_FINISH); + Dprintk("deflate returned with avail_in %d, avail_out %d, total_in %ld, total_out %ld\n", s->avail_in, s->avail_out, s->total_in, s->total_out); + if (ret != Z_STREAM_END) { + printk("deflate in loop returned %d\n", ret); + BUG(); + } + *in_len = s->avail_in; + *out_len = s->avail_out; + return s->avail_in; +} + +void tux_gzip_end (tux_req_t *req) +{ + z_stream *strm = req->gzip_state; + deflateEnd(strm); + kfree(req->gzip_state); + req->gzip_state = NULL; +} --- linux/net/tux/input.c.orig +++ linux/net/tux/input.c @@ -0,0 +1,627 @@ +/* + * TUX - Integrated Application Protocols Layer and Object Cache + * + * Copyright (C) 2000, 2001, Ingo Molnar + * + * input.c: handle requests arriving on accepted connections + */ + +#include +#include + +/**************************************************************** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + ****************************************************************/ + +void zap_request (tux_req_t *req, int cachemiss) +{ + if (!req->error) + TUX_BUG(); + if (req->error == TUX_ERROR_CONN_TIMEOUT) { + if (req->proto->request_timeout) { + clear_keepalive(req); + req->proto->request_timeout(req, cachemiss); + } else { + clear_keepalive(req); + if (!cachemiss) + flush_request(req, 0); + else { + add_tux_atom(req, flush_request); + add_req_to_workqueue(req); + } + } + return; + } + + if (!cachemiss && (req->error == TUX_ERROR_CONN_CLOSE)) { + /* + * Zap connection as fast as possible, there is + * no valid client connection anymore: + */ + clear_keepalive(req); + flush_request(req, 0); + } else { + if (req->error == TUX_ERROR_CONN_CLOSE) { + clear_keepalive(req); + add_tux_atom(req, flush_request); + } else + /* + * Potentially redirect to the secondary server: + */ + add_tux_atom(req, redirect_request); + add_req_to_workqueue(req); + } +} + +void __switch_docroot(tux_req_t *req) +{ + if (!req->docroot_dentry || !req->docroot_mnt) + TUX_BUG(); + set_fs_root(current->fs, req->docroot_mnt, req->docroot_dentry); +} + +struct dentry * __tux_lookup (tux_req_t *req, const char *filename, + struct nameidata *base, struct vfsmount **mnt) +{ + int err; + + err = path_walk(filename, base); + if (err) { + Dprintk("path_walk() returned with %d!\n", err); + return ERR_PTR(err); + } + if (*mnt) + TUX_BUG(); + *mnt = base->mnt; + + return base->dentry; +} + +int tux_permission (struct inode *inode) +{ + umode_t mode; + int err; + + 
mode = inode->i_mode; + Dprintk("URL inode mode: %08x.\n", mode); + + if (mode & tux_mode_forbidden) + return -2; + /* + * at least one bit in the 'allowed' set has to + * be present to allow access. + */ + if (!(mode & tux_mode_allowed)) + return -3; + err = permission(inode,MAY_READ); + return err; +} + +struct dentry * tux_lookup (tux_req_t *req, const char *filename, + const unsigned int flag, struct vfsmount **mnt) +{ + struct dentry *dentry; + struct nameidata base; + + Dprintk("tux_lookup(%p, %s, %d, virtual: %d, host: %s (%d).)\n", req, filename, flag, req->virtual, req->host, req->host_len); + + base.flags = LOOKUP_POSITIVE|LOOKUP_FOLLOW|flag; + base.last_type = LAST_ROOT; + if (req->objectname[0] == '/') { + base.dentry = dget(req->docroot_dentry); + base.mnt = mntget(req->docroot_mnt); + } else { + if (!req->cwd_dentry) { + req->cwd_dentry = dget(req->docroot_dentry); + req->cwd_mnt = mntget(req->docroot_mnt); + } + base.dentry = req->cwd_dentry; + dget(base.dentry); + base.mnt = mntget(req->cwd_mnt); + } + + switch_docroot(req); + dentry = __tux_lookup (req, filename, &base, mnt); + + Dprintk("looked up {%s} == dentry %p.\n", filename, dentry); + + if (dentry && !IS_ERR(dentry) && !dentry->d_inode) + TUX_BUG(); + return dentry; +} + +int lookup_object (tux_req_t *req, const unsigned int flag) +{ + struct vfsmount *mnt = NULL; + struct dentry *dentry = NULL; + int perm; + + dentry = tux_lookup(req, req->objectname, flag, &mnt); + if (!dentry || IS_ERR(dentry)) { + if (PTR_ERR(dentry) == -EWOULDBLOCKIO) + goto cachemiss; + goto abort; + } + perm = tux_permission(dentry->d_inode); + /* + * Only regular files allowed. 
+ */ + if ((perm < 0) || !S_ISREG(dentry->d_inode->i_mode)) { + req->status = 403; + goto abort; + } + req->total_file_len = dentry->d_inode->i_size; +out: + install_req_dentry(req, dentry, mnt); + return 0; +cachemiss: + return 1; +abort: + if (dentry) { + if (!IS_ERR(dentry)) + dput(dentry); + dentry = NULL; + } + if (mnt) { + if (!IS_ERR(mnt)) + mntput(mnt); + mnt = NULL; + } + req_err(req); + goto out; +} + +void install_req_dentry (tux_req_t *req, struct dentry *dentry, struct vfsmount *mnt) +{ + if (req->dentry) + TUX_BUG(); + req->dentry = dentry; + if (req->mnt) + TUX_BUG(); + req->mnt = mnt; + if (req->in_file.f_dentry) + TUX_BUG(); + if (dentry) + init_private_file(&req->in_file, dentry, FMODE_READ); +} + +void release_req_dentry (tux_req_t *req) +{ + if (!req->dentry) { + if (req->in_file.f_dentry) + TUX_BUG(); + return; + } + if (req->in_file.f_op && req->in_file.f_op->release) + req->in_file.f_op->release(req->dentry->d_inode, &req->in_file); + memset(&req->in_file, 0, sizeof(req->in_file)); + + dput(req->dentry); + req->dentry = NULL; + mntput(req->mnt); + req->mnt = NULL; +} + +int __connection_too_fast (tux_req_t *req) +{ + unsigned long curr_bw, delta, bytes; + + bytes = req->total_bytes + req->bytes_sent; + if (!bytes) + return 1; + + delta = jiffies - req->first_timestamp; + if (!delta) + delta++; + curr_bw = bytes * HZ / delta; + + if (curr_bw > tux_max_output_bandwidth) + return 2; + return 0; +} + +void unidle_req (tux_req_t *req) +{ + threadinfo_t *ti = req->ti; + + Dprintk("UNIDLE req %p <%p> (sock %p, sk %p) (keepalive: %d, status: %d)\n", req, __builtin_return_address(0), req->sock, req->sock->sk, req->keep_alive, req->status); + spin_lock_irq(&ti->work_lock); + if (req->magic != TUX_MAGIC) + TUX_BUG(); + if (!test_and_clear_bit(0, &req->idle_input)) { + Dprintk("unidling %p, wasnt idle!\n", req); + if (list_empty(&req->work)) + TUX_BUG(); + list_del(&req->work); + DEBUG_DEL_LIST(&req->work); + DEC_STAT(nr_work_pending); + } else { + 
del_keepalive_timer(req); + DEC_STAT(nr_idle_input_pending); + Dprintk("unidled %p.\n", req); + } + if (req->idle_input) + TUX_BUG(); + spin_unlock_irq(&ti->work_lock); +} + +#define GOTO_INCOMPLETE do { Dprintk("incomplete at %s:%d.\n", __FILE__, __LINE__); goto incomplete; } while (0) +#define GOTO_REDIRECT do { TDprintk("redirect at %s:%d.\n", __FILE__, __LINE__); goto redirect; } while (0) +#define GOTO_REDIRECT_NONIDLE do { TDprintk("redirect at %s:%d.\n", __FILE__, __LINE__); goto redirect_nonidle; } while (0) + +static int read_request (struct socket *sock, char *buf, int max_size) +{ + mm_segment_t oldmm; + struct msghdr msg; + struct iovec iov; + int len; + + msg.msg_name = 0; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + msg.msg_iov->iov_base = buf; + msg.msg_iov->iov_len = max_size; + + oldmm = get_fs(); set_fs(KERNEL_DS); + +read_again: + len = sock->sk->prot->recvmsg(sock->sk, &msg, max_size, + MSG_DONTWAIT, MSG_PEEK, NULL); + + /* + * We must not get a signal inbetween + */ + if ((len == -EAGAIN) || (len == -ERESTARTSYS)) { + if (!signal_pending(current)) { + len = 0; + goto out; + } + flush_all_signals(); + goto read_again; + } +out: + set_fs(oldmm); + return len; +} + +/* + * We inline URG data so it's at the head of the normal receive queue. 
+ */ +static int zap_urg_data (struct socket *sock) +{ + mm_segment_t oldmm; + struct msghdr msg; + struct iovec iov; + int len; + char buf[10]; + + oldmm = get_fs(); set_fs(KERNEL_DS); + + msg.msg_name = 0; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + msg.msg_iov->iov_base = buf; + msg.msg_iov->iov_len = 2; + +read_again: + len = sock->sk->prot->recvmsg(sock->sk, &msg, 2, MSG_DONTWAIT, 0, NULL); + Dprintk("recvmsg(MSG_OOB) returned %d.\n", len); + + /* + * We must not get a signal inbetween + */ + if ((len == -EAGAIN) || (len == -ERESTARTSYS)) { + if (!signal_pending(current)) { + len = 0; + goto out; + } + flush_all_signals(); + goto read_again; + } +out: + set_fs(oldmm); + + Dprintk("in out:.. and will return %d.!\n", len); + + return len; +} + +void trunc_headers (tux_req_t *req) +{ + struct sock *sk = req->sock->sk; + int len, addr_len = 0; + + if (!req->parsed_len) + TUX_BUG(); +repeat_trunc: + len = sk->prot->recvmsg(sk, NULL, req->parsed_len, 1, MSG_TRUNC, &addr_len); + if ((len == -ERESTARTSYS) || (len == -EAGAIN)) { + flush_all_signals(); + goto repeat_trunc; + } + Dprintk("truncated (TRUNC) %d bytes at %p. (wanted: %d.)\n", len, __builtin_return_address(0), req->parsed_len); + + + + req->parsed_len = 0; +} + +void print_req (tux_req_t *req) +{ + struct sock *sk; + + printk("PRINT req %p <%p>, sock %p\n", + req, __builtin_return_address(0), req->sock); + printk("... idx: %d\n", req->atom_idx); + if (req->sock) { + sk = req->sock->sk; + printk("... sock %p, sk %p, sk->state: %d, sk->err: %d\n", req->sock, sk, sk->state, sk->err); + printk("... 
write_queue: %d, receive_queue: %d, error_queue: %d, keepalive: %d, status: %d\n", !skb_queue_empty(&sk->write_queue), !skb_queue_empty(&sk->receive_queue), !skb_queue_empty(&sk->error_queue), req->keep_alive, req->status); + printk("...tp->send_head: %p\n", sk->tp_pinfo.af_tcp.send_head); + printk("...tp->snd_una: %08x\n", sk->tp_pinfo.af_tcp.snd_una); + printk("...tp->snd_nxt: %08x\n", sk->tp_pinfo.af_tcp.snd_nxt); + printk("...tp->packets_out: %08x\n", sk->tp_pinfo.af_tcp.packets_out); + } + printk("... meth:{%s}, uri:{%s}, query:{%s}, ver:{%s}\n", req->method_str ? req->method_str : "", req->uri_str ? req->uri_str : "", req->query_str ? req->query_str : "", req->version_str ? req->version_str : ""); + printk("... post_data:{%s}(%d).\n", req->post_data_str, req->post_data_len); + printk("... headers: {%s}\n", req->headers); +} +/* + * parse_request() reads all available TCP/IP data and prepares + * the request if the TUX request is complete. (we can get TUX + * requests in several packets.) Invalid requests are redirected + * to the secondary server. 
+ */ + +void parse_request (tux_req_t *req, int cachemiss) +{ + int len, parsed_len; + struct sock *sk = req->sock->sk; + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + int was_keepalive = req->keep_alive; + + if (req->magic != TUX_MAGIC) + TUX_BUG(); + + SET_TIMESTAMP(req->parse_timestamp); + + spin_lock_irq(&req->ti->work_lock); + add_keepalive_timer(req); + if (test_and_set_bit(0, &req->idle_input)) + TUX_BUG(); + INC_STAT(nr_idle_input_pending); + spin_unlock_irq(&req->ti->work_lock); + + Dprintk("idled request %p.\n", req); + +restart: + + if (tp->urg_data && !(tp->urg_data & TCP_URG_READ)) { + len = zap_urg_data(req->sock); + if (tp->urg_data && !(tp->urg_data & TCP_URG_READ)) { + req->error = TUX_ERROR_CONN_CLOSE; + goto redirect_error; + } + } + + INC_STAT(input_slowpath); + + if (!req->headers) + req->headers = tux_kmalloc(tux_max_header_len); + + /* First, read the data */ + len = read_request(req->sock, (char *)req->headers, tux_max_header_len-1); + if (len < 0) { + req->error = TUX_ERROR_CONN_CLOSE; + goto redirect_error; + } + if (!len) + GOTO_INCOMPLETE; + + /* + * Make it a zero-delimited string to automatically get + * protection against various buffer overflow situations. + * Then pass it to the TUX application protocol stack. + */ + ((char *)req->headers)[len] = 0; + req->headers_len = len; + + parsed_len = req->proto->parse_message(req, len); + + /* + * Is the request fully read? 
(or is there any error) + */ + if (parsed_len < 0) + GOTO_REDIRECT; + if (!parsed_len) { + /* + * Push pending ACK which was delayed due to the + * pingpong optimization: + */ + if (was_keepalive) { + lock_sock(sk); + tp->ack.pingpong = 0; + tp->ack.pending |= TCP_ACK_PUSHED; + cleanup_rbuf(sk, 1); + release_sock(sk); + } + if (len >= tux_max_header_len-1) + GOTO_REDIRECT; + GOTO_INCOMPLETE; + } + unidle_req(req); + + req->sock->sk->tp_pinfo.af_tcp.nonagle = 2; + + add_req_to_workqueue(req); + return; + +redirect: + TDprintk("req %p will be redirected!\n", req); + req_err(req); + +redirect_error: + unidle_req(req); + + if (len < 0) + req->parsed_len = 0; + else + req->parsed_len = len; + + INC_STAT(parse_static_redirect); + if (req->headers) + kfree(req->headers); + req->headers = NULL; + if (req->error) + zap_request(req, cachemiss); + return; + +incomplete: + if (req->error) + goto redirect_error; + if (tp->urg_data && !(tp->urg_data & TCP_URG_READ)) + goto restart; + + add_tux_atom(req, parse_request); + INC_STAT(parse_static_incomplete); + tux_push_req(req); +} + +int process_requests (threadinfo_t *ti, tux_req_t **user_req) +{ + struct list_head *head, *curr; + int count = 0; + tux_req_t *req; + + *user_req = NULL; + +restart_loop: + spin_lock_irq(&ti->work_lock); + head = &ti->work_pending; + curr = head->next; + + if (curr != head) { + int i; + + req = list_entry(curr, tux_req_t, work); + Dprintk("PROCESS req %p <%p>.\n", + req, __builtin_return_address(0)); + for (i = 0; i < req->atom_idx; i++) + Dprintk("... atom %d: %p\n", i, req->atoms[i]); + + if (req->ti != ti) + TUX_BUG(); + if (req->magic != TUX_MAGIC) + TUX_BUG(); + + if (list_empty(&req->work)) + TUX_BUG(); + list_del(curr); + DEBUG_DEL_LIST(&req->work); + spin_unlock_irq(&ti->work_lock); + + if (!req->atom_idx) { + if (req->usermode) { + *user_req = req; + return count; + } + /* + * idx == 0 requests are flushed automatically. 
+ */ + flush_request(req, 0); + } else + tux_schedule_atom(req, 0); + count++; + goto restart_loop; + } + spin_unlock_irq(&ti->work_lock); + + return count; +} + +int tux_flush_workqueue (threadinfo_t *ti) +{ + struct list_head *head, *curr, *next; + tux_req_t *req; + int count = 0; + +restart: + spin_lock_irq(&ti->work_lock); + head = &ti->work_pending; + curr = head->next; + + if (curr != head) { + req = list_entry(curr, tux_req_t, work); + next = curr->next; + clear_bit(0, &req->idle_input); + clear_bit(0, &req->wait_output_space); + if (list_empty(&req->work)) + TUX_BUG(); + list_del(curr); + DEBUG_DEL_LIST(curr); + DEC_STAT(nr_input_pending); + spin_unlock_irq(&ti->work_lock); +#if CONFIG_TUX_DEBUG + req->bytes_expected = 0; +#endif + req->in_file.f_pos = 0; + req->atom_idx = 0; + clear_keepalive(req); + req->status = -1; + if (req->usermode) { + req->usermode = 0; + req->private = 0; + } + flush_request(req, 0); + count++; + goto restart; + } + spin_unlock_irq(&ti->work_lock); + + return count; +} + +int print_all_requests (threadinfo_t *ti) +{ + struct list_head *head, *curr; + tux_req_t *req; + int count = 0; + + spin_lock_irq(&ti->work_lock); + head = &ti->all_requests; + curr = head->next; + + while (curr != head) { + req = list_entry(curr, tux_req_t, all); + curr = curr->next; + print_req(req); + count++; + } + spin_unlock_irq(&ti->work_lock); + + return count; +} + --- linux/net/tux/logger.c.orig +++ linux/net/tux/logger.c @@ -0,0 +1,824 @@ +/* + * TUX - Integrated Application Protocols Layer and Object Cache + * + * Copyright (C) 2000, 2001, Ingo Molnar + * + * Cleaned up logger output for Alpha. + * -- Phil Ezolt (Phillip.Ezolt@compaq.com) & Bill Carr (wcarr92@yahoo.com) + * + * logger.c: log requests finished by TUX. 
+ */ + +#define __KERNEL_SYSCALLS__ +#include + +/**************************************************************** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + ****************************************************************/ + +static spinlock_t log_lock = SPIN_LOCK_UNLOCKED; +static unsigned int log_head, log_tail; +static char * log_buffer = NULL; +static DECLARE_WAIT_QUEUE_HEAD(log_wait); +static DECLARE_WAIT_QUEUE_HEAD(log_full); +static int logger_pid = 0; + +/* + * High-speed TUX logging architecture: + * + * All fast threads share a common log-ringbuffer. (default size 1MB) + * Log entries are binary and are padded to be cacheline aligned, this + * ensures that there is no cache-pingpong between fast threads. + * + * The logger thread writes out pending log entries within 1 second + * (buffer-cache writes data out within 5 seconds). The logger thread + * gets activated once we have more than 25% of the log ringbuffer + * filled - or the 1 second log timeout expires. Fast threads block + * if more than 95% of the ringbuffer is filled and unblock only + * if used logbuffer space drops below 90%. 
+ * + * This architecture guarantees that 1) logging is reliable (no + * log entry is ever lost), 2) timely (touches disk within 6 seconds), + * 3) in the log-contention case the saturation behavior is still + * write-clustered, but 4) if the logger thread can keep up then + * the coupling is completely asynchronous and parallel. + * + * The binary log format gives us about 50% saved IO/memory bandwidth + * and 50% less on-disk used log space than the traditional W3C ASCII + * format. + * + * (We might switch to raw IO though to write the logfile.) + */ + +#define SOFT_LIMIT (LOG_LEN*25/100) +#define HARD_LIMIT (LOG_LEN*95/100) +#define HARD_RELAX_LIMIT (LOG_LEN*90/100) + +unsigned int tux_logentry_align_order = 5; + +#if SMP_CACHE_BYTES == 8 +# define TUX_LOGENTRY_ALIGN 3 +#else +#if SMP_CACHE_BYTES == 16 +# define TUX_LOGENTRY_ALIGN 4 +#else +#if SMP_CACHE_BYTES == 32 +# define TUX_LOGENTRY_ALIGN 5 +#else +#if SMP_CACHE_BYTES == 64 +# define TUX_LOGENTRY_ALIGN 6 +#else +#if SMP_CACHE_BYTES == 128 +# define TUX_LOGENTRY_ALIGN 7 +#else +#if SMP_CACHE_BYTES == 256 +# define TUX_LOGENTRY_ALIGN 8 +#else +#error Add entry! 
+#endif +#endif +#endif +#endif +#endif +#endif + +#define ROUND_UP(x) (((((x)-1) >> TUX_LOGENTRY_ALIGN) + 1) \ + << TUX_LOGENTRY_ALIGN) + +static void __throttle_logging (void) +{ + DECLARE_WAITQUEUE(wait, current); + int pending; + + add_wait_queue(&log_full, &wait); + for (;;) { + static unsigned long last_warning = 0; + + if (jiffies - last_warning > 10*HZ) { + last_warning = jiffies; + printk(KERN_NOTICE "TUX: log buffer overflow, have to throttle TUX thread!\n"); + } + + current->state = TASK_INTERRUPTIBLE; + + spin_lock(&log_lock); + pending = log_head-log_tail; + spin_unlock(&log_lock); + + if ((pending % LOG_LEN) < HARD_LIMIT) + break; + + schedule(); + } + current->state = TASK_RUNNING; + remove_wait_queue(&log_full, &wait); +} + +#if CONFIG_TUX_DEBUG +#define CHECK_LOGPTR(ptr) \ +do { \ + if ((ptr < log_buffer) || (ptr > log_buffer + LOG_LEN)) { \ + printk(KERN_ERR "TUX: ouch: log ptr %p > %p + %ld!\n", \ + ptr, log_buffer, LOG_LEN); \ + TUX_BUG(); \ + } \ +} while (0) +#else +#define CHECK_LOGPTR(ptr) do { } while (0) +#endif + +void __log_request (tux_req_t *req) +{ + char *str, *next; + const char *uri_str; + unsigned int inc, len, uri_len, pending, next_head, def_vhost_len = 0; + unsigned long flags; + + if (req->proto->pre_log) + req->proto->pre_log(req); + /* + * Log the reply status (success, or type of failure) + */ + if (!tux_log_incomplete && (!req->status || (req->bytes_sent == -1))) { + + Dprintk("not logging req %p: {%s} [%d/%d]\n", req, req->uri_str, req->status, req->bytes_sent); + return; + } + Dprintk("uri: {%s} [%d]\n", req->uri_str, req->uri_len); + +#define NO_URI "" + if (req->uri_len) { + uri_len = req->uri_len; + uri_str = req->uri_str; + } else { + uri_str = NO_URI; + uri_len = sizeof(NO_URI)-1; + } + len = uri_len + 1; + + if (req->virtual) { + if (req->host_len) + len += req->host_len; + else { + def_vhost_len = strlen(tux_default_vhost); + len += def_vhost_len; + } + } + + Dprintk("method_str: {%s} [%d]\n", req->method_str, 
req->method_len); + len += req->method_len + 1; + + Dprintk("version_str: {%s} [%d]\n", req->version_str, req->version_len); + len += req->version_len + 1; + +#if CONFIG_TUX_EXTENDED_LOG + Dprintk("user_agent_str: {%s} [%d]\n", req->user_agent_str, req->user_agent_len); + len += req->user_agent_len + 1; +#endif + if (tux_referer_logging) { + Dprintk("referer_str: {%s} [%d]\n", req->referer_str, req->referer_len); + len += req->referer_len; + } + len++; + + inc = 5*sizeof(u32) + len; +#if CONFIG_TUX_EXTENDED_LOG + inc += 7*sizeof(u32); +#endif + + spin_lock_irqsave(&log_lock, flags); + + next_head = ROUND_UP(log_head + inc); + + if (next_head < LOG_LEN) { + str = log_buffer + log_head; + if (str > log_buffer + LOG_LEN) + TUX_BUG(); + log_head = next_head; + } else { + if (log_head < LOG_LEN) + memset(log_buffer+log_head, 0, LOG_LEN-log_head); + str = log_buffer; + log_head = ROUND_UP(inc); + } + + if (str < log_buffer || str+inc >= log_buffer+LOG_LEN) + TUX_BUG(); + + /* + * Log record signature - this makes finding the next entry + * easier (since record length is variable), and makes the + * binary logfile more robust against potential data corruption + * and other damage. The signature also servers as a log format + * version identifier. + */ +#if CONFIG_TUX_EXTENDED_LOG + *(u32 *)str = 0x2223beef; +#else + *(u32 *)str = 0x1112beef; +#endif + str += sizeof(u32); + CHECK_LOGPTR(str); + + *(u32 *)str = 0; + /* + * Log the client IP address: + */ + if (tux_ip_logging) + *(u32 *)str = req->client_addr; + str += sizeof(u32); + CHECK_LOGPTR(str); + +#if CONFIG_TUX_EXTENDED_LOG + /* + * Log the client port number: + */ + *(u32 *)str = 0; + if (tux_ip_logging) + *(u32 *)str = req->client_port; + str += sizeof(u32); + CHECK_LOGPTR(str); +#endif + + /* + * Log the request timestamp, in units of 'seconds since 1970'. 
+ */ + *(u32 *)str = CURRENT_TIME; + str += sizeof(u32); + CHECK_LOGPTR(str); + +#if CONFIG_TUX_EXTENDED_LOG + *(u32 *)str = req->accept_timestamp; str += sizeof(u32); + *(u32 *)str = req->parse_timestamp; str += sizeof(u32); + *(u32 *)str = req->output_timestamp; str += sizeof(u32); + *(u32 *)str = req->flush_timestamp; str += sizeof(u32); + *(u32 *)str = req->had_cachemiss; str += sizeof(u32); + *(u32 *)str = req->keep_alive; str += sizeof(u32); +#endif + /* + * Log the requested file size (in fact, log actual bytes sent.) + */ + *(u32 *)str = req->bytes_sent; + str += sizeof(u32); + CHECK_LOGPTR(str); + + *(u32 *)str = req->status; + str += sizeof(u32); + CHECK_LOGPTR(str); + + /* + * Zero-terminated method, (base) URI, query and version string. + */ + if (req->method_len) { + memcpy(str, req->method_str, req->method_len); + str += req->method_len; + CHECK_LOGPTR(str); + } + *str++ = 0; + + if (req->virtual) { + if (req->host_len) { + memcpy(str, req->host, req->host_len); + str += req->host_len; + } else { + memcpy(str, tux_default_vhost, def_vhost_len); + str += def_vhost_len; + } + CHECK_LOGPTR(str); + } + + memcpy(str, uri_str, uri_len); + str += uri_len; + *str++ = 0; + + CHECK_LOGPTR(str); + + if (req->version_len) { + memcpy(str, req->version_str, req->version_len); + str += req->version_len; + CHECK_LOGPTR(str); + } + *str++ = 0; +#if CONFIG_TUX_EXTENDED_LOG + if (req->user_agent_len) { + memcpy(str, req->user_agent_str, req->user_agent_len); + str += req->user_agent_len; + CHECK_LOGPTR(str); + } + *str++ = 0; +#endif + CHECK_LOGPTR(str); + + if (tux_referer_logging && req->referer_len) { + memcpy(str, req->referer_str, req->referer_len); + str += req->referer_len; + CHECK_LOGPTR(str); + } + *str++ = 0; + CHECK_LOGPTR(str); + /* + * pad with spaces to next cacheline, with an ending newline. + * (not needed for the user-space log utility, but results in + * a more readable binary log file, and reduces the amount + * of cache pingpong.) 
+ */ + next = (char *)ROUND_UP((unsigned long)str); + + CHECK_LOGPTR(next); + len = next-str; + memset(str, ' ', len); + + pending = (log_head-log_tail) % LOG_LEN; + spin_unlock_irqrestore(&log_lock, flags); + + if (pending >= SOFT_LIMIT) + wake_up(&log_wait); + + if (pending >= HARD_LIMIT) + __throttle_logging(); +} + +void tux_push_pending (struct sock *sk) +{ + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + Dprintk("pushing pending frames on sock %p.\n", sk); + lock_sock(sk); + if ((sk->state == TCP_ESTABLISHED) && !sk->err) { + tp->ack.pingpong = tux_ack_pingpong; + sk->tp_pinfo.af_tcp.nonagle = 1; + __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk), 1); + } + release_sock(sk); +} + +inline void tux_push_req (tux_req_t *req) +{ + if (req->sock) + tux_push_pending(req->sock->sk); + if (req->data_sock) + tux_push_pending(req->data_sock->sk); +} + +void __put_data_sock (tux_req_t *req) +{ + unlink_tux_data_socket(req); + if (req->data_sock->file) + fput(req->data_sock->file); + else + sock_release(req->data_sock); + req->data_sock = NULL; +} + +void flush_request (tux_req_t *req, int cachemiss) +{ + struct socket *sock; + struct sock *sk; + int keep_alive; + + if (cachemiss) + TUX_BUG(); + __set_task_state(current, TASK_RUNNING); + + if (req->magic != TUX_MAGIC) + TUX_BUG(); + if (req->ti->thread != current) + TUX_BUG(); +#if CONFIG_TUX_DEBUG + if (req->bytes_expected && (req->bytes_sent != req->bytes_expected)) { + printk("hm, bytes_expected: %d != bytes_sent: %d!\n", + req->bytes_expected, req->bytes_sent); + TUX_BUG(); + } +#endif + SET_TIMESTAMP(req->flush_timestamp); + + log_request(req); + sock = req->sock; + sk = NULL; + if (sock) + sk = sock->sk; + Dprintk("FLUSHING req %p <%p> (sock %p, sk %p) (keepalive: %d, status: %d)\n", req, __builtin_return_address(0), sock, sk, req->keep_alive, req->status); + if (req->in_file.f_pos) + /*TUX_BUG()*/; + release_req_dentry(req); + req->private = 0; + + if (req->docroot_dentry) { + dput(req->docroot_dentry); + 
req->docroot_dentry = NULL; + if (!req->docroot_mnt) + TUX_BUG(); + } + if (req->docroot_mnt) { + mntput(req->docroot_mnt); + req->docroot_mnt = NULL; + } + + req->offset_start = 0; + req->offset_end = 0; + req->output_len = 0; + req->total_file_len = 0; + req->lendigits = 0; + req->mtime = 0; + req->etaglen = 0; + req->etag[0] = 0; + req->ftp_command = 0; + + if (req->postponed) + TUX_BUG(); + if (test_bit(0, &req->idle_input)) + TUX_BUG(); + if (test_bit(0, &req->wait_output_space)) + TUX_BUG(); + if (req->parsed_len) + trunc_headers(req); + if (req->parsed_len) + TUX_BUG(); + req->attr = NULL; + req->usermode = 0; + req->usermodule_idx = 0; + req->atom_idx = 0; + if (req->module_dentry) { + dput(req->module_dentry); + req->module_dentry = NULL; + } + if (req->headers) + kfree(req->headers); + req->headers = NULL; + req->headers_len = 0; + + req->method = METHOD_NONE; + req->method_len = 0; + req->method_str = NULL; + req->version = 0; + req->version_str = NULL; + req->version_len = 0; + + req->uri_str = NULL; + req->uri_len = 0; + + req->objectname[0] = 0; + req->objectname_len = 0; + + req->query_str = NULL; + req->query_len = 0; + + req->cookies_str = NULL; + req->cookies_len = 0; + req->parse_cookies = 0; + + req->contentlen_str = NULL; + req->contentlen_len = 0; + req->content_len = 0; + + req->user_agent_str = NULL; + req->user_agent_len = 0; + + req->may_send_gzip = 0; + req->content_gzipped = 0; + if (req->gzip_state) + tux_gzip_end(req); + + req->content_type_str = NULL; + req->content_type_len = 0; + + req->accept_str = NULL; + req->accept_len = 0; + + req->accept_charset_str = NULL; + req->accept_charset_len = 0; + + req->accept_encoding_str = NULL; + req->accept_encoding_len = 0; + + req->accept_language_str = NULL; + req->accept_language_len = 0; + + req->cache_control_str = NULL; + req->cache_control_len = 0; + + req->if_modified_since_str = NULL; + req->if_modified_since_len = 0; + + req->if_none_match_str = NULL; + req->if_none_match_len = 0; + + 
req->if_range_str = NULL; + req->if_range_len = 0; + + req->negotiate_str = NULL; + req->negotiate_len = 0; + + req->pragma_str = NULL; + req->pragma_len = 0; + + req->referer_str = NULL; + req->referer_len = 0; + + req->post_data_str = NULL; + req->post_data_len = 0; + + SET_TIMESTAMP(req->accept_timestamp); +#if CONFIG_TUX_EXTENDED_LOG + req->parse_timestamp = 0; + req->output_timestamp = 0; + req->flush_timestamp = 0; +#endif + req->status = 0; + + req->total_bytes += req->bytes_sent; + req->bytes_sent = 0; +#if CONFIG_TUX_DEBUG + req->bytes_expected = 0; +#endif + req->body_len = 0; + keep_alive = req->keep_alive; + clear_keepalive(req); + req->had_cachemiss = 0; + // first_timestamp and total_bytes is kept! + req->event = 0; + req->lookup_dir = 0; + req->lookup_404 = 0; + + req->error = 0; + req->user_error = 0; + + if (req->abuf.page) + __free_page(req->abuf.page); + memset(&req->abuf, 0, sizeof(req->abuf)); + + if (sk && keep_alive) { + add_tux_atom(req, parse_request); + if (skb_queue_empty(&sk->receive_queue)) { + spin_lock_irq(&req->ti->work_lock); + add_keepalive_timer(req); + if (test_and_set_bit(0, &req->idle_input)) + TUX_BUG(); + /* + * Avoid the race with the event callback: + */ + if (skb_queue_empty(&sk->receive_queue) || + !test_and_clear_bit(0, &req->idle_input)) { + INC_STAT(nr_idle_input_pending); + spin_unlock_irq(&req->ti->work_lock); + tux_push_req(req); + goto out; + } + del_keepalive_timer(req); + spin_unlock_irq(&req->ti->work_lock); + } + Dprintk("KEEPALIVE PENDING req %p <%p> (sock %p, sk %p) (keepalive: %d, status: %d)\n", req, __builtin_return_address(0), req->sock, req->sock->sk, req->keep_alive, req->status); + add_req_to_workqueue(req); + INC_STAT(nr_keepalive_optimized); + goto out; + } + + del_timer_sync(&req->keepalive_timer); + del_timer_sync(&req->output_timer); + + if (timer_pending(&req->keepalive_timer)) + TUX_BUG(); + if (timer_pending(&req->output_timer)) + TUX_BUG(); + if (!list_empty(&req->lru)) + TUX_BUG(); + 
req->nr_keepalives = 0; + req->client_addr = 0; + req->client_port = 0; + req->virtual = 0; + req->ftp_offset_start = 0; + + req->host[0] = 0; + req->host_len = 0; + + if (req->cwd_dentry) { + dput(req->cwd_dentry); + req->cwd_dentry = NULL; + if (!req->cwd_mnt) + TUX_BUG(); + } + if (req->cwd_mnt) { + mntput(req->cwd_mnt); + req->cwd_mnt = NULL; + } + put_data_sock(req); + req->prev_pos = 0; + req->curroff = 0; + req->total = 0; + if (req->dirp0) { + kfree(req->dirp0); + req->dirp0 = NULL; + } + + if (sk) + unlink_tux_socket(req); + req->sock = NULL; + /* + * Close potential user-space file descriptors. + */ + { + int fd = req->fd, ret; + + if (fd != -1) { + req->fd = -1; + ret = sys_close(fd); + if (ret) + TUX_BUG(); + } else + if (sock) + sock_release(sock); + } + kfree_req(req); +out: + ; +} + +static int warn_once = 1; + +static unsigned int writeout_log (void) +{ + unsigned int len, pending, next_log_tail; + mm_segment_t oldmm = get_fs(); + struct file *log_filp; + char * str; + unsigned int ret; + + Dprintk("TUX logger: opening log file {%s}.\n", tux_logfile); + log_filp = tux_open_file(tux_logfile, O_CREAT|O_APPEND|O_WRONLY|O_LARGEFILE); + if (!log_filp) { + if (warn_once) { + printk(KERN_ERR "TUX: could not open log file {%s}!\n", + tux_logfile); + warn_once = 0; + } + __set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ); + return 0; + } + spin_lock(&log_lock); + str = log_buffer + log_tail; + if (log_head < log_tail) { + len = LOG_LEN-log_tail; + next_log_tail = 0; + } else { + len = log_head-log_tail; + next_log_tail = log_head; + } + if (!len) + goto out; + spin_unlock(&log_lock); + + set_fs(KERNEL_DS); + ret = log_filp->f_op->write(log_filp, str, len, &log_filp->f_pos); + set_fs(oldmm); + + if (len != ret) { + if (ret == -ENOSPC) { + printk(KERN_ERR "TUX: trying to write TUX logfile %s, but filesystem is full! 
Lost %d bytes of log data.\n", tux_logfile, len); + } else { + printk(KERN_ERR "TUX: log write %d != %d.\n", ret, len); + printk(KERN_ERR "TUX: log_filp: %p, str: %p, len: %d str[len-1]: %d.\n", log_filp, str, len, str[len-1]); + } + goto out_lock; + } + + /* + * Sync log data to disk: + */ + if (log_filp->f_op && log_filp->f_op->fsync) { + down(&log_filp->f_dentry->d_inode->i_sem); + log_filp->f_op->fsync(log_filp, log_filp->f_dentry, 1); + up(&log_filp->f_dentry->d_inode->i_sem); + } + + /* + * Reduce the cache footprint of the logger file - it's + * typically write-once. + */ + invalidate_inode_pages(log_filp->f_dentry->d_inode); + +out_lock: + spin_lock(&log_lock); +out: + log_tail = next_log_tail; + pending = (log_head-log_tail) % LOG_LEN; + spin_unlock(&log_lock); + + if (pending < HARD_LIMIT) + wake_up(&log_full); + + fput(log_filp); + return pending; +} + +static DECLARE_WAIT_QUEUE_HEAD(stop_logger_wait); +static int stop_logger = 0; + +static int logger_thread (void *data) +{ + DECLARE_WAITQUEUE(wait, current); + mm_segment_t oldmm; + + daemonize(); + + oldmm = get_fs(); + set_fs(KERNEL_DS); + printk(KERN_NOTICE "TUX: logger thread started.\n"); + sprintf(current->comm, "TUX logger"); +#if CONFIG_SMP + { + unsigned long mask = log_cpu_mask; + + if (cpu_online_map & mask) + set_cpus_allowed(current, mask); + } +#endif + + + spin_lock_irq(¤t->sigmask_lock); + siginitsetinv(¤t->blocked, 0); + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + if (log_buffer) + TUX_BUG(); + log_buffer = vmalloc(LOG_LEN); + memset(log_buffer, 0, LOG_LEN); + log_head = log_tail = 0; + + current->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; + + add_wait_queue(&log_wait, &wait); + for (;;) { + Dprintk("logger does writeout - stop:%d.\n", stop_logger); + + while (writeout_log() >= SOFT_LIMIT) { + if (stop_logger) + break; + } + if (stop_logger) + break; + /* nothing */; + + Dprintk("logger does sleep - stop:%d.\n", stop_logger); + 
__set_current_state(TASK_INTERRUPTIBLE); + if (log_head != log_tail) { + __set_current_state(TASK_RUNNING); + continue; + } + schedule_timeout(HZ); + Dprintk("logger back from sleep - stop:%d.\n", stop_logger); + if (signal_pending(current)) + flush_all_signals(); + } + remove_wait_queue(&log_wait, &wait); + + vfree(log_buffer); + log_buffer = NULL; + stop_logger = 0; + wake_up(&stop_logger_wait); + + set_fs(oldmm); + + return 0; +} + +void start_log_thread (void) +{ + warn_once = 1; + + logger_pid = kernel_thread(logger_thread, NULL, 0); + if (logger_pid < 0) + TUX_BUG(); +} + +void stop_log_thread (void) +{ + DECLARE_WAITQUEUE(wait, current); + + Dprintk("stopping logger thread %d ...\n", logger_pid); + + __set_current_state(TASK_UNINTERRUPTIBLE); + add_wait_queue(&stop_logger_wait, &wait); + stop_logger = 1; + wake_up(&log_wait); + schedule(); + __set_current_state(TASK_RUNNING); + remove_wait_queue(&stop_logger_wait, &wait); + + Dprintk("logger thread stopped!\n"); +} --- linux/net/tux/main.c.orig +++ linux/net/tux/main.c @@ -0,0 +1,1389 @@ +/* + * TUX - Integrated Application Protocols Layer and Object Cache + * + * Copyright (C) 2000, 2001, Ingo Molnar + * + * main.c: main management and initialization routines + */ + +#define __KERNEL_SYSCALLS__ +#include + +/**************************************************************** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + ****************************************************************/ + +/* + * Threads information. + */ +unsigned int nr_tux_threads; +static atomic_t nr_tux_threads_running = ATOMIC_INIT(0); +static int stop_threads = 0; + +threadinfo_t threadinfo[CONFIG_TUX_NUMTHREADS]; + +static void flush_all_requests (threadinfo_t *ti); + +void flush_all_signals (void) +{ + spin_lock_irq(¤t->sigmask_lock); + flush_signals(current); + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); +} + +int nr_requests_used (void) +{ + unsigned int i, nr = 0; + + for (i = 0; i < nr_tux_threads; i++) { + threadinfo_t *ti = threadinfo + i; + nr += ti->nr_requests - ti->nr_free_requests; + } + + return nr; +} + +static inline int accept_pending (threadinfo_t *ti) +{ + int j; + + for (j = 0; j < CONFIG_TUX_NUMSOCKETS; j++) { + if (!ti->listen[j].proto) + break; + if (ti->listen[j].sock->sk->tp_pinfo.af_tcp.accept_queue) + return 1; + } + return 0; +} + +static inline int requests_pending (threadinfo_t *ti) +{ + if (!list_empty(&ti->work_pending)) + return 1; + return 0; +} + +static int event_loop (threadinfo_t *ti) +{ + tux_req_t *req; + int work_done; + +repeat_accept: + if (ti->thread != current) + TUX_BUG(); + + /* + * Any (relevant) event on the socket will change this + * thread to TASK_RUNNING because we add it to both + * the main listening and the connection request socket + * waitqueues. Thus we can do 'lazy checking' of work + * to be done and schedule away only if the thread is + * still TASK_INTERRUPTIBLE. This makes TUX fully + * event driven. 
+ */ + set_task_state(current, TASK_INTERRUPTIBLE); + current->flags |= PF_MEMALLOC; + work_done = 0; + if (accept_pending(ti)) + work_done = accept_requests(ti); + + if (requests_pending(ti)) { + work_done = process_requests(ti, &req); + if (req) + goto handle_userspace_req; + } + + /* + * Be nice to other processes: + */ + if (unlikely(current->need_resched)) { + __set_task_state(current, TASK_RUNNING); + schedule(); + goto repeat_accept; + } + + if (ti->userspace_req) + TUX_BUG(); + if (unlikely(stop_threads)) + goto handle_stop; + + /* Any signals? */ + if (unlikely(signal_pending(current))) + goto handle_signal; + + if (work_done) + goto repeat_accept; + /* + * Any socket event either on the listen socket + * or on the request sockets will wake us up: + */ + if ((current->state != TASK_RUNNING) && + !requests_pending(ti) && !accept_pending(ti)) { + Dprintk("fast thread: no work to be done, sleeping.\n"); + schedule(); + Dprintk("fast thread: back from sleep!\n"); + goto repeat_accept; + } + goto repeat_accept; + +handle_userspace_req: + if (req->attr) + TUX_BUG(); + switch_docroot(req); + ti->userspace_req = req; + __set_task_state(current, TASK_RUNNING); + return TUX_RETURN_USERSPACE_REQUEST; + +handle_signal: + __set_task_state(current, TASK_RUNNING); + return TUX_RETURN_SIGNAL; + +handle_stop: + __set_task_state(current, TASK_RUNNING); + return TUX_RETURN_EXIT; +} + +static int init_queues (int nr_tux_threads) +{ + int i; + + for (i = 0; i < nr_tux_threads; i++) { + threadinfo_t *ti = threadinfo + i; + + INIT_LIST_HEAD(&ti->all_requests); + + ti->free_requests_lock = SPIN_LOCK_UNLOCKED; + INIT_LIST_HEAD(&ti->free_requests); + + ti->work_lock = SPIN_LOCK_UNLOCKED; + INIT_LIST_HEAD(&ti->work_pending); + INIT_LIST_HEAD(&ti->lru); + + } + return 0; +} + +int tux_chroot (char *dir) +{ + kernel_cap_t saved_cap = current->cap_effective; + mm_segment_t oldmm; + int err; + + /* Allow chroot dir to be in kernel space. 
*/ + oldmm = get_fs(); set_fs(KERNEL_DS); + set_fs(KERNEL_DS); + cap_raise (current->cap_effective, CAP_SYS_CHROOT); + + err = sys_chroot(dir); + if (!err) + sys_chdir("/"); + + current->cap_effective = saved_cap; + set_fs(oldmm); + + return err; +} + +/* + * Right now this is not fully SMP-safe against multiple TUX + * managers. It's just a rudimentary protection against typical + * mistakes. + */ +static int initialized = 0; + +#define MAX_DOCROOTLEN 500 + +static int lookup_docroot(struct nameidata *docroot, const char *name) +{ + int err; + + docroot->mnt = mntget(current->fs->rootmnt); + docroot->dentry = dget(current->fs->root); + docroot->last.len = 0; + docroot->flags = LOOKUP_FOLLOW|LOOKUP_POSITIVE; + + err = path_walk(name, docroot); + if (err) { + mntput(docroot->mnt); + docroot->mnt = NULL; + return err; + } + return 0; +} + +static int user_req_startup (void) +{ + char name[MAX_DOCROOTLEN]; + struct nameidata *docroot; + unsigned int i; + int err; + + if (initialized) + return -EINVAL; + initialized = 1; + + /* + * Look up the HTTP and FTP document root. + * (typically they are shared, but can be + * different directories.) + */ + docroot = &tux_proto_http.main_docroot; + if (docroot->mnt) + TUX_BUG(); + strcpy(name, tux_common_docroot); + strcat(name, tux_http_subdocroot); + + err = lookup_docroot(docroot, name); + if (err) { + initialized = 0; + printk(KERN_ERR "TUX: could not look up HTTP documentroot: \"%s\"\n", name); + return err; + } + + docroot = &tux_proto_ftp.main_docroot; + if (docroot->mnt) + TUX_BUG(); + strcpy(name, tux_common_docroot); + strcat(name, tux_ftp_subdocroot); + + err = lookup_docroot(docroot, name); + if (err) { + docroot = &tux_proto_http.main_docroot; + path_release(docroot); + memset(docroot, 0, sizeof(*docroot)); + initialized = 0; + printk(KERN_ERR "TUX: could not look up FTP documentroot: \"%s\"\n", name); + return err; + } + + /* + * Start up the logger thread. 
(which opens the logfile) + */ + start_log_thread(); + + nr_tux_threads = tux_threads; + if (nr_tux_threads < 1) + nr_tux_threads = 1; + if (nr_tux_threads > CONFIG_TUX_NUMTHREADS) + nr_tux_threads = CONFIG_TUX_NUMTHREADS; + tux_threads = nr_tux_threads; + + /* + * Set up per-thread work-queues: + */ + memset(threadinfo, 0, CONFIG_TUX_NUMTHREADS*sizeof(threadinfo_t)); + init_queues(nr_tux_threads); + + /* + * Prepare the worker thread structures. + */ + for (i = 0; i < nr_tux_threads; i++) { + threadinfo_t *ti = threadinfo + i; + ti->cpu = i; + } + + MOD_INC_USE_COUNT; + + return 0; +} + +static DECLARE_WAIT_QUEUE_HEAD(wait_stop); +static DECLARE_WAIT_QUEUE_HEAD(thread_stopped); + +static int user_req_shutdown (void) +{ + DECLARE_WAITQUEUE(wait, current); + struct nameidata *docroot; + int err = -EINVAL; + + lock_kernel(); + if (!initialized) { + Dprintk("TUX is not up - cannot shut down.\n"); + goto err; + } + initialized = 0; + stop_threads = 1; + add_wait_queue(&thread_stopped, &wait); + +wait_more: + /* + * Wake up all the worker threads so they notice + * that we are being stopped. 
+ */ + set_task_state(current, TASK_UNINTERRUPTIBLE); + if (atomic_read(&nr_tux_threads_running)) { + Dprintk("TUX: shutdown, %d threads still running.\n", + atomic_read(&nr_tux_threads_running)); + wake_up(&wait_stop); + schedule(); + goto wait_more; + } + set_task_state(current, TASK_RUNNING); + stop_threads = 0; + remove_wait_queue(&thread_stopped, &wait); + + if (nr_async_io_pending()) + TUX_BUG(); + + stop_log_thread(); + + docroot = &tux_proto_http.main_docroot; + path_release(docroot); + memset(docroot, 0, sizeof(*docroot)); + docroot = &tux_proto_ftp.main_docroot; + path_release(docroot); + memset(docroot, 0, sizeof(*docroot)); + err = 0; + + flush_dentry_attributes(); + free_mimetypes(); + unregister_all_tuxmodules(); + + MOD_DEC_USE_COUNT; + +err: + unlock_kernel(); + return err; +} + +void drop_permissions (void) +{ + /* + * Userspace drops privileges already, and group + * membership is important to keep. + */ + /* Give the new process no privileges.. */ + current->uid = current->euid = + current->suid = current->fsuid = tux_cgi_uid; + current->gid = current->egid = + current->sgid = current->fsgid = tux_cgi_gid; + cap_clear(current->cap_permitted); + cap_clear(current->cap_inheritable); + cap_clear(current->cap_effective); +} + +static int wait_for_others (void) +{ + threadinfo_t *ti; + unsigned int cpu; + +repeat: + if (signal_pending(current)) + return -1; + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ/10); + + for (cpu = 0; cpu < nr_tux_threads; cpu++) { + ti = threadinfo + cpu; + if (ti->listen_error) + return -1; + if (!ti->started) + goto repeat; + } + /* ok, all threads have started up. 
*/ + return 0; +} + +static void zap_listen_sockets (threadinfo_t *ti) +{ + struct socket *sock; + int i; + + for (i = 0; i < CONFIG_TUX_NUMSOCKETS; i++) { + if (!ti->listen[i].proto) + break; + sock = ti->listen[i].sock; + if (!ti->listen[i].cloned && sock) { + while (waitqueue_active(sock->sk->sleep)) + yield(); + sock_release(sock); + } + ti->listen[i].sock = NULL; + ti->listen[i].proto = NULL; + ti->listen[i].cloned = 0; + } +} + +static DECLARE_MUTEX(serialize_startup); + +static int user_req_start_thread (threadinfo_t *ti) +{ + unsigned int err, cpu, i, j, k; + struct k_sigaction *ka; + + cpu = ti->cpu; +#if CONFIG_SMP + { + unsigned int mask; + + mask = 1 << ((cpu + tux_cpu_offset) % smp_num_cpus); + if (cpu_online_map & mask) + set_cpus_allowed(current, mask); + } +#endif + ti->thread = current; + atomic_inc(&nr_tux_threads_running); + + err = start_cachemiss_threads(ti); + if (err) + goto out; + + init_waitqueue_entry(&ti->stop, current); + for (j = 0; j < CONFIG_TUX_NUMSOCKETS; j++) + init_waitqueue_entry(ti->wait_event + j, current); + + ka = current->sig->action + SIGCHLD-1; + ka->sa.sa_handler = SIG_IGN; + + /* Block all signals except SIGKILL, SIGSTOP, SIGHUP and SIGCHLD */ + spin_lock_irq(&current->sigmask_lock); + siginitsetinv(&current->blocked, sigmask(SIGKILL) | + sigmask(SIGSTOP)| sigmask(SIGHUP) | sigmask(SIGCHLD)); + recalc_sigpending(current); + spin_unlock_irq(&current->sigmask_lock); + + if (!tux_listen[cpu][0].proto) { + printk(KERN_ERR "no listen socket specified for TUX thread %d, in /proc/net/tux/%d/listen/, aborting.\n", cpu, cpu); + goto error; + } + + /* + * Serialize startup so that listen sockets can be + * created race-free. 
+ */ + down(&serialize_startup); + + Dprintk("thread %d initializing sockets.\n", cpu); + + for (k = 0; k < CONFIG_TUX_NUMSOCKETS; k++) { + tux_socket_t *e1, *e2; + + e1 = tux_listen[cpu] + k; + if (!e1->proto) + break; + for (i = 0; i < CONFIG_TUX_NUMTHREADS; i++) { + if (i == cpu) + continue; + for (j = 0; j < CONFIG_TUX_NUMSOCKETS; j++) { + e2 = tux_listen[i] + j; + if (!e2->proto) + continue; + if ((e1->ip == e2->ip) && (e1->port == e2->port) && (e1->proto == e2->proto) && threadinfo[i].listen[j].proto) { + ti->listen[k] = threadinfo[i].listen[j]; + ti->listen[k].cloned = 1; + Dprintk("cloned socket %d from thread %d's socket %d.\n", k, i, j); + goto next_socket; + } + } + } + + ti->listen[k].sock = start_listening(tux_listen[cpu] + k, cpu); + if (!ti->listen[k].sock) + goto error_unlock; + ti->listen[k].cloned = 0; + ti->listen[k].proto = tux_listen[cpu][k].proto; + Dprintk("thread %d got sock %p (%d), proto %s.\n", cpu, ti->listen[k].sock, k, ti->listen[k].proto->name); +next_socket: + ; + } + Dprintk("thread %d done initializing sockets.\n", cpu); + up(&serialize_startup); + + if (wait_for_others()) + goto error_nomsg; + + if (!ti->listen[0].proto) { + printk("hm, socket 0 has no protocol.\n"); + goto error; + } + + add_wait_queue(&wait_stop, &ti->stop); + for (j = 0; j < CONFIG_TUX_NUMSOCKETS; j++) + if (ti->listen[j].proto) + add_wait_queue_exclusive(ti->listen[j].sock->sk->sleep, + ti->wait_event + j); + drop_permissions(); + + MOD_INC_USE_COUNT; + return 0; + +error_unlock: + up(&serialize_startup); +error: + printk(KERN_NOTICE "TUX: could not start worker thread %d.\n", ti->cpu); + +error_nomsg: + ti->listen_error = 1; + ti->started = 0; + + zap_listen_sockets(ti); + flush_all_requests(ti); + stop_cachemiss_threads(ti); + + err = -EINVAL; + +out: + /* + * Last thread close the door: + */ + if (atomic_dec_and_test(&nr_tux_threads_running)) + user_req_shutdown(); + + return -err; +} + +static int flush_idleinput (threadinfo_t * ti) +{ + struct list_head 
*head, *tmp; + tux_req_t *req; + int count = 0; + + head = &ti->all_requests; + tmp = head->next; + + while (tmp != head) { + req = list_entry(tmp, tux_req_t, all); + tmp = tmp->next; + if (test_bit(0, &req->idle_input)) { + idle_event(req); + count++; + } + } + return count; +} + +static int flush_waitoutput (threadinfo_t * ti) +{ + struct list_head *head, *tmp; + tux_req_t *req; + int count = 0; + + head = &ti->all_requests; + tmp = head->next; + + while (tmp != head) { + req = list_entry(tmp, tux_req_t, all); + tmp = tmp->next; + if (test_bit(0, &req->wait_output_space)) { + output_space_event(req); + count++; + } + } + return count; +} + +static void flush_all_requests (threadinfo_t *ti) +{ + for (;;) { + int count; + + count = flush_idleinput(ti); + count += flush_waitoutput(ti); + count += tux_flush_workqueue(ti); + count += flush_freequeue(ti); + if (!ti->nr_requests) + break; + /* + * Go through again if we advanced: + */ + if (count) + continue; + Dprintk("flush_all_requests: %d requests still waiting.\n", ti->nr_requests); +#if TUX_DEBUG + count = print_all_requests(ti); + Dprintk("flush_all_requests: printed %d requests.\n", count); +#endif + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ/10); + } +} + +int nr_async_io_pending (void) +{ + unsigned int i, sum = 0; + + for (i = 0; i < nr_tux_threads; i++) { + threadinfo_t *ti = threadinfo + i; + if (ti->iot) + sum += ti->iot->nr_async_pending; + } + return sum; +} + +static int user_req_stop_thread (threadinfo_t *ti) +{ + int j; + + printk(KERN_NOTICE "TUX: thread %d stopping ...\n", + (int)(ti-threadinfo)); + + if (!ti->started) + TUX_BUG(); + for (j = 0; j < CONFIG_TUX_NUMSOCKETS; j++) + if (ti->listen[j].proto) + remove_wait_queue(ti->listen[j].sock->sk->sleep, + ti->wait_event + j); + remove_wait_queue(&wait_stop, &ti->stop); + + Dprintk(KERN_NOTICE "TUX: thread %d waiting for sockets to go inactive ...\n", (int)(ti-threadinfo)); + zap_listen_sockets(ti); + + Dprintk(KERN_NOTICE "TUX: 
thread %d has all sockets inactive.\n", (int)(ti-threadinfo)); + + flush_all_requests(ti); + stop_cachemiss_threads(ti); + + if (ti->nr_requests) + TUX_BUG(); + ti->started = 0; + + printk(KERN_INFO "TUX: thread %d stopped.\n", ti->cpu); + + ti->thread = NULL; + current->tux_info = NULL; + current->tux_exit = NULL; + atomic_dec(&nr_tux_threads_running); + wake_up(&thread_stopped); + + MOD_DEC_USE_COUNT; + + return 0; +} + +#define COPY_INT(u_field, k_field) \ +do { \ + if (__copy_to_user(&u_info->u_field, &req->k_field, \ + sizeof(req->k_field))) \ + return_EFAULT; \ +} while (0) + +#define GETLEN(k_field, maxlen) \ + ((req->k_field##_len < maxlen) ? \ + req->k_field##_len : maxlen-1) + +#define COPY_STR(u_field, k_field, maxlen) \ +do { \ + if (__copy_to_user(u_info->u_field, req->k_field##_str, \ + GETLEN(k_field, maxlen))) \ + return_EFAULT; \ +} while (0) + +#define COPY_COND_STR(u_field,k_field,maxlen) \ +do { \ + if (req->k_field##_len) \ + COPY_STR(u_field, k_field, maxlen); \ + if (__put_user((char)0, u_info->u_field + \ + GETLEN(k_field, maxlen))) \ + return_EFAULT; \ +} while (0) + +static void finish_userspace_req (tux_req_t *req) +{ + threadinfo_t *ti = req->ti; + + ti->userspace_req = NULL; + req->usermode = 0; + req->private = 0; + req->error = 0; + DEC_STAT(nr_userspace_pending); + flush_request(req, 0); +} + +static void zap_userspace_req (tux_req_t *req) +{ + clear_keepalive(req); + finish_userspace_req(req); +} + +/* + * Fills in the user-space request structure: + */ +static int prepare_userspace_req (threadinfo_t *ti, user_req_t *u_info) +{ + u64 u_req; + tux_req_t *req = ti->userspace_req; + unsigned int tmp; + int filelen; + int fd; + + Dprintk("prepare_userspace_req(%p).\n", req); + if (!req) + TUX_BUG(); + if (req->error) { + TDprintk("userspace request has error %d.\n", req->error); + return -1; + } + fd = req->fd; + if (fd == -1) { + fd = sock_map_fd(req->sock); + Dprintk("sock_map_fd(%p) :%d.\n", req, fd); + if (fd < 0) { + 
Dprintk("sock_map_fd() returned %d.\n", fd); + return -EMFILE; + } + req->fd = fd; + } + +#define return_EFAULT do { Dprintk("-EFAULT at %d:%s.\n", __LINE__, __FILE__); return -EFAULT; } while (0) + + if (!access_ok(VERIFY_WRITE, u_info, sizeof(*u_info))) + return_EFAULT; + if (__copy_to_user(&u_info->sock, &fd, sizeof(fd))) + return_EFAULT; + if (req->attr) + TUX_BUG(); + + COPY_INT(module_index, usermodule_idx); + + COPY_COND_STR(query, query, MAX_URI_LEN); + + COPY_INT(event, event); + Dprintk("prepare userspace, user error: %d, event %d.\n", req->user_error, req->event); + COPY_INT(error, user_error); + req->user_error = 0; + + filelen = req->total_file_len; + if (filelen < 0) + filelen = 0; + if (__copy_to_user(&u_info->objectlen, &filelen, sizeof(filelen))) + return_EFAULT; + if ((req->method == METHOD_POST) && !filelen) + if (__copy_to_user(&u_info->objectlen, + &req->content_len, sizeof(filelen))) + return_EFAULT; + if (req->objectname_len) { + if (req->objectname[req->objectname_len]) + TUX_BUG(); + if (__copy_to_user(u_info->objectname, req->objectname, + req->objectname_len + 1)) + return_EFAULT; + } else + if (__put_user((char)0, u_info->objectname)) + return_EFAULT; + + COPY_INT(http_version, version); + COPY_INT(http_method, method); + COPY_INT(keep_alive, keep_alive); + + COPY_INT(cookies_len, cookies_len); + if (req->cookies_len) + COPY_STR(cookies, cookies, MAX_COOKIE_LEN); + if (__put_user((char)0, u_info->cookies + req->cookies_len)) + return_EFAULT; + + u_req = (u64)(unsigned long)req; + if (__copy_to_user(&u_info->id, &u_req, sizeof(u_req))) + return_EFAULT; + COPY_INT(priv, private); + COPY_INT(bytes_sent, bytes_sent); + + tmp = req->sock->sk->daddr; + if (__copy_to_user(&u_info->client_host, &tmp, sizeof(tmp))) + return_EFAULT; + + COPY_COND_STR(content_type, content_type, MAX_FIELD_LEN); + COPY_COND_STR(user_agent, user_agent, MAX_FIELD_LEN); + COPY_COND_STR(accept, accept, MAX_FIELD_LEN); + COPY_COND_STR(accept_charset, accept_charset, 
MAX_FIELD_LEN); + COPY_COND_STR(accept_encoding, accept_encoding, MAX_FIELD_LEN); + COPY_COND_STR(accept_language, accept_language, MAX_FIELD_LEN); + COPY_COND_STR(cache_control, cache_control, MAX_FIELD_LEN); + COPY_COND_STR(if_modified_since, if_modified_since, MAX_FIELD_LEN); + COPY_COND_STR(negotiate, negotiate, MAX_FIELD_LEN); + COPY_COND_STR(pragma, pragma, MAX_FIELD_LEN); + COPY_COND_STR(referer, referer, MAX_FIELD_LEN); + + return TUX_RETURN_USERSPACE_REQUEST; +} + +#define GOTO_ERR_no_unlock do { Dprintk("sys_tux() ERR at %s:%d.\n", __FILE__, __LINE__); goto err_no_unlock; } while (0) +#define GOTO_ERR_unlock do { Dprintk("sys_tux() ERR at %s:%d.\n", __FILE__, __LINE__); goto err_unlock; } while (0) + +static int register_mimetype(user_req_t *u_info) +{ + char extension[MAX_URI_LEN], mimetype[MAX_URI_LEN], expires[MAX_URI_LEN]; + u64 u_addr; + char *addr; + int ret; + + ret = strncpy_from_user(extension, u_info->objectname, MAX_URI_LEN); + if (ret <= 0) + GOTO_ERR_no_unlock; + extension[ret] = 0; + Dprintk("got MIME extension: %s.\n", extension); + ret = copy_from_user(&u_addr, &u_info->object_addr, sizeof(u_addr)); + if (ret) + GOTO_ERR_no_unlock; + addr = (char *)(unsigned long)u_addr; + ret = strncpy_from_user(mimetype, addr, MAX_URI_LEN); + if (ret <= 0) + GOTO_ERR_no_unlock; + mimetype[ret] = 0; + Dprintk("got MIME type: %s.\n", mimetype); + ret = strncpy_from_user(expires, u_info->cache_control, MAX_URI_LEN); + if (ret >= 0) + expires[ret] = 0; + else + expires[0] = 0; + Dprintk("got expires header: %s.\n", expires); + + add_mimetype(extension, mimetype, expires); + ret = 0; +err_no_unlock: + return ret; +} + +void user_send_buffer (tux_req_t *req, int cachemiss) +{ + int ret; + + + SET_TIMESTAMP(req->output_timestamp); + +repeat: + ret = send_sync_buf(req, req->sock, req->userbuf, req->userlen, MSG_DONTWAIT | MSG_MORE); + switch (ret) { + case -EAGAIN: + add_tux_atom(req, user_send_buffer); + if (add_output_space_event(req, req->sock)) { + 
del_tux_atom(req); + goto repeat; + } + INC_STAT(user_sendbuf_write_misses); + break; + default: + if (ret <= 0) { + req_err(req); + req->usermode = 0; + req->private = 0; + add_req_to_workqueue(req); + break; + } + req->userbuf += ret; + req->userlen -= ret; + if ((int)req->userlen < 0) + TUX_BUG(); + if (req->userlen) + goto repeat; + add_req_to_workqueue(req); + break; + } +} + +void user_send_object (tux_req_t *req, int cachemiss) +{ + int ret; + + + SET_TIMESTAMP(req->output_timestamp); + +repeat: + ret = generic_send_file(req, req->sock, cachemiss); + switch (ret) { + case -5: + add_tux_atom(req, user_send_object); + output_timeout(req); + break; + case -4: + add_tux_atom(req, user_send_object); + if (add_output_space_event(req, req->sock)) { + del_tux_atom(req); + goto repeat; + } + INC_STAT(user_sendobject_write_misses); + break; + case -3: + INC_STAT(user_sendobject_cachemisses); + add_tux_atom(req, user_send_object); + queue_cachemiss(req); + break; + case -1: + break; + default: + req->in_file.f_pos = 0; + add_req_to_workqueue(req); + break; + } +} + +void user_get_object (tux_req_t *req, int cachemiss) +{ + int missed; + + if (!req->dentry) { + req->usermode = 0; + missed = lookup_object(req, cachemiss ? 
0 : LOOKUP_ATOMIC); + if (req->usermode) + TUX_BUG(); + req->usermode = 1; + if (!missed && !req->dentry) { + req->error = 0; + req->user_error = -ENOENT; + add_req_to_workqueue(req); + return; + } + if (missed) { + if (cachemiss) + TUX_BUG(); + INC_STAT(user_lookup_cachemisses); +fetch_missed: + req->ti->userspace_req = NULL; + DEC_STAT(nr_userspace_pending); + add_tux_atom(req, user_get_object); + queue_cachemiss(req); + return; + } + } + req->total_file_len = req->dentry->d_inode->i_size; + if (!req->output_len) + req->output_len = req->total_file_len; + if (tux_fetch_file(req, !cachemiss)) { + INC_STAT(user_fetch_cachemisses); + goto fetch_missed; + } + req->in_file.f_pos = 0; + add_req_to_workqueue(req); +} + +asmlinkage int __sys_tux (unsigned int action, user_req_t *u_info) +{ + int ret = -1; + threadinfo_t *ti; + tux_req_t *req; + + Dprintk("got sys_tux(%d, %p).\n", action, u_info); + + if (action >= MAX_TUX_ACTION) + GOTO_ERR_no_unlock; + + ti = (threadinfo_t *) current->tux_info; + if (ti) + if (ti->thread != current) + TUX_BUG(); + + if (!capable(CAP_SYS_ADMIN) + && (action != TUX_ACTION_CONTINUE_REQ) && + (action != TUX_ACTION_STOPTHREAD)) + goto userspace_actions; + + switch (action) { + case TUX_ACTION_CONTINUE_REQ: + ret = continue_request((int)(long)u_info); + goto out; + + case TUX_ACTION_STARTUP: + lock_kernel(); + ret = user_req_startup(); + unlock_kernel(); + goto out; + + case TUX_ACTION_SHUTDOWN: + lock_kernel(); + ret = user_req_shutdown(); + unlock_kernel(); + goto out; + + case TUX_ACTION_REGISTER_MODULE: + ret = user_register_module(u_info); + goto out; + + case TUX_ACTION_UNREGISTER_MODULE: + ret = user_unregister_module(u_info); + goto out; + + case TUX_ACTION_STARTTHREAD: + { + unsigned int nr; + + ret = copy_from_user(&nr, &u_info->thread_nr, + sizeof(int)); + if (ret) + GOTO_ERR_no_unlock; + if (nr >= nr_tux_threads) + GOTO_ERR_no_unlock; + ti = threadinfo + nr; + if (ti->started) + GOTO_ERR_unlock; + ti->started = 1; + 
current->tux_info = ti; + current->tux_exit = tux_exit; + if (ti->thread) + TUX_BUG(); + Dprintk("TUX: current open files limit for TUX%d: %ld.\n", nr, current->rlim[RLIMIT_NOFILE].rlim_cur); + lock_kernel(); + ret = user_req_start_thread(ti); + unlock_kernel(); + if (ret) { + current->tux_info = NULL; + current->tux_exit = NULL; + } else { + if (ti->thread != current) + TUX_BUG(); + } + goto out_userreq; + } + + case TUX_ACTION_STOPTHREAD: + if (!ti) + GOTO_ERR_no_unlock; + if (!ti->started) + GOTO_ERR_unlock; + req = ti->userspace_req; + if (req) + zap_userspace_req(req); + + lock_kernel(); + ret = user_req_stop_thread(ti); + unlock_kernel(); + goto out_userreq; + + case TUX_ACTION_CURRENT_DATE: + ret = strncpy_from_user(tux_date, u_info->new_date, + DATE_LEN); + if (ret <= 0) + GOTO_ERR_no_unlock; + goto out; + + case TUX_ACTION_REGISTER_MIMETYPE: + ret = register_mimetype(u_info); + if (ret) + GOTO_ERR_no_unlock; + goto out; + + case TUX_ACTION_QUERY_VERSION: + ret = (TUX_MAJOR_VERSION << 24) | (TUX_MINOR_VERSION << 16) | TUX_PATCHLEVEL_VERSION; + goto out; + default: + ; + } + +userspace_actions: + + if (!ti) + GOTO_ERR_no_unlock; + + if (!ti->started) + GOTO_ERR_unlock; + + req = ti->userspace_req; + if (!req) { + if (action == TUX_ACTION_EVENTLOOP) + goto eventloop; + GOTO_ERR_unlock; + } + if (!req->usermode) + TUX_BUG(); + + ret = copy_from_user(&req->event, &u_info->event, sizeof(int)); + if (ret) + GOTO_ERR_unlock; + ret = copy_from_user(&req->status, &u_info->http_status, sizeof(int)); + if (ret) + GOTO_ERR_unlock; + ret = copy_from_user(&req->bytes_sent, &u_info->bytes_sent, sizeof(int)); + if (ret) + GOTO_ERR_unlock; + ret = copy_from_user(&req->private, &u_info->priv, sizeof(req->private)); + if (ret) + GOTO_ERR_unlock; + + switch (action) { + + case TUX_ACTION_EVENTLOOP: +eventloop: + req = ti->userspace_req; + if (req) + zap_userspace_req(req); + ret = event_loop(ti); + goto out_userreq; + + /* + * Module forces keepalive off, server will close + * 
the connection. + */ + case TUX_ACTION_FINISH_CLOSE_REQ: + clear_keepalive(req); + + case TUX_ACTION_FINISH_REQ: + finish_userspace_req(req); + goto eventloop; + + case TUX_ACTION_REDIRECT_REQ: + + ti->userspace_req = NULL; + req->usermode = 0; + req->private = 0; + req->error = TUX_ERROR_REDIRECT; + DEC_STAT(nr_userspace_pending); + add_tux_atom(req, redirect_request); + add_req_to_workqueue(req); + + goto eventloop; + + case TUX_ACTION_POSTPONE_REQ: + + postpone_request(req); + ti->userspace_req = NULL; + ret = TUX_RETURN_USERSPACE_REQUEST; + break; + + case TUX_ACTION_GET_OBJECT: + release_req_dentry(req); + ret = strncpy_from_user(req->objectname, + u_info->objectname, MAX_URI_LEN-1); + if (ret <= 0) { + req->objectname[0] = 0; + req->objectname_len = 0; + GOTO_ERR_unlock; + } + req->objectname[ret] = 0; // string delimit + req->objectname_len = ret; + + Dprintk("got objectname {%s} (%d) from user-space req %p (req: %p).\n", req->objectname, req->objectname_len, u_info, req); + req->ti->userspace_req = NULL; + DEC_STAT(nr_userspace_pending); + user_get_object(req, 0); + goto eventloop; + + case TUX_ACTION_READ_OBJECT: + { + u64 u_addr; + char *addr; + loff_t ppos = 0; + struct file *filp; + + if (!req->dentry) + GOTO_ERR_unlock; + + ret = copy_from_user(&u_addr, &u_info->object_addr, + sizeof(u_addr)); + if (ret) + GOTO_ERR_unlock; + addr = (char *)(unsigned long)u_addr; + filp = dentry_open(req->dentry, O_RDONLY, 0); + dget(req->dentry); + generic_file_read(filp, addr, req->total_file_len, &ppos); + fput(filp); + ret = TUX_RETURN_USERSPACE_REQUEST; + break; + } + + case TUX_ACTION_SEND_OBJECT: + if (!req->dentry) + GOTO_ERR_unlock; + req->ti->userspace_req = NULL; + DEC_STAT(nr_userspace_pending); + user_send_object(req, 0); + goto eventloop; + + case TUX_ACTION_SEND_BUFFER: + { + u64 u_addr; + char *addr; + unsigned int len; + + ret = copy_from_user(&u_addr, + &u_info->object_addr, sizeof(u_addr)); + if (ret) + GOTO_ERR_unlock; + addr = (char *)(unsigned 
long)u_addr; + ret = copy_from_user(&len, + &u_info->objectlen, sizeof(addr)); + if (ret) + GOTO_ERR_unlock; + if ((int)len <= 0) + GOTO_ERR_unlock; + + ret = -EFAULT; + if (!access_ok(VERIFY_READ, addr, len)) + GOTO_ERR_unlock; + req->userbuf = addr; + req->userlen = len; + + req->ti->userspace_req = NULL; + DEC_STAT(nr_userspace_pending); + user_send_buffer(req, 0); + ret = 0; + goto eventloop; + } + + case TUX_ACTION_READ_HEADERS: + { + char *addr; + u64 u_addr; + + ret = copy_from_user(&u_addr, &u_info->object_addr, + sizeof(u_addr)); + if (ret) + GOTO_ERR_unlock; + addr = (char *)(unsigned long)u_addr; + ret = copy_to_user(&u_info->objectlen, + &req->headers_len, sizeof(req->headers_len)); + if (ret) + GOTO_ERR_unlock; + ret = copy_to_user(addr,req->headers, req->headers_len); + if (ret) + GOTO_ERR_unlock; + break; + } + + case TUX_ACTION_READ_POST_DATA: + { + char *addr; + unsigned int size; + u64 u_addr; + + ret = copy_from_user(&u_addr, &u_info->object_addr, + sizeof(u_addr)); + if (ret) + GOTO_ERR_unlock; + addr = (char *)(unsigned long)u_addr; + + ret = copy_from_user(&size, &u_info->objectlen, + sizeof(size)); + if (ret) + GOTO_ERR_unlock; + Dprintk("READ_POST_DATA: got %p(%d).\n", addr, size); + if (req->post_data_len < size) + size = req->post_data_len; + Dprintk("READ_POST_DATA: writing %d.\n", size); + ret = copy_to_user(&u_info->objectlen, + &size, sizeof(size)); + if (ret) + GOTO_ERR_unlock; + ret = copy_to_user(addr, req->post_data_str, size); + if (ret) + GOTO_ERR_unlock; + goto out; + } + + case TUX_ACTION_WATCH_PROXY_SOCKET: + { + struct socket *sock; + int err; + long fd; + u64 u_addr; + + ret = copy_from_user(&u_addr, &u_info->object_addr, + sizeof(u_addr)); + if (ret) + GOTO_ERR_unlock; + fd = (int)(unsigned long)u_addr; + + sock = sockfd_lookup(fd, &err); + if (!sock) + GOTO_ERR_unlock; + put_data_sock(req); + link_tux_data_socket(req, sock); + + ret = 0; + goto out; + } + + case TUX_ACTION_WAIT_PROXY_SOCKET: + { + if (!req->data_sock) + 
GOTO_ERR_unlock; + if (socket_input(req->data_sock)) { + ret = TUX_RETURN_USERSPACE_REQUEST; + goto out_userreq; + } + spin_lock_irq(&req->ti->work_lock); + add_keepalive_timer(req); + if (test_and_set_bit(0, &req->idle_input)) + TUX_BUG(); + spin_unlock_irq(&req->ti->work_lock); + if (socket_input(req->data_sock)) { + unidle_req(req); + ret = TUX_RETURN_USERSPACE_REQUEST; + goto out_userreq; + } + req->ti->userspace_req = NULL; + goto eventloop; + } + + default: + GOTO_ERR_unlock; + } + +out_userreq: + req = ti->userspace_req; + if (req) { + ret = prepare_userspace_req(ti, u_info); + if (ret < 0) { + TDprintk("hm, user req %p returned %d, zapping.\n", + req, ret); + zap_userspace_req(req); + goto eventloop; + } + } +out: + Dprintk("sys_tux(%d, %p) returning %d.\n", action, u_info, ret); + while (current->need_resched) { + __set_task_state(current, TASK_RUNNING); + schedule(); + } + return ret; +err_unlock: +err_no_unlock: + Dprintk("sys_tux(%d, %p) returning -EINVAL (ret:%d)!\n", action, u_info, ret); + while (current->need_resched) { + __set_task_state(current, TASK_RUNNING); + schedule(); + } + return -EINVAL; +} + +/* + * This gets called if a TUX thread does an exit(). 
+ */ +void tux_exit (void) +{ + __sys_tux(TUX_ACTION_STOPTHREAD, NULL); +} + +int tux_init(void) +{ + start_sysctl(); + +#if CONFIG_TUX_MODULE + spin_lock(&tux_module_lock); + sys_tux_ptr = __sys_tux; + tux_module = THIS_MODULE; + spin_unlock(&tux_module_lock); +#endif + + return 0; +} + +void tux_cleanup (void) +{ +#if CONFIG_TUX_MODULE + spin_lock(&tux_module_lock); + tux_module = NULL; + sys_tux_ptr = NULL; + spin_unlock(&tux_module_lock); +#endif + + end_sysctl(); +} + +module_init(tux_init) +module_exit(tux_cleanup) + +MODULE_LICENSE("GPL"); + --- linux/net/tux/Makefile.orig +++ linux/net/tux/Makefile @@ -0,0 +1,16 @@ +# +# Makefile for TUX +# + +O_TARGET := tux.o +MOD_LIST_NAME := NET_MODULES + +obj-y := accept.o input.o userspace.o cachemiss.o output.o \ + redirect.o postpone.o logger.o proto_http.o proto_ftp.o \ + proc.o main.o mod.o abuf.o times.o directory.o gzip.o + +obj-$(CONFIG_TUX_EXTCGI) += cgi.o extcgi.o +obj-m := $(O_TARGET) + +include $(TOPDIR)/Rules.make + --- linux/net/tux/mod.c.orig +++ linux/net/tux/mod.c @@ -0,0 +1,262 @@ +/* + * TUX - Integrated Application Protocols Layer and Object Cache + * + * Copyright (C) 2000, 2001, Ingo Molnar + * + * mod.c: loading/registering of dynamic TUX modules + */ + +#include +#include + +/**************************************************************** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + ****************************************************************/ + +spinlock_t tuxmodules_lock = SPIN_LOCK_UNLOCKED; +static LIST_HEAD(tuxmodules_list); + +tcapi_template_t * get_first_usermodule (void) +{ + tcapi_template_t *tcapi; + struct list_head *head, *curr, *next; + + spin_lock(&tuxmodules_lock); + head = &tuxmodules_list; + next = head->next; + + while ((curr = next) != head) { + tcapi = list_entry(curr, tcapi_template_t, modules); + next = curr->next; + if (tcapi->userspace_id) { + spin_unlock(&tuxmodules_lock); + return tcapi; + } + } + spin_unlock(&tuxmodules_lock); + return NULL; +} + +static tcapi_template_t * lookup_module (const char *vfs_name) +{ + tcapi_template_t *tcapi; + struct list_head *head, *curr, *next; + + while (*vfs_name == '/') + vfs_name++; + Dprintk("looking up TUX module {%s}.\n", vfs_name); + head = &tuxmodules_list; + next = head->next; + + while ((curr = next) != head) { + tcapi = list_entry(curr, tcapi_template_t, modules); + next = curr->next; + Dprintk("checking module {%s} == {%s}?\n", vfs_name, tcapi->vfs_name); + if (!strcmp(tcapi->vfs_name, vfs_name)) + return tcapi; + } + return NULL; +} + +/* + * Attempt to load a TUX application module. + * This is the slow path, we cache ('link') the module's + * API vector to the inode. + * The module loading path is serialized, and we handshake + * with the loaded module and fetch its API vector. 
+ */ +tcapi_template_t * lookup_tuxmodule (const char *filename) +{ + tcapi_template_t *tcapi; + + spin_lock(&tuxmodules_lock); + tcapi = lookup_module(filename); + if (!tcapi) + Dprintk("did not find module vfs:{%s}\n", filename); + spin_unlock(&tuxmodules_lock); + return tcapi; +} + + +int register_tuxmodule (tcapi_template_t *tcapi) +{ + int ret = -EEXIST; + + spin_lock(&tuxmodules_lock); + + if (lookup_module(tcapi->vfs_name)) { + Dprintk("module with VFS binding '%s' already registered!\n", + tcapi->vfs_name); + goto out; + } + + list_add(&tcapi->modules, &tuxmodules_list); + ret = 0; + Dprintk("TUX module %s registered.\n", tcapi->vfs_name); +out: + spin_unlock(&tuxmodules_lock); + + return ret; +} + +void unregister_all_tuxmodules (void) +{ + tcapi_template_t *tcapi; + struct list_head *curr; + + spin_lock(&tuxmodules_lock); + while (((curr = tuxmodules_list.next)) != &tuxmodules_list) { + tcapi = list_entry(curr, tcapi_template_t, modules); + list_del(curr); + kfree(tcapi->vfs_name); + kfree(tcapi); + } + spin_unlock(&tuxmodules_lock); +} + +tcapi_template_t * unregister_tuxmodule (char *vfs_name) +{ + tcapi_template_t *tcapi; + int err = 0; + + spin_lock(&tuxmodules_lock); + tcapi = lookup_module(vfs_name); + if (!tcapi) { + Dprintk("huh, module %s not registered??\n", vfs_name); + err = -1; + } else { + list_del(&tcapi->modules); + Dprintk("TUX module %s unregistered.\n", vfs_name); + } + spin_unlock(&tuxmodules_lock); + + return tcapi; +} + +static int check_module_version (user_req_t *u_info) +{ + int major, minor, patch, ret; + + ret = copy_from_user(&major, &u_info->version_major, sizeof(int)); + ret += copy_from_user(&minor, &u_info->version_minor, sizeof(int)); + ret += copy_from_user(&patch, &u_info->version_patch, sizeof(int)); + if (ret) + return -EFAULT; + + if ((major != TUX_MAJOR_VERSION) || (minor > TUX_MINOR_VERSION)) { + + printk(KERN_ERR "TUX: module version %d:%d incompatible with kernel version %d:%d!\n", major, minor, TUX_MAJOR_VERSION, 
TUX_MINOR_VERSION); + return -EINVAL; + } + return 0; +} + +int user_register_module (user_req_t *u_info) +{ + int idx, len, ret; + tcapi_template_t *tcapi; + char modulename [MAX_URI_LEN+1]; + + ret = check_module_version(u_info); + if (ret) + return ret; + + /* + * Check module name length. + */ + ret = strnlen_user(u_info->objectname, MAX_URI_LEN+2); + if (ret < 0) + goto out; + ret = -EINVAL; + if (ret >= MAX_URI_LEN) + goto out; + + Dprintk("register user-module, %p.\n", u_info); + ret = strncpy_from_user(modulename, u_info->objectname, MAX_URI_LEN); + if (ret < 0) + goto out; + modulename[ret] = 0; + Dprintk("... user-module is: {%s}.\n", modulename); + len = strlen(modulename); + if (!len) + printk(KERN_ERR "no module name provided: please upgrade your TUX user-space utilities!\n"); + if (!len || (len > MAX_URI_LEN)) + return -EINVAL; + Dprintk("... user-module len is: %d.\n", len); + + ret = copy_from_user(&idx, &u_info->module_index, sizeof(int)); + if (ret || !idx) + goto out; + Dprintk("... user-module index is: %d.\n", idx); + + ret = -ENOMEM; + tcapi = (tcapi_template_t *) kmalloc(sizeof(*tcapi), GFP_KERNEL); + if (!tcapi) + goto out; + memset(tcapi, 0, sizeof(*tcapi)); + + tcapi->vfs_name = (char *) kmalloc(len+1, GFP_KERNEL); + if (!tcapi->vfs_name) { + kfree(tcapi); + goto out; + } + strcpy(tcapi->vfs_name, modulename); + tcapi->userspace_id = idx; + + Dprintk("... registering module {%s}.\n", tcapi->vfs_name); + ret = register_tuxmodule(tcapi); +out: + return ret; +} + +int user_unregister_module (user_req_t *u_info) +{ + int len, ret; + tcapi_template_t *tcapi; + char modulename [MAX_URI_LEN+1]; + + /* + * Check module name length. 
+ */ + ret = strnlen_user(u_info->objectname, MAX_URI_LEN+2); + if (ret < 0) + goto out; + ret = -EINVAL; + if (ret >= MAX_URI_LEN) + goto out; + Dprintk("unregister user-module, %p.\n", u_info); + ret = strncpy_from_user(modulename, u_info->objectname, MAX_URI_LEN); + if (ret <= 0) + goto out; + modulename[ret] = 0; + Dprintk("... user-module is: {%s}.\n", modulename); + len = strlen(modulename); + if (!len || (len > MAX_URI_LEN)) + return -EINVAL; + Dprintk("... user-module len is: %d.\n", len); + + Dprintk("... unregistering module {%s}.\n", modulename); + tcapi = unregister_tuxmodule(modulename); + ret = -EINVAL; + if (tcapi) { + ret = 0; + kfree(tcapi->vfs_name); + kfree(tcapi); + } +out: + return ret; +} + --- linux/net/tux/output.c.orig +++ linux/net/tux/output.c @@ -0,0 +1,350 @@ +/* + * TUX - Integrated Application Protocols Layer and Object Cache + * + * Copyright (C) 2000, 2001, Ingo Molnar + * + * output.c: Send data to clients + */ + +#include + +/**************************************************************** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + ****************************************************************/ + +int send_sync_buf (tux_req_t *req, struct socket *sock, const char *buf, const size_t length, unsigned long flags) +{ + struct msghdr msg; + struct iovec iov; + int len, written = 0, left = length; + + sock->sk->tp_pinfo.af_tcp.nonagle = 2; + + msg.msg_name = 0; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = flags | MSG_NOSIGNAL; +repeat_send: + msg.msg_iov->iov_len = left; + msg.msg_iov->iov_base = (char *) buf + written; + + len = sock_sendmsg(sock, &msg, left); + + Dprintk("sendmsg ret: %d, written: %d, left: %d.\n", len,written,left); + if ((len == -ERESTARTSYS) || (!(flags & MSG_DONTWAIT) && + (len == -EAGAIN))) { + flush_all_signals(); + goto repeat_send; + } + if (len > 0) { + written += len; + left -= len; + if (left) + goto repeat_send; + } + if (len >= 0) { + if (written != length) + TUX_BUG(); + if (left) + TUX_BUG(); + } + if (req && (written > 0)) + req->bytes_sent += written; + Dprintk("sendmsg FINAL ret: %d, written: %d, left: %d.\n", len,written,left); + return written ? 
written : len; +} + +unsigned int tux_zerocopy_sendfile = 1; + +typedef struct sock_send_desc +{ + struct socket *sock; + tux_req_t *req; +} sock_send_desc_t; + +static int sock_send_actor (read_descriptor_t * desc, struct page *page, + unsigned long offset, unsigned long orig_size) +{ + sock_send_desc_t *sock_desc = (sock_send_desc_t *)desc->buf; + struct socket *sock = sock_desc->sock; + tux_req_t *req = sock_desc->req; + unsigned int flags; + ssize_t written; + char *buf = NULL; + unsigned int size; + + flags = MSG_DONTWAIT | MSG_NOSIGNAL; + if (desc->count < orig_size) + orig_size = desc->count; + if (desc->count > orig_size) + flags |= MSG_MORE; + Dprintk("sock_send_actor(), page: %p, offset: %ld, orig_size: %ld, sock: %p, desc->count: %d, desc->written: %d, MSG_MORE: %d.\n", page, offset, orig_size, sock, desc->count, desc->written, flags & MSG_MORE); + + if (req->content_gzipped >= 2) { + unsigned int gzip_left; + struct msghdr msg; + struct iovec iov; + mm_segment_t oldmm; + char *kaddr = kmap(page); + __u32 in_len, out_len; + out_len = orig_size*101/100 + 12; + buf = tux_kmalloc(out_len); + in_len = orig_size; + size = out_len; + gzip_left = 0; +// 8b1f 0808 fdc4 3bd8 0300 79 +buf[1] = 0x8b; buf[0] = 0x1f; buf[3] = 0x08; buf[2] = 0x08; +buf[5] = 0xfd; buf[4] = 0xc4; buf[7] = 0x3b; buf[6] = 0xd8; +buf[9] = 0x03; buf[8] = 0x00; buf[10] = 0x79; + size += 11; + Dprintk("pre-compress: in_len: %d, out_len: %d, gzip_left: %d, uncompressed size: %d.\n", in_len, out_len, gzip_left, size); + gzip_left = tux_gzip_compress(req->gzip_state, kaddr, buf+11, &in_len, &out_len); + size -= out_len; + buf[11] = 0x79; buf[12] = 0x00; + + Dprintk("post-compress: in_len: %d, out_len: %d, gzip_left: %d, compressed size: %d.\n", in_len, out_len, gzip_left, size); + kunmap(page); + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + flags &= ~MSG_DONTWAIT; + msg.msg_flags = flags; + 
iov.iov_base = buf; + iov.iov_len = size; + + oldmm = get_fs(); set_fs(KERNEL_DS); + written = sock_sendmsg(sock, &msg, size); + set_fs(oldmm); + + Dprintk("buf: %p, offset: %ld, size: %d, written: %d.\n", buf, offset, size, written); + if (written == size) + written = orig_size; + else + written = size; + + } else { + size = orig_size; + if (tux_zerocopy_sendfile && sock->ops->sendpage && + (sock->sk->route_caps&NETIF_F_SG)) { + written = sock->ops->sendpage(sock, page, offset, size, flags); + } else { + struct msghdr msg; + struct iovec iov; + char *kaddr; + mm_segment_t oldmm; + + if (offset+size > PAGE_SIZE) + return -EFAULT; + + kaddr = kmap(page); + + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = flags; + iov.iov_base = kaddr + offset; + iov.iov_len = size; + + oldmm = get_fs(); set_fs(KERNEL_DS); + written = sock_sendmsg(sock, &msg, size); + set_fs(oldmm); + + Dprintk("kaddr: %p, offset: %ld, size: %d, written: %d.\n", kaddr, offset, size, written); + kunmap(page); + } + } + if (written < 0) { + desc->error = written; + written = 0; + } + Dprintk("desc->count: %d, desc->written: %d, written: %d.\n", desc->count, desc->written, written); + desc->count -= written; + if ((int)desc->count < 0) + TUX_BUG(); + desc->written += written; + + if (buf) + kfree(buf); + + return written; +} + +/* + * Return 1 if the output space condition went away + * before adding the handler. + */ +int add_output_space_event (tux_req_t *req, struct socket *sock) +{ + struct sock *sk = sock->sk; + /* + * blocked due to socket IO? 
+ */ + spin_lock_irq(&req->ti->work_lock); + add_keepalive_timer(req); + if (test_and_set_bit(0,&req->wait_output_space)) + TUX_BUG(); + INC_STAT(nr_output_space_pending); + + if ((sk->state == TCP_ESTABLISHED) && enough_wspace(sk)) { + if (test_and_clear_bit(0, &req->wait_output_space)) { + DEC_STAT(nr_output_space_pending); + del_keepalive_timer(req); + spin_unlock_irq(&req->ti->work_lock); + return 1; + } + } + spin_unlock_irq(&req->ti->work_lock); + + return 0; +} + +#define SEND_BLOCKSIZE (164*1024) + +int generic_send_file (tux_req_t *req, struct socket *sock, int cachemiss) +{ + sock_send_desc_t sock_desc; + int len, want, nonblock = !cachemiss; + + sock->sk->tp_pinfo.af_tcp.nonagle = 2; + + sock_desc.sock = sock; + sock_desc.req = req; + +repeat: + Dprintk("generic_send_file(%p,%d,%p) called, f_pos: %Ld, output_len: %Ld.\n", req, nonblock, sock, req->in_file.f_pos, req->output_len); + + if (req->proto->check_req_err(req, cachemiss)) + return -1; + if (connection_too_fast(req) == 2) { + len = -5; + goto out; + } + if (req->total_file_len < req->in_file.f_pos) + TUX_BUG(); + + req->desc.written = 0; + /* + * Careful, output_len can be 64-bit, while 'want' can be 32-bit. + */ + if (req->output_len > SEND_BLOCKSIZE) + want = SEND_BLOCKSIZE; + else + want = req->output_len; + req->desc.count = want; + req->desc.buf = (char *) &sock_desc; + req->desc.error = 0; + Dprintk("sendfile(), desc.count: %d.\n", req->desc.count); + do_generic_file_read(&req->in_file, &req->in_file.f_pos, &req->desc, sock_send_actor, nonblock ? 
F_ATOMIC : 0); + if (req->desc.written > 0) { + req->bytes_sent += req->desc.written; + req->output_len -= req->desc.written; + } + if (!nonblock && (req->desc.error == -EWOULDBLOCKIO)) + TUX_BUG(); + Dprintk("sendfile() wrote: %d bytes.\n", req->desc.written); + if (req->output_len && !req->desc.written && !req->desc.error) { +#if CONFIG_TUX_DEBUG + req->bytes_expected = 0; +#endif + req->in_file.f_pos = 0; + req->error = TUX_ERROR_CONN_CLOSE; + zap_request(req, cachemiss); + return -1; + } + + switch (req->desc.error) { + + case -EWOULDBLOCKIO: + len = -3; + break; + case -EAGAIN: +no_write_space: + Dprintk("sk->wmem_queued: %d, sk->sndbuf: %d.\n", + sock->sk->wmem_queued, sock->sk->sndbuf); + len = -4; + break; + default: + len = req->desc.written; +#if CONFIG_TUX_DEBUG + if (req->desc.error) + TDprintk("TUX: sendfile() returned error %d (signals pending: %08lx)!\n", req->desc.error, current->pending.signal.sig[0]); +#endif + if (!req->desc.error) { + if (req->output_len < 0) + BUG(); + if (req->output_len) { + if (test_bit(SOCK_NOSPACE, &sock->flags)) + goto no_write_space; + goto repeat; + } + } +#if CONFIG_TUX_DEBUG + if (req->desc.written != want) + TDprintk("TUX: sendfile() wrote %d bytes, wanted %d! 
(pos %Ld) (signals pending: %08lx).\n", req->desc.written, want, req->in_file.f_pos, current->pending.signal.sig[0]); + else + Dprintk("TUX: sendfile() FINISHED for req %p, wrote %d bytes.\n", req, req->desc.written); + req->bytes_expected = 0; +#endif + break; + } + +out: + Dprintk("sendfile() wrote %d bytes.\n", len); + + return len; +} + +static int file_fetch_actor (read_descriptor_t * desc, struct page *page, + unsigned long offset, unsigned long size) +{ + if (desc->count < size) + size = desc->count; + + desc->count -= size; + desc->written += size; + + return size; +} + +int tux_fetch_file (tux_req_t *req, int nonblock) +{ + int len; + + req->desc.written = 0; + req->desc.count = req->output_len; + req->desc.buf = NULL; + req->desc.error = 0; + + do_generic_file_read(&req->in_file, &req->in_file.f_pos, &req->desc, + file_fetch_actor, nonblock ? F_ATOMIC : 0); + if (nonblock && (req->desc.error == -EWOULDBLOCKIO)) + return 1; + len = req->desc.written; + if (req->desc.error) + Dprintk("fetchfile() returned %d error!\n", req->desc.error); + Dprintk("fetchfile() fetched %d bytes.\n", len); + return 0; +} + --- linux/net/tux/parser.h.orig +++ linux/net/tux/parser.h @@ -0,0 +1,102 @@ +/* + * TUX - Integrated Application Protocols Layer and Object Cache + * + * Copyright (C) 2000, Ingo Molnar + * + * parser.h: generic parsing routines + */ + +#define get_c(ptr,left) \ +({ \ + char __ret; \ + \ + if (!left) \ + GOTO_INCOMPLETE; \ + left--; \ + __ret = *((ptr)++); \ + if (!__ret) \ + GOTO_REDIR; \ + __ret; \ +}) + +#define PARSE_TOKEN(ptr,str,left) \ + ({ \ + int __ret; \ + \ + if (!left) \ + GOTO_INCOMPLETE; \ + if (sizeof(str)-1 > left) { \ + if (memcmp(ptr, str, left)) \ + GOTO_REDIR; \ + GOTO_INCOMPLETE; \ + } \ + \ + if (memcmp(ptr, str, sizeof(str)-1)) \ + __ret = 0; \ + else { \ + ptr += sizeof(str)-1; \ + left -= sizeof(str)-1; \ + __ret = 1; \ + } \ + __ret; \ + }) + +#define PARSE_METHOD(req,ptr,name,left) \ + ({ \ + int __ret; \ + \ + if 
(PARSE_TOKEN(ptr,#name" ",left)) { \ + req->method = METHOD_##name; \ + __ret = 1; \ + } else \ + __ret = 0; \ + __ret; \ + }) + +#define COPY_LINE(ptr,target,left) \ + do { \ + char prev_c = 0, c; \ + while (((c = get_c(ptr,left))) != '\n') \ + *target++ = prev_c = c; \ + if (prev_c != '\r') \ + GOTO_REDIR; \ + } while (0) + +#define COPY_LINE_TOLOWER(ptr,target,left,limit) \ + do { \ + char prev_c = 0, c; \ + while (((c = get_c(ptr,left))) != '\n') { \ + if ((c >= 'A') && (c <= 'Z')) \ + c -= 'A'-'a'; \ + *target++ = prev_c = c; \ + if (target == (limit)) \ + GOTO_REDIR; \ + } \ + if (prev_c != '\r') \ + GOTO_REDIR; \ + } while (0) + +#define COPY_FIELD(ptr,target,left) \ + do { \ + char c; \ + while ((c = get_c(ptr,left)) != ' ') \ + *target++ = c; \ + } while (0) + +#define SKIP_LINE(ptr,left) \ + do { \ + char prev_c = 0, c; \ + while (((c = get_c(ptr,left))) != '\n') \ + prev_c = c; \ + if (prev_c != '\r') \ + GOTO_REDIR; \ + } while (0) + +#define SKIP_WHITESPACE(curr,left) \ +do { \ + while ((left) && (*(curr) == ' ')) \ + (curr)++, (left)--; \ + if (!(left)) \ + GOTO_REDIR; \ +} while (0) + --- linux/net/tux/postpone.c.orig +++ linux/net/tux/postpone.c @@ -0,0 +1,77 @@ +/* + * TUX - Integrated Application Protocols Layer and Object Cache + * + * Copyright (C) 2000, 2001, Ingo Molnar + * + * postpone.c: postpone/continue userspace requests + */ + +#include + +/**************************************************************** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + ****************************************************************/ + +void postpone_request (tux_req_t *req) +{ + if (!req->usermode) + TUX_BUG(); + INC_STAT(nr_postpone_pending); + req->postponed = 1; +} + +/* + * Continue a postponed request. The request will show up in the + * userspace queue and will be handled by the fast thread. + * A request can only be postponed in a TUX process, but can be + * continued from any process that has access to the socket file + * descriptor. + */ +int continue_request (int fd) +{ + threadinfo_t *ti; + struct socket *sock; + tux_req_t *req; + int err; + + sock = sockfd_lookup(fd, &err); + if (!sock || !sock->sk) + goto out; + req = sock->sk->user_data; + + err = -EINVAL; + if (!req) + goto out_put; + ti = req->ti; + if (!req->postponed) + goto out_unlock_put; + if (!req->usermode) + TUX_BUG(); + + req->postponed = 0; + DEC_STAT(nr_postpone_pending); + + Dprintk("continuing postponed req %p.\n", req); + add_req_to_workqueue(req); + +out_unlock_put: + err = 0; +out_put: + fput(sock->file); +out: + return err; +} + --- linux/net/tux/proc.c.orig +++ linux/net/tux/proc.c @@ -0,0 +1,1170 @@ +/* + * TUX - Integrated Application Protocols Layer and Object Cache + * + * Copyright (C) 2000, 2001, Ingo Molnar + * + * proc.c: /proc/sys/tux handling + */ + +#include + +/**************************************************************** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + ****************************************************************/ + +char tux_common_docroot[200] = "/var/www/tux/"; +char tux_http_subdocroot[200] = ""; +char tux_ftp_subdocroot[200] = ""; +char tux_logfile[200] = "/var/log/tux"; +char tux_cgiroot[200] = "/var/www/tux/cgiroot/"; +char tux_404_page[200] = "404.html"; +char tux_default_vhost[200] = "default"; +char tux_extra_html_header[600]; +unsigned int tux_extra_html_header_size = 0; + +int tux_cgi_uid = -1; +int tux_cgi_gid = -1; +unsigned int tux_clientport = 8080; +unsigned int tux_logging = 0; +unsigned int tux_threads = 2; +unsigned int tux_max_connect = 10000; +unsigned int tux_max_keepalives = 10000; +unsigned int tux_max_backlog = 2048; +unsigned int tux_keepalive_timeout = 0; +unsigned int tux_max_output_bandwidth = 0; +unsigned int tux_defer_accept = 1; +unsigned int tux_mode_forbidden = 0 /*S_IXUGO*/; /* do not allow executable (CGI) files */ +unsigned int tux_mode_allowed = S_IROTH; /* allow access if read-other is set */ +unsigned int tux_virtual_server = 0; +unsigned int tux_ftp_virtual_server = 0; +unsigned int mass_hosting_hash = 0; +unsigned int strip_host_tail = 0; +unsigned int tux_max_object_size = 0; +unsigned int log_cpu_mask = ~0; +unsigned int tux_compression = 0; +unsigned int tux_noid = 0; +unsigned int tux_cgi_inherit_cpu = 0; +unsigned int tux_cgi_cpu_mask = ~0; +unsigned int tux_zerocopy_header = 1; +unsigned int tux_max_free_requests = 1000; +unsigned int tux_ignore_query = 0; +unsigned int tux_all_userspace = 0; +unsigned int 
tux_redirect_logging = 1; +unsigned int tux_max_header_len = 3000; +unsigned int tux_referer_logging = 0; +unsigned int tux_generate_etags = 1; +unsigned int tux_generate_last_mod = 1; +unsigned int tux_generate_cache_control = 1; +unsigned int tux_ip_logging = 1; +unsigned int tux_ftp_wait_close = 1; +unsigned int tux_ftp_log_retr_only = 0; +unsigned int tux_hide_unreadable = 1; +unsigned int tux_http_dir_indexing = 0; +unsigned int tux_log_incomplete = 0; +unsigned int tux_cpu_offset = 0; +unsigned int tux_ftp_login_message = 0; + +static struct ctl_table_header *tux_table_header; + +static ctl_table tux_table[] = { + { NET_TUX_DOCROOT, + "documentroot", + &tux_common_docroot, + sizeof(tux_common_docroot), + 0644, + NULL, + proc_dostring, + &sysctl_string, + NULL, + NULL, + NULL + }, + { NET_TUX_DOCROOT, + "http_subdocroot", + &tux_http_subdocroot, + sizeof(tux_http_subdocroot), + 0644, + NULL, + proc_dostring, + &sysctl_string, + NULL, + NULL, + NULL + }, + { NET_TUX_DOCROOT, + "ftp_subdocroot", + &tux_ftp_subdocroot, + sizeof(tux_ftp_subdocroot), + 0644, + NULL, + proc_dostring, + &sysctl_string, + NULL, + NULL, + NULL + }, + { NET_TUX_LOGFILE, + "logfile", + &tux_logfile, + sizeof(tux_logfile), + 0644, + NULL, + proc_dostring, + &sysctl_string, + NULL, + NULL, + NULL + }, + { NET_TUX_THREADS, + "threads", + &tux_threads, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_KEEPALIVE_TIMEOUT, + "keepalive_timeout", + &tux_keepalive_timeout, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_MAX_KEEPALIVE_BW, + "max_output_bandwidth", + &tux_max_output_bandwidth, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_DEFER_ACCEPT, + "defer_accept", + &tux_defer_accept, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_MAX_BACKLOG, + "max_backlog", + 
&tux_max_backlog, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_MAX_CONNECT, + "max_connect", + &tux_max_connect, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_MAX_KEEPALIVES, + "max_keepalives", + &tux_max_keepalives, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_MODE_FORBIDDEN, + "mode_forbidden", + &tux_mode_forbidden, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_MODE_ALLOWED, + "mode_allowed", + &tux_mode_allowed, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_CGI_UID, + "cgi_uid", + &tux_cgi_uid, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_CGI_GID, + "cgi_gid", + &tux_cgi_gid, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_CGIROOT, + "cgiroot", + &tux_cgiroot, + sizeof(tux_cgiroot), + 0644, + NULL, + proc_dostring, + &sysctl_string, + NULL, + NULL, + NULL + }, + { NET_TUX_404_PAGE, + "404_page", + &tux_404_page, + sizeof(tux_404_page), + 0644, + NULL, + proc_dostring, + &sysctl_string, + NULL, + NULL, + NULL + }, + { NET_TUX_404_PAGE, + "default_vhost", + &tux_default_vhost, + sizeof(tux_default_vhost), + 0644, + NULL, + proc_dostring, + &sysctl_string, + NULL, + NULL, + NULL + }, + { NET_TUX_404_PAGE, + "extra_html_header", + &tux_extra_html_header, + sizeof(tux_extra_html_header), + 0644, + NULL, + proc_dostring, + &sysctl_string, + NULL, + NULL, + NULL + }, + { NET_TUX_CLIENTPORT, + "extra_html_header_size", + &tux_extra_html_header_size, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_CLIENTPORT, + "clientport", + &tux_clientport, + sizeof(int), + 0644, + NULL, + proc_dointvec, + 
&sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_CLIENTPORT, + "generate_etags", + &tux_generate_etags, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_CLIENTPORT, + "generate_last_mod", + &tux_generate_last_mod, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_CLIENTPORT, + "generate_cache_control", + &tux_generate_cache_control, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_CLIENTPORT, + "ip_logging", + &tux_ip_logging, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_CLIENTPORT, + "ftp_wait_close", + &tux_ftp_wait_close, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_CLIENTPORT, + "ftp_log_retr_only", + &tux_ftp_log_retr_only, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_CLIENTPORT, + "http_dir_indexing", + &tux_http_dir_indexing, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_CLIENTPORT, + "hide_unreadable", + &tux_hide_unreadable, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_CLIENTPORT, + "log_incomplete", + &tux_log_incomplete, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_LOGGING, + "TDprintk", + &tux_TDprintk, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_LOGGING, + "Dprintk", + &tux_Dprintk, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, +#if TUX_DPRINTK +#endif + { NET_TUX_LOGGING, + "logging", + &tux_logging, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { 
NET_TUX_LOGENTRY_ALIGN_ORDER, + "logentry_align_order", + &tux_logentry_align_order, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_ACK_PINGPONG, + "ack_pingpong", + &tux_ack_pingpong, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_PUSH_ALL, + "push_all", + &tux_push_all, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_ZEROCOPY_PARSE, + "zerocopy_parse", + &tux_zerocopy_parse, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_VIRTUAL_SERVER, + "virtual_server", + &tux_virtual_server, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_VIRTUAL_SERVER, + "mass_hosting_hash", + &mass_hosting_hash, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_VIRTUAL_SERVER, + "strip_host_tail", + &strip_host_tail, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_VIRTUAL_SERVER, + "ftp_virtual_server", + &tux_ftp_virtual_server, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_MAX_OBJECT_SIZE, + "max_object_size", + &tux_max_object_size, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_COMPRESSION, + "compression", + &tux_compression, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_NOID, + "noid", + &tux_noid, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_CGI_INHERIT_CPU, + "cgi_inherit_cpu", + &tux_cgi_inherit_cpu, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_CGI_CPU_MASK, + "cgi_cpu_mask", + 
&tux_cgi_cpu_mask, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_ZEROCOPY_HEADER, + "zerocopy_header", + &tux_zerocopy_header, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_ZEROCOPY_SENDFILE, + "zerocopy_sendfile", + &tux_zerocopy_sendfile, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_MAX_FREE_REQUESTS, + "max_free_requests", + &tux_max_free_requests, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_ALL_USERSPACE, + "all_userspace", + &tux_all_userspace, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_REDIRECT_LOGGING, + "redirect_logging", + &tux_redirect_logging, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_IGNORE_QUERY, + "ignore_query", + &tux_ignore_query, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_REFERER_LOGGING, + "referer_logging", + &tux_referer_logging, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_REFERER_LOGGING, + "cpu_offset", + &tux_cpu_offset, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_REFERER_LOGGING, + "ftp_login_message", + &tux_ftp_login_message, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + { NET_TUX_MAX_HEADER_LEN, + "max_header_len", + &tux_max_header_len, + sizeof(int), + 0644, + NULL, + proc_dointvec, + &sysctl_intvec, + NULL, + NULL, + NULL + }, + {0,0,0,0,0,0,0,0,0,0,0} }; + + +static ctl_table tux_dir_table[] = { + {NET_TUX, "tux", NULL, 0, 0555, tux_table,0,0,0,0,0}, + {0,0,0,0,0,0,0,0,0,0,0} +}; + +static ctl_table tux_root_table[] = { + {CTL_NET, 
"net", NULL, 0, 0555, tux_dir_table,0,0,0,0,0}, + {0,0,0,0,0,0,0,0,0,0,0} +}; + + +static struct proc_dir_entry * root_tux_dir; +static struct proc_dir_entry * log_cpu_mask_entry; +static struct proc_dir_entry * stat_entry; +static struct proc_dir_entry * tux_dir [CONFIG_TUX_NUMTHREADS]; +static struct proc_dir_entry * listen_dir [CONFIG_TUX_NUMTHREADS]; + +tux_socket_t tux_listen [CONFIG_TUX_NUMTHREADS][CONFIG_TUX_NUMSOCKETS] = + { [0 ... CONFIG_TUX_NUMTHREADS-1] = { {&tux_proto_http, 0, 80, NULL}, } }; + +#define HEX_DIGITS 8 + +static int hex_read_proc (char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + if (count < HEX_DIGITS+1) + return -EINVAL; + return sprintf (page, "%08x\n", *(unsigned int *)data); +} + +static int hex_write_proc (struct file *file, const char *buffer, + unsigned long count, void *data) +{ + char hexnum [HEX_DIGITS]; + unsigned int new_value; + unsigned int i, full_count = count; + + if (!count) + return -EINVAL; + if (count > HEX_DIGITS) + count = HEX_DIGITS; + if (copy_from_user(hexnum, buffer, count)) + return -EFAULT; + + /* + * Parse the first 8 characters as a hex string, any non-hex char + * is end-of-string. '00e1', 'e1', '00E1', 'E1' are the same. + */ + new_value = 0; + + for (i = 0; i < count; i++) { + unsigned int c = hexnum[i]; + + switch (c) { + case '0' ... '9': c -= '0'; break; + case 'a' ... 'f': c -= 'a'-10; break; + case 'A' ... 
'F': c -= 'A'-10; break; + default: + goto out; + } + new_value = (new_value << 4) | c; + } +out: + *(int *)data = new_value; + + return full_count; +} + +#define LINE_SIZE 1024 +#define LINE_MASK (LINE_SIZE-1) + +static int print_request_stats (threadinfo_t *ti, char *page, unsigned int skip_count, unsigned int max_count) +{ + struct list_head *head, *curr; + tux_req_t *req; + unsigned int count = 0, size, line_off, len; + char stat_line [LINE_SIZE]; + + if (!max_count) + BUG(); + + head = &ti->all_requests; + curr = head->next; + + while (curr != head) { + req = list_entry(curr, tux_req_t, all); + curr = curr->next; + count++; + if (count <= skip_count) + continue; + line_off = 0; +#define SP(x...) \ + line_off += sprintf(stat_line + line_off, x) + + if (req->proto == &tux_proto_http) + SP("0 "); + else + SP("1 "); + + SP("%p ", req); + SP("%d ", req->atom_idx); + if (req->atom_idx >= 1) + SP("%p ", req->atoms[0]); + else + SP("........ "); + if (req->atom_idx >= 2) + SP("%p ", req->atoms[1]); + else + SP("........ 
"); + if (!list_empty(&req->work)) SP("W"); else SP("."); + if (!list_empty(&req->free)) SP("F"); else SP("."); + if (!list_empty(&req->lru)) SP("L"); else SP("."); + if (req->keep_alive) SP("K"); else SP("."); + if (req->idle_input) SP("I"); else SP("."); + if (timer_pending(&req->keepalive_timer)) + SP("T(%lu/%lu)",jiffies,req->keepalive_timer.expires); else SP("."); + if (req->wait_output_space) SP("O"); else SP("."); + if (timer_pending(&req->output_timer)) + SP("T"); else SP("."); + SP(" %d ", req->error); + SP(" %d ", req->status); + +#define SP_HOST(ip,port) \ + SP("%d.%d.%d.%d:%d ",NIPQUAD(ip),port) + + if (req->sock) { + if (req->sock->sk) + SP("%d:", req->sock->sk->state); + else + SP("-2:"); + } else + SP("-1:"); + SP_HOST(req->client_addr, req->client_port); + + SP("%Ld ", req->total_file_len); + SP("%Ld ", req->in_file.f_pos); + if (req->proto == &tux_proto_http) { + SP("%d ", req->method); + SP("%d ", req->version); + } + if (req->proto == &tux_proto_ftp) { + SP("%d ", req->ftp_command); + if (req->data_sock) { + if (req->data_sock->sk) + SP("%d:",req->data_sock->sk->state); + else + SP("-2:"); + if (req->data_sock->sk) + SP_HOST(req->data_sock->sk->daddr, + req->data_sock->sk->dport); + else + SP("-1:-1 "); + } else + SP("-1 "); + } + SP("%p/%p %p/%p ", req->sock, req->sock ? req->sock->sk : (void *)-1, req->data_sock, req->data_sock ? 
req->data_sock->sk : (void *)-1); + + SP("%d\n", req->parsed_len); + len = req->headers_len; + if (len > 500) + len = 500; + SP("\n%d\n", len); + memcpy(stat_line + line_off, req->headers, len); + line_off += len; + len = req->objectname_len; + if (len > 100) + len = 100; + SP("\n%d\n", len); + memcpy(stat_line + line_off, req->objectname, len); + line_off += len; + SP("\n\n"); + if (line_off >= LINE_SIZE) + BUG(); + Dprintk("printing req %p, count %d, page %p: {%s}.\n", req, count, page, stat_line); + size = sprintf(page, "%-*s\n", LINE_SIZE-1, stat_line); + if (size != LINE_SIZE) + BUG(); + page += LINE_SIZE; + if (count-skip_count >= max_count) + break; + } + + Dprintk("count: %d.\n", count-skip_count); + return count - skip_count; +} + +static int stat_read_proc (char *page, char **start, off_t off, + int max_size, int *eof, void *data) +{ + unsigned int i, nr_total = 0, nr, nr_off, nr_skip, size = 0, nr_wanted; + + Dprintk("START, page: %p, max_size: %d, off: %ld.\n", page, max_size, off); + *eof = 1; + if (max_size & LINE_MASK) + return 0; + if (off & LINE_MASK) + return 0; + if (!max_size) + return 0; + + nr_off = off/LINE_SIZE; + + for (i = 0; i < nr_tux_threads; i++) { + threadinfo_t *ti = threadinfo + i; + spin_lock_irq(&ti->work_lock); + nr = ti->nr_requests; + Dprintk("ti: %p, nr: %d, nr_total: %d, nr_off: %d.\n", ti, nr, nr_total, nr_off); + nr_total += nr; + if (nr_off >= nr_total) { + spin_unlock_irq(&ti->work_lock); + continue; + } + nr_skip = nr_off - (nr_total - nr); + nr_wanted = (max_size-size) / LINE_SIZE; + Dprintk("nr_skip: %d, nr_wanted: %d.\n", nr_skip, nr_wanted); + nr = print_request_stats(ti, page + size, nr_skip, nr_wanted); + spin_unlock_irq(&ti->work_lock); + nr_off += nr; + size += nr * LINE_SIZE; + Dprintk("ret: %d requests, size: %d.\n", nr, size); + if (size > max_size) + BUG(); + if (size == max_size) + break; + } + Dprintk("DONE: size: %d.\n", size); + + *start = page; + + if (size) + *eof = 0; + return size; +} + +static int 
stat_write_proc (struct file *file, const char *buffer, + unsigned long count, void *data) +{ + return -EINVAL; +} + +#define MAX_STRING "http://255.255.255.255:65535" +#define MAX_STRINGLEN (sizeof(MAX_STRING)) + +#define INACTIVE_1 "[inactive]\n" +#define INACTIVE_2 "0\n" + +static int listen_read_proc (char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + tux_socket_t *listen = data; + + if (count < MAX_STRINGLEN) + return -EINVAL; + + if (!listen->proto) + return sprintf(page, INACTIVE_1); + + return sprintf (page, "%s://%u.%u.%u.%u:%hu\n", listen->proto->name, + HIPQUAD(listen->ip), listen->port); +} + +static int listen_write_proc (struct file *file, const char *buffer, + unsigned long count, void *data) +{ + char string [MAX_STRINGLEN]; + unsigned int d1, d2, d3, d4; + unsigned short port; + tux_socket_t *listen = data; + + if (!count) + return -EINVAL; + if (count > MAX_STRINGLEN) + count = MAX_STRINGLEN; + if (copy_from_user(string, buffer, count)) + return -EFAULT; + string[count] = 0; + + if (!strcmp(string, INACTIVE_1) || !strcmp(string, INACTIVE_2)) { + listen->proto = NULL; + listen->ip = 0; + listen->port = 0; + return count; + } + +#define MK_IP(a,b,c,d) ((a << 24) | (b << 16) | (c << 8) | d) + + if (sscanf(string, "http://%u.%u.%u.%u:%hu\n", + &d1, &d2, &d3, &d4, &port) == 5) { + listen->ip = MK_IP(d1,d2,d3,d4); + listen->port = port; + listen->proto = &tux_proto_http; + return count; + } + + if (sscanf(string, "ftp://%u.%u.%u.%u:%hu\n", + &d1, &d2, &d3, &d4, &port) == 5) { + listen->ip = MK_IP(d1,d2,d3,d4); + listen->port = port; + listen->proto = &tux_proto_ftp; + return count; + } + printk(KERN_ERR "tux: invalid listen-socket parameters: %s\n", string); + return -EINVAL; +} + +#define MAX_NAMELEN 10 + +static void register_tux_proc (unsigned int nr) +{ + struct proc_dir_entry *entry; + char name [MAX_NAMELEN]; + int i; + + if (!root_tux_dir) + TUX_BUG(); + + sprintf(name, "%d", nr); + + /* create /proc/net/tux/1234/ */ + 
tux_dir[nr] = proc_mkdir(name, root_tux_dir); + + /* create /proc/net/tux/1234/listen/ */ + listen_dir[nr] = proc_mkdir("listen", tux_dir[nr]); + + /* create /proc/net/tux/1234/listen/ */ + for (i = 0; i < CONFIG_TUX_NUMSOCKETS; i++) { + sprintf(name, "%d", i); + entry = create_proc_entry(name, 0700, listen_dir[nr]); + + entry->nlink = 1; + entry->data = (void *)(tux_listen[nr] + i); + entry->read_proc = listen_read_proc; + entry->write_proc = listen_write_proc; + tux_listen[nr][i].entry = entry; + } +} + +static void unregister_tux_proc (unsigned int nr) +{ + int i; + + for (i = 0; i < CONFIG_TUX_NUMSOCKETS; i++) { + remove_proc_entry(tux_listen[nr][i].entry->name,listen_dir[nr]); + tux_listen[nr][i].entry = NULL; + } + + remove_proc_entry(listen_dir[nr]->name, tux_dir[nr]); + + remove_proc_entry(tux_dir[nr]->name, root_tux_dir); +} + +static void cleanup_tux_proc (void) +{ + int i; + + Dprintk("cleaning up /proc/net/tux/\n"); + + for (i = 0; i < CONFIG_TUX_NUMTHREADS; i++) + unregister_tux_proc(i); + remove_proc_entry(stat_entry->name, root_tux_dir); + remove_proc_entry(log_cpu_mask_entry->name, root_tux_dir); + remove_proc_entry(root_tux_dir->name, proc_net); +} + +static void init_tux_proc (void) +{ + struct proc_dir_entry *entry; + int i; + + if (root_tux_dir) + return; + + /* create /proc/net/tux */ + root_tux_dir = proc_mkdir("tux", proc_net); + + entry = create_proc_entry("log_cpu_mask", 0700, root_tux_dir); + + entry->nlink = 1; + entry->data = (void *)&log_cpu_mask; + entry->read_proc = hex_read_proc; + entry->write_proc = hex_write_proc; + + log_cpu_mask_entry = entry; + + entry = create_proc_entry("stat", 0700, root_tux_dir); + + entry->nlink = 1; + entry->data = NULL; + entry->read_proc = stat_read_proc; + entry->write_proc = stat_write_proc; + + stat_entry = entry; + + /* + * Create entries for all existing threads. 
+ */ + for (i = 0; i < CONFIG_TUX_NUMTHREADS; i++) + register_tux_proc(i); +} + +void start_sysctl(void) +{ + init_tux_proc(); + tux_table_header = register_sysctl_table(tux_root_table,1); +} + +void end_sysctl(void) +{ + cleanup_tux_proc(); + unregister_sysctl_table(tux_table_header); +} --- linux/net/tux/proto_ftp.c.orig +++ linux/net/tux/proto_ftp.c @@ -0,0 +1,1551 @@ +/* + * TUX - Integrated Application Protocols Layer and Object Cache + * + * Copyright (C) 2000, 2001, Ingo Molnar + * + * ftp_proto.c: FTP application protocol support + */ + +#define __KERNEL_SYSCALLS__ +#include + +/**************************************************************** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + ****************************************************************/ + +#define HELLO "220 Linux 2.4 TUX 2.0 FTP server welcomes you!\r\n" +#define WRITE_DONE "226 Transfer complete.\r\n" +#define BAD_FILENAME "550 No such file or directory.\r\n" +#define GOOD_DIR "250 CWD command successful.\r\n" +#define LIST_ERR "503 LIST without PORT! Closing connection.\r\n" +#define LIST_ERR_MEM "503 LIST could not allocate memory! Closing connection.\r\n" +#define WRITE_FILE "150 Opening BINARY mode data connection.\r\n" +#define WRITE_LIST "150 Opening ASCII mode data connection.\r\n" +#define RETR_ERR "503 RETR without PORT! 
Closing connection.\r\n" +#define PORT_OK "200 PORT command successful.\r\n" +#define LOGIN_OK "230-There are currently %d users logged in, out of %d maximum.\r\n230-Bandwidth served by TUX currently: %d KB/sec\r\n230 TUX Guest login ok.\r\n" +#define LOGIN_OK_ONE "230-There is currently 1 user logged in, out of %d maximum.\r\n230-Bandwidth served by TUX currently: %d KB/sec\r\n230 TUX Guest login ok.\r\n" +#define LOGIN_OK_PASS "230 TUX Guest login ok.\r\n" +#define LOGIN_FORBIDDEN "530 Sorry, Login Denied!\r\n" +#define TYPE_OK "200 Type set to I.\r\n" +#define BYE "221 Thank You for using TUX!\r\n" +#define NOT_IMPLEMENTED "502 Command not implemented.\r\n" +#define CLOSE_2 "221 Cannot handle request, closing connection!\r\n" +#define CLOSE "500 Unknown command.\r\n" +#define CLOSE_TIMEOUT "421 Timeout, closing connection!\r\n" +#define LINUX_SYST "215 UNIX Type: L8, Linux/2.4 TUX/2.0\r\n" +#define COMMAND_OK "200 Command OK.\r\n" +#define REST_OK "350 Restart offset OK.\r\n" +#define WRITE_ABORTED "426 Transfer aborted, data connection closed.\r\n" +#define SITE "214 No SITE commands are recognized.\r\n" + +#define INTERVAL 10 + +unsigned long last_measurement; +unsigned int ftp_bytes_sent; +unsigned int ftp_bandwidth; + +static void __update_bandwidth (tux_req_t *req, unsigned int bytes) +{ + /* + * Bandwidth measurement. Not completely accurate, + * but it's good enough and lightweight enough. 
+ */ + if (jiffies >= last_measurement + INTERVAL*HZ) { + ftp_bandwidth = (ftp_bytes_sent + 1023)/INTERVAL/1024; + ftp_bytes_sent = 0; + last_measurement = jiffies; + } + if (bytes) + atomic_add(bytes, (atomic_t *)&ftp_bytes_sent); + Dprintk("update_bandwidth(%p,%d), bytes_sent: %d, bandwidth: %d.\n", + req, bytes, ftp_bytes_sent, ftp_bandwidth); +} + +#define update_bandwidth(req,bytes) \ + do { \ + if (unlikely(tux_ftp_login_message)) \ + __update_bandwidth(req, bytes); \ + } while (0) + +static inline void __ftp_send_async_message (tux_req_t *req, + const char *message, int status, unsigned int size) +{ + update_bandwidth(req, size); + __send_async_message(req, message, status, size, 1); +} + +#define ftp_send_async_message(req,str,status) \ + __ftp_send_async_message(req,str,status,sizeof(str)-1) + + +static void ftp_flush_req (tux_req_t *req, int cachemiss) +{ + tux_push_pending(req->sock->sk); + add_req_to_workqueue(req); +} + +static void ftp_execute_command (tux_req_t *req, int cachemiss); + +static void ftp_lookup_vhost (tux_req_t *req, int cachemiss) +{ + struct dentry *dentry; + struct nameidata base; + struct vfsmount *mnt = NULL; + unsigned int flag = cachemiss ? 
0 : LOOKUP_ATOMIC; + char ip[3+1+3+1+3+1+3 + 2]; + + sprintf(ip, "%d.%d.%d.%d", NIPQUAD(req->sock->sk->rcv_saddr)); + Dprintk("ftp_lookup_vhost(%p, %d, virtual: %d, host: %s.)\n", + req, flag, req->virtual, ip); + + base.flags = LOOKUP_POSITIVE|LOOKUP_FOLLOW|flag; + base.last_type = LAST_ROOT; + base.dentry = dget(req->proto->main_docroot.dentry); + base.mnt = mntget(req->proto->main_docroot.mnt); + + dentry = __tux_lookup(req, ip, &base, &mnt); + + Dprintk("looked up dentry %p.\n", dentry); + if (dentry && !IS_ERR(dentry) && !dentry->d_inode) + TUX_BUG(); + + if (!dentry || IS_ERR(dentry)) { + if (PTR_ERR(dentry) == -EWOULDBLOCKIO) { + add_tux_atom(req, ftp_lookup_vhost); + queue_cachemiss(req); + return; + } + goto abort; + } + + req->docroot_dentry = dentry; + req->docroot_mnt = mnt; + + add_tux_atom(req, ftp_execute_command); + add_req_to_workqueue(req); + return; +abort: + if (dentry) { + if (!IS_ERR(dentry)) + dput(dentry); + dentry = NULL; + } + if (mnt) { + if (!IS_ERR(mnt)) + mntput(mnt); + mnt = NULL; + } + req_err(req); + add_req_to_workqueue(req); +} + +static void ftp_got_request (tux_req_t *req) +{ + add_tux_atom(req, parse_request); + add_tux_atom(req, ftp_flush_req); + ftp_send_async_message(req, HELLO, 220); +} + +#define GOTO_ERR { TDprintk("FTP protocol error at: %s:%d\n", \ + __FILE__, __LINE__); goto error; } + +static void zap_data_socket (tux_req_t *req) +{ + if (!req->data_sock) + return; + Dprintk("zapping req %p's data socket %p.\n", req, req->data_sock); + + unlink_tux_data_socket(req); + sock_release(req->data_sock); + req->data_sock = NULL; +} + +static int parse_ftp_message (tux_req_t *req, const int total_len) +{ + int comm, comm1 = 0, comm2 = 0, comm3 = 0, comm4 = 0; + int newline_pos, i; + const char *mess, *curr; + + curr = mess = req->headers; + + Dprintk("FTP parser got %d bytes: --->{%s}<---\n", total_len, curr); + + newline_pos = -1; + for (i = 0; i < total_len; i++, curr++) { + if (!*curr) + GOTO_ERR; + if (!(*curr == '\r') || 
!(*(curr+1) == '\n')) + continue; + newline_pos = i; + break; + } + Dprintk("Newline pos: %d\n", newline_pos); + if (newline_pos == -1) { + Dprintk("incomplete mess on req %p!\n", req); + return 0; + } + if (newline_pos < 3) + GOTO_ERR; + +#define toup(c) ((((c) >= 'a') && ((c) <= 'z')) ? ((c) + 'A' - 'a') : (c)) + +#define STRING_VAL(c1,c2,c3,c4) \ + (toup(c1) + (toup(c2) << 8) + (toup(c3) << 16) + (toup(c4) << 24)) + +#define STRING_VAL_STR(str) \ + STRING_VAL(str[0], str[1], str[2], str[3]) + + Dprintk("string val (%c%c%c%c): %08x\n", + mess[0], mess[1], mess[2], mess[3], + STRING_VAL_STR(mess)); + +#define PARSE_FTP_COMM(c1,c2,c3,c4,name,num) \ + if (STRING_VAL_STR(mess) == STRING_VAL(c1,c2,c3,c4)) \ + { \ + Dprintk("parsed "#name".\n"); \ + comm##num = FTP_COMM_##name; \ + } + + PARSE_FTP_COMM('A','C','C','T', ACCT,2); + PARSE_FTP_COMM('C','D','U','P', CDUP,3); + PARSE_FTP_COMM('S','M','N','T', SMNT,4); + PARSE_FTP_COMM('Q','U','I','T', QUIT,1); + PARSE_FTP_COMM('R','E','I','N', REIN,2); + PARSE_FTP_COMM('P','A','S','V', PASV,3); + PARSE_FTP_COMM('S','T','R','U', STRU,4); + PARSE_FTP_COMM('S','T','O','R', STOR,2); + PARSE_FTP_COMM('S','T','O','U', STOU,3); + PARSE_FTP_COMM('A','P','P','E', APPE,4); + PARSE_FTP_COMM('A','L','L','O', ALLO,1); + PARSE_FTP_COMM('R','N','F','R', RNFR,2); + PARSE_FTP_COMM('R','N','T','O', RNTO,3); + PARSE_FTP_COMM('A','B','O','R', ABOR,4); + PARSE_FTP_COMM('D','E','L','E', DELE,1); + PARSE_FTP_COMM('R','M','D',' ', RMD, 2); + PARSE_FTP_COMM('M','K','D',' ', MKD, 3); + PARSE_FTP_COMM('P','W','D',' ', PWD, 4); + PARSE_FTP_COMM('S','Y','S','T', SYST,2); + PARSE_FTP_COMM('N','O','O','P', NOOP,3); + PARSE_FTP_COMM('F','E','A','T', FEAT,4); + + comm = comm1 | comm2 | comm3 | comm4; + + if (comm) { + if (newline_pos != 4) + GOTO_ERR; + req->ftp_command = comm; + goto out; + } + + switch (STRING_VAL(mess[0], mess[1], mess[2], mess[3])) { + +#define PARSE_FTP_COMM_3CHAR(c1,c2,c3,name) \ + case STRING_VAL(c1,c2,c3,'\r'): \ + { \ + 
Dprintk("parsed "#name".\n"); \ + req->ftp_command = FTP_COMM_##name; \ + if (newline_pos != 3) \ + GOTO_ERR; \ + } + +#define PARSE_FTP_3CHAR_COMM_IGNORE(c1,c2,c3,name) \ + case STRING_VAL(c1,c2,c3,' '): \ + { \ + Dprintk("parsed "#name".\n"); \ + req->ftp_command = FTP_COMM_##name; \ + } + +#define PARSE_FTP_COMM_IGNORE(c1,c2,c3,c4,name) \ + case STRING_VAL(c1,c2,c3,c4): \ + { \ + Dprintk("parsed "#name".\n"); \ + req->ftp_command = FTP_COMM_##name; \ + } + +#define PARSE_FTP_3CHAR_COMM_1_FIELD(c1,c2,c3,name,field,field_len,max) \ + case STRING_VAL(c1,c2,c3,' '): \ + { \ + Dprintk("parsed "#name".\n"); \ + req->ftp_command = FTP_COMM_##name; \ + if (newline_pos == 4) \ + GOTO_ERR; \ + if (newline_pos >= 5) { \ + curr = mess + 3; \ + if (*curr++ != ' ') \ + GOTO_ERR; \ + *(field_len) = newline_pos-4; \ + if (*(field_len) >= max) \ + GOTO_ERR; \ + memcpy(field, curr, *(field_len)); \ + (field)[*(field_len)] = 0; \ + } \ + } + +#define PARSE_FTP_COMM_1_FIELD(c1,c2,c3,c4,name,field,field_len,max) \ + case STRING_VAL(c1,c2,c3,c4): \ + { \ + Dprintk("parsed "#name".\n"); \ + req->ftp_command = FTP_COMM_##name; \ + if (newline_pos < 4) \ + GOTO_ERR; \ + if (newline_pos == 4) \ + *(field_len) = 0; \ + else { \ + curr = mess + 4; \ + if (*curr++ != ' ') \ + GOTO_ERR; \ + *(field_len) = newline_pos-5; \ + if (*(field_len) >= max) \ + GOTO_ERR; \ + memcpy(field, curr, *(field_len)); \ + (field)[*(field_len)] = 0; \ + } \ + } + + PARSE_FTP_COMM_1_FIELD('U','S','E','R', USER, + req->username, &req->username_len, + MAX_USERNAME_LEN-1); + if (!req->username_len) + GOTO_ERR; + break; + + { + #define MAX_PASS_LEN 100 + char pass[MAX_PASS_LEN]; + unsigned int pass_len; + PARSE_FTP_COMM_1_FIELD('P','A','S','S', PASS, + pass, &pass_len, + MAX_PASS_LEN-1); + if (!pass_len) + GOTO_ERR; + break; + } + + PARSE_FTP_3CHAR_COMM_1_FIELD('C','W','D', CWD, + req->objectname, &req->objectname_len, + MAX_OBJECTNAME_LEN-1); + if (!req->objectname_len) + GOTO_ERR; + req->uri_str = 
req->objectname; + req->uri_len = req->objectname_len; + break; + + PARSE_FTP_COMM_3CHAR('P','W','D', PWD); break; + + { + char type[3]; + unsigned int type_len; + + PARSE_FTP_COMM_1_FIELD('T','Y','P','E', TYPE, + type, &type_len, 2); + if (!type_len) + GOTO_ERR; + if ((type[0] != 'I') && (type[0] != 'A')) + GOTO_ERR; + } + break; + + PARSE_FTP_COMM_1_FIELD('R','E','T','R', RETR, + req->objectname, &req->objectname_len, + MAX_OBJECTNAME_LEN-1); + if (!req->objectname_len) { + zap_data_socket(req); + req->ftp_command = FTP_COMM_NONE; + } + req->uri_str = req->objectname; + req->uri_len = req->objectname_len; + break; + + PARSE_FTP_COMM_1_FIELD('S','I','Z','E', SIZE, + req->objectname, &req->objectname_len, + MAX_OBJECTNAME_LEN-1); + if (!req->objectname_len) + req->ftp_command = FTP_COMM_NONE; + req->uri_str = req->objectname; + req->uri_len = req->objectname_len; + break; + + PARSE_FTP_COMM_1_FIELD('M','D','T','M', MDTM, + req->objectname, &req->objectname_len, + MAX_OBJECTNAME_LEN-1); + if (!req->objectname_len) + req->ftp_command = FTP_COMM_NONE; + req->uri_str = req->objectname; + req->uri_len = req->objectname_len; + break; + + PARSE_FTP_COMM_IGNORE('M','O','D','E', MODE); + break; + + PARSE_FTP_COMM_IGNORE('S','T','A','T', STAT); + break; + + PARSE_FTP_COMM_IGNORE('S','I','T','E', SITE); + break; + + PARSE_FTP_COMM_1_FIELD('L','I','S','T', LIST, + req->objectname, &req->objectname_len, + MAX_OBJECTNAME_LEN-1); + if (req->objectname[0] == '-') { + req->objectname_len = 0; + req->objectname[0] = 0; + } + if (req->objectname_len) { + req->uri_str = req->objectname; + req->uri_len = req->objectname_len; + } + break; + + PARSE_FTP_COMM_1_FIELD('N','L','S','T', NLST, + req->objectname, &req->objectname_len, + MAX_OBJECTNAME_LEN-1); + if (req->objectname[0] == '-') { + req->objectname_len = 0; + req->objectname[0] = 0; + } + if (req->objectname_len) { + req->uri_str = req->objectname; + req->uri_len = req->objectname_len; + } + break; + + 
PARSE_FTP_COMM_IGNORE('H','E','L','P', HELP); + break; + + PARSE_FTP_COMM_IGNORE('C','L','N','T', CLNT); + break; + +#define IS_NUM(n) (((n) >= '0') && ((n) <= '9')) + +#define GET_DIGIT(curr,n) \ + n += (*curr) - '0'; \ + curr++; \ + if (IS_NUM(*curr)) { \ + n *= 10; + +#define PARSE_PORTNUM(curr,n) \ +do { \ + Dprintk("PORT NUM parser:--->{%s}<---\n", curr);\ + if (!IS_NUM(*curr)) \ + GOTO_ERR; \ + n = 0; \ + GET_DIGIT(curr,n); \ + GET_DIGIT(curr,n); \ + GET_DIGIT(curr,n); \ + }}} \ + if (n > 255) \ + GOTO_ERR; \ + Dprintk("PORT NUM parser:--->{%s}<---\n", curr);\ + Dprintk("PORT NUM parser parsed %d.\n", n); \ +} while (0) + +#define PARSE_NUM(curr,n) \ +do { \ + Dprintk("NUM parser:--->{%s}<---\n", curr); \ + if (!IS_NUM(*curr)) \ + GOTO_ERR; \ + n = 0; \ + GET_DIGIT(curr,n); \ + GET_DIGIT(curr,n); \ + GET_DIGIT(curr,n); \ + GET_DIGIT(curr,n); \ + GET_DIGIT(curr,n); \ + GET_DIGIT(curr,n); \ + GET_DIGIT(curr,n); \ + GET_DIGIT(curr,n); \ + GET_DIGIT(curr,n); \ + GET_DIGIT(curr,n); \ + }}}}}}}}}} \ + Dprintk("NUM parser:--->{%s}<---\n", curr); \ + Dprintk("NUM parser parsed %d.\n", n); \ +} while (0) + + case STRING_VAL('P','O','R','T'): + { + unsigned int h1, h2, h3, h4, p1, p2; + if (req->data_sock) + zap_data_socket(req); + /* + * Minimum size: "PORT 0,0,0,0,0,0", 16 bytes. 
+ */ + if (newline_pos < 16) + GOTO_ERR; + Dprintk("parsed PORT.\n"); + if (req->data_sock) + GOTO_ERR; + curr = mess + 4; + if (*curr++ != ' ') + GOTO_ERR; + PARSE_PORTNUM(curr,h1); + if (*curr++ != ',') + GOTO_ERR; + PARSE_PORTNUM(curr,h2); + if (*curr++ != ',') + GOTO_ERR; + PARSE_PORTNUM(curr,h3); + if (*curr++ != ',') + GOTO_ERR; + PARSE_PORTNUM(curr,h4); + if (*curr++ != ',') + GOTO_ERR; + PARSE_PORTNUM(curr,p1); + if (*curr++ != ',') + GOTO_ERR; + PARSE_PORTNUM(curr,p2); + if (curr-mess != newline_pos) + GOTO_ERR; + req->ftp_command = FTP_COMM_PORT; + req->ftp_user_addr = (h1<<24) + (h2<<16) + (h3<<8) + h4; + req->ftp_user_port = (p1<<8) + p2; + Dprintk("FTP PORT got: %d.%d.%d.%d:%d.\n", + h1, h2, h3, h4, req->ftp_user_port); + Dprintk("FTP user-addr: %08x (htonl: %08x), socket: %08x.\n", + req->ftp_user_addr, htonl(req->ftp_user_addr), + req->sock->sk->daddr); + /* + * Do not allow redirection of connections, and do + * not allow reserved ports to be accessed. + */ + if (req->sock->sk->daddr != htonl(req->ftp_user_addr)) + GOTO_ERR; + if (req->ftp_user_port < 1024) + GOTO_ERR; + break; + } + case STRING_VAL('R','E','S','T'): + { + unsigned int offset; + + /* + * Minimum size: "REST 0", 6 bytes. 
+ */ + if (newline_pos < 6) + GOTO_ERR; + Dprintk("parsed REST.\n"); + curr = mess + 4; + if (*curr++ != ' ') + GOTO_ERR; + PARSE_NUM(curr,offset); + if (curr-mess != newline_pos) + GOTO_ERR; + req->ftp_command = FTP_COMM_REST; + req->ftp_offset_start = offset; + Dprintk("FTP REST got: %d bytes offset.\n", offset); + + break; + } + default: + req->ftp_command = FTP_COMM_NONE; + break; + } + +out: + req->parsed_len = newline_pos + 2; + + req->virtual = tux_ftp_virtual_server; + if (req->virtual) + add_tux_atom(req, ftp_lookup_vhost); + else { + req->docroot_dentry = dget(req->proto->main_docroot.dentry); + req->docroot_mnt = mntget(req->proto->main_docroot.mnt); + add_tux_atom(req, ftp_execute_command); + } + + return req->parsed_len; +error: + clear_keepalive(req); + TDprintk("rejecting FTP session!\n"); + TDprintk("mess :--->{%s}<---\n", mess); + TDprintk("mess left:--->{%s}<---\n", curr); + req_err(req); + return -1; +} + +static void ftp_wait_close (tux_req_t *req, int cachemiss); +static void ftp_wait_syn (tux_req_t *req, int cachemiss); + +static int ftp_check_req_err (tux_req_t *req, int cachemiss) +{ + int state = req->sock->sk->state; + int err = req->sock->sk->err | req->error; + int urg = req->sock->sk->tp_pinfo.af_tcp.urg_data; + + if (req->data_sock) { + urg |= req->data_sock->sk->tp_pinfo.af_tcp.urg_data; + state |= req->data_sock->sk->state; + err |= req->data_sock->sk->err; + } + + if ((state <= TCP_SYN_RECV) && !err) { + if (!urg) + return 0; + req->in_file.f_pos = 0; + add_tux_atom(req, flush_request); + zap_data_socket(req); + ftp_send_async_message(req, WRITE_ABORTED, 426); + return 1; + } +#if CONFIG_TUX_DEBUG + req->bytes_expected = 0; + if (tux_TDprintk) + show_stack(NULL); +#endif + req->in_file.f_pos = 0; + TDprintk("zapping, data sock state: %d (err: %d, urg: %d)\n", + state, err, urg); + /* + * We are in the middle of a file transfer, + * zap it immediately: + */ + req->error = TUX_ERROR_CONN_CLOSE; + zap_request(req, cachemiss); + return 
1; +} + +void ftp_send_file (tux_req_t *req, int cachemiss) +{ + int ret; + + SET_TIMESTAMP(req->output_timestamp); +repeat: + ret = generic_send_file(req, req->data_sock, cachemiss); + update_bandwidth(req, req->in_file.f_pos - req->prev_pos); + req->prev_pos = req->in_file.f_pos; + + switch (ret) { + case -5: + add_tux_atom(req, ftp_send_file); + output_timeout(req); + break; + case -4: + add_tux_atom(req, ftp_send_file); + if (add_output_space_event(req, req->data_sock)) { + del_tux_atom(req); + goto repeat; + } + break; + case -3: + add_tux_atom(req, ftp_send_file); + queue_cachemiss(req); + break; + case -1: + break; + default: + req->in_file.f_pos = 0; + + if (tux_ftp_wait_close) { + req->data_sock->ops->shutdown(req->data_sock, SEND_SHUTDOWN); + add_tux_atom(req, ftp_wait_close); + add_req_to_workqueue(req); + return; + } + Dprintk("FTP send file req %p finished!\n", req); + zap_data_socket(req); + add_tux_atom(req, ftp_flush_req); + if (req->error) + ftp_send_async_message(req, BAD_FILENAME, 200); + else + ftp_send_async_message(req, WRITE_DONE, 200); + break; + } +} + +#define sk_syn(sk) \ + (!(sk)->err && ((1 << (sk)->state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) +#define req_syn(req) \ + (!(req)->error && sk_syn((req)->data_sock->sk)) + +static void ftp_wait_syn (tux_req_t *req, int cachemiss) +{ + Dprintk("ftp_wait_syn in: data socket state %d.\n", req->data_sock->state); + if (req_syn(req)) { + spin_lock_irq(&req->ti->work_lock); + add_keepalive_timer(req); + if (test_and_set_bit(0, &req->idle_input)) + TUX_BUG(); + spin_unlock_irq(&req->ti->work_lock); + if (req_syn(req)) { + add_tux_atom(req, ftp_wait_syn); + return; + } + unidle_req(req); + } + Dprintk("ftp_wait_syn out: data socket state %d.\n", req->data_sock->state); + add_req_to_workqueue(req); +} + +static void ftp_wait_close (tux_req_t *req, int cachemiss) +{ + struct sock *sk = req->data_sock->sk; + + Dprintk("ftp_wait_close: data socket state %d.\n", sk->state); + + if (!req->error && 
(sk->state <= TCP_FIN_WAIT1) && !sk->err) { + spin_lock_irq(&req->ti->work_lock); + add_keepalive_timer(req); + if (test_and_set_bit(0, &req->idle_input)) + TUX_BUG(); + spin_unlock_irq(&req->ti->work_lock); + if (!req->error && (sk->state <= TCP_FIN_WAIT1) && !sk->err) { + add_tux_atom(req, ftp_wait_close); + return; + } + unidle_req(req); + } + zap_data_socket(req); + add_tux_atom(req, ftp_flush_req); + if (req->error) + ftp_send_async_message(req, BAD_FILENAME, 200); + else + ftp_send_async_message(req, WRITE_DONE, 200); +} + +void ftp_get_size (tux_req_t *req, int cachemiss) +{ + char file_size[200]; + int missed, len; + + if (!req->dentry) { + missed = lookup_object(req, cachemiss ? 0 : LOOKUP_ATOMIC); + if (!missed && !req->dentry) { + ftp_send_async_message(req, BAD_FILENAME, 200); + return; + } + if (missed) { + if (cachemiss) + TUX_BUG(); + add_tux_atom(req, ftp_get_size); + queue_cachemiss(req); + return; + } + } + req->in_file.f_pos = 0; + len = sprintf(file_size, "213 %Li\r\n", req->dentry->d_inode->i_size); + __ftp_send_async_message(req, file_size, 200, len); +} + +void ftp_get_mdtm (tux_req_t *req, int cachemiss) +{ + unsigned int flag = cachemiss ? 
0 : LOOKUP_ATOMIC; + struct dentry *dentry; + struct vfsmount *mnt = NULL; + char file_mdtm[200]; + unsigned int len; + int err; + + dentry = tux_lookup(req, req->objectname, flag, &mnt); + if (!dentry || IS_ERR(dentry)) { + if (PTR_ERR(dentry) == -EWOULDBLOCKIO) { + if (cachemiss) + TUX_BUG(); + add_tux_atom(req, ftp_get_mdtm); + queue_cachemiss(req); + return; + } + goto out_err; + } + err = permission(dentry->d_inode, MAY_READ); + if (err) + goto out_err_put; + + req->in_file.f_pos = 0; + len = mdtm_time (file_mdtm, dentry->d_inode->i_mtime); + dput(dentry); + mntput(mnt); + __ftp_send_async_message(req, file_mdtm, 200, len); + return; + +out_err_put: + dput(dentry); + mntput(mnt); +out_err: + ftp_send_async_message(req, BAD_FILENAME, 550); +} + +static void ftp_get_file (tux_req_t *req, int cachemiss) +{ + int missed; + + if (!req->dentry) { + missed = lookup_object(req, cachemiss ? 0 : LOOKUP_ATOMIC); + if (!missed && !req->dentry) { + ftp_send_async_message(req, BAD_FILENAME, 200); + return; + } + if (missed) { + if (cachemiss) + TUX_BUG(); + add_tux_atom(req, ftp_get_file); + queue_cachemiss(req); + return; + } + } + Dprintk("ftp_send_file %p, ftp_offset: %Ld, total_len: %Ld.\n", req, req->ftp_offset_start, req->total_file_len); + req->in_file.f_pos = 0; + if (req->ftp_offset_start) { + if (req->ftp_offset_start <= req->total_file_len) { + req->offset_start = req->ftp_offset_start; + req->in_file.f_pos = req->offset_start; + } + req->ftp_offset_start = 0; + } + req->output_len = req->total_file_len - req->offset_start; + req->prev_pos = req->in_file.f_pos; + Dprintk("ftp_send_file %p, f_pos: %Ld (out_len: %Ld).\n", req, req->in_file.f_pos, req->output_len); + add_tux_atom(req, ftp_send_file); + add_tux_atom(req, ftp_wait_syn); + add_tux_atom(req, ftp_flush_req); + ftp_send_async_message(req, WRITE_FILE, 200); +} + +static void __exchange_sockets (tux_req_t *req) +{ + struct socket *tmp; + + tmp = req->data_sock; + req->data_sock = req->sock; + req->sock = 
tmp; + + req->in_file.f_pos = 0; +} + +static void ftp_do_ls_start (tux_req_t *req, int cachemiss) +{ + Dprintk("ftp_do_ls_start(%p, %d).\n", req, cachemiss); + if (!req->cwd_dentry) + TUX_BUG(); + __exchange_sockets(req); + queue_cachemiss(req); +} + +static void ftp_do_ls_end (tux_req_t *req, int cachemiss) +{ + Dprintk("ftp_do_ls_end(%p, %d).\n", req, cachemiss); + __exchange_sockets(req); + if (tux_ftp_wait_close) { + req->data_sock->ops->shutdown(req->data_sock, SEND_SHUTDOWN); + add_tux_atom(req, ftp_wait_close); + add_req_to_workqueue(req); + return; + } + zap_data_socket(req); + add_tux_atom(req, ftp_flush_req); + if (req->error) + ftp_send_async_message(req, BAD_FILENAME, 200); + else + ftp_send_async_message(req, WRITE_DONE, 200); +} + +static void ftp_chdir (tux_req_t *req, int cachemiss) +{ + unsigned int flag = cachemiss ? 0 : LOOKUP_ATOMIC; + struct dentry *dentry; + struct vfsmount *mnt = NULL; + int err; + + Dprintk("ftp_chdir(%p, %d, {%s})\n", req, cachemiss, req->objectname); + dentry = tux_lookup(req, req->objectname, flag, &mnt); + if (!dentry || IS_ERR(dentry)) { + if (PTR_ERR(dentry) == -EWOULDBLOCKIO) { + if (cachemiss) + TUX_BUG(); + add_tux_atom(req, ftp_chdir); + queue_cachemiss(req); + return; + } + goto out_err; + } + err = permission(dentry->d_inode, MAY_EXEC); + if (err) + goto out_err_put; + req->cwd_dentry = dentry; + req->cwd_mnt = mnt; + ftp_send_async_message(req, GOOD_DIR, 200); + return; + +out_err_put: + dput(dentry); + mntput(mnt); +out_err: + ftp_send_async_message(req, BAD_FILENAME, 550); +} + +void ftp_accept_pasv (tux_req_t *req, int cachemiss) +{ + struct socket *sock, *new_sock = NULL; + struct tcp_opt *tp1, *tp2; + int err; + + tp1 = &req->data_sock->sk->tp_pinfo.af_tcp; + + Dprintk("PASV accept on req %p, accept_queue: %p.\n", + req, tp1->accept_queue); + if (req->error || (req->data_sock->sk->state != TCP_LISTEN)) + goto error; +new_socket: + if (!tp1->accept_queue) { + spin_lock_irq(&req->ti->work_lock); + 
add_keepalive_timer(req); + if (test_and_set_bit(0, &req->idle_input)) + TUX_BUG(); + spin_unlock_irq(&req->ti->work_lock); + if (!tp1->accept_queue) { + add_tux_atom(req, ftp_accept_pasv); + return; + } + unidle_req(req); + } + new_sock = sock_alloc(); + if (!new_sock) + goto error; + sock = req->data_sock; + new_sock->type = sock->type; + new_sock->ops = sock->ops; + + err = sock->ops->accept(sock, new_sock, O_NONBLOCK); + Dprintk("PASV accept() returned %d (state %d).\n", err, new_sock->sk->state); + if (err < 0) + goto error; + if (new_sock->sk->state != TCP_ESTABLISHED) + goto error; + /* + * Do not allow other clients to steal the FTP connection! + */ + if (new_sock->sk->daddr != req->sock->sk->daddr) { + Dprintk("PASV: ugh, unauthorized connect?\n"); + sock_release(new_sock); + new_sock = NULL; + goto new_socket; + } + /* + * Zap the listen socket: + */ + zap_data_socket(req); + + tp2 = &new_sock->sk->tp_pinfo.af_tcp; + tp2->nonagle = 2; + tp2->ack.pingpong = tux_ack_pingpong; + new_sock->sk->reuse = 1; + new_sock->sk->urginline = 1; + new_sock->sk->linger = 0; + new_sock->sk->tp_pinfo.af_tcp.nonagle = 2; + + link_tux_data_socket(req, new_sock); + add_req_to_workqueue(req); + return; + +error: + if (new_sock) + sock_release(new_sock); + req_err(req); + zap_data_socket(req); + ftp_send_async_message(req, CLOSE, 500); +} + +static char * ftp_print_dir_line (tux_req_t *req, char *tmp, char *d_name, int d_len, int d_type, struct dentry *dentry, struct inode *inode) +{ + char *string0 = tmp; + unsigned int size; + + if (req->ftp_command == FTP_COMM_NLST) { + memcpy(tmp, d_name, d_len); + tmp += d_len; + *tmp++ = '\r'; + *tmp++ = '\n'; + *tmp = 0; + return tmp; + } + switch (d_type) { + default: + case DT_UNKNOWN: + case DT_WHT: + if (tux_hide_unreadable) + goto out_dput; + *tmp++ = '?'; + break; + + case DT_FIFO: + if (tux_hide_unreadable) + goto out_dput; + *tmp++ = 'p'; + break; + + case DT_CHR: + if (tux_hide_unreadable) + goto out_dput; + *tmp++ = 'c'; + 
break; + + case DT_DIR: + *tmp++ = 'd'; + break; + + case DT_BLK: + if (tux_hide_unreadable) + goto out_dput; + *tmp++ = 'b'; + break; + + case DT_REG: + *tmp++ = '-'; + break; + + case DT_LNK: + *tmp++ = 'l'; + break; + + case DT_SOCK: + if (tux_hide_unreadable) + goto out_dput; + *tmp++ = 's'; + break; + } + + if (inode->i_mode & S_IRUSR) *tmp++ = 'r'; else *tmp++ = '-'; + if (inode->i_mode & S_IWUSR) *tmp++ = 'w'; else *tmp++ = '-'; + if (inode->i_mode & S_IXUSR) *tmp++ = 'x'; else *tmp++ = '-'; + if (inode->i_mode & S_IRGRP) *tmp++ = 'r'; else *tmp++ = '-'; + if (inode->i_mode & S_IWGRP) *tmp++ = 'w'; else *tmp++ = '-'; + if (inode->i_mode & S_IXGRP) *tmp++ = 'x'; else *tmp++ = '-'; + if (inode->i_mode & S_IROTH) *tmp++ = 'r'; else *tmp++ = '-'; + if (inode->i_mode & S_IWOTH) *tmp++ = 'w'; else *tmp++ = '-'; + if (inode->i_mode & S_IXOTH) *tmp++ = 'x'; else *tmp++ = '-'; + + *tmp++ = ' '; + + size = sprintf(tmp, "%4i %d", inode->i_nlink, inode->i_uid); + tmp += size; + + size = 14 - size; + if (size <= 0) + size = 1; + memset(tmp, ' ', size); + tmp += size; + + size = sprintf(tmp, "%d", inode->i_gid); + tmp += size; + + size = 9 - size; + if (size <= 0) + size = 1; + memset(tmp, ' ', size); + tmp += size; + + tmp += sprintf(tmp, "%8Li", inode->i_size); + *tmp++ = ' '; + + tmp += time_unix2ls(inode->i_mtime, tmp); + *tmp++ = ' '; + + memcpy(tmp, d_name, d_len); + tmp += d_len; + + if (d_type == DT_LNK) { + int len = 0, max_len; + #define ARROW " -> " + + memcpy(tmp, ARROW, sizeof(ARROW)-1); + tmp += sizeof(ARROW)-1; + max_len = MAX_OBJECTNAME_LEN-(tmp-string0); + if (inode->i_op && inode->i_op->readlink) { + mm_segment_t oldmm; + + oldmm = get_fs(); set_fs(KERNEL_DS); + set_fs(KERNEL_DS); + len = inode->i_op->readlink(dentry, tmp, max_len); + set_fs(oldmm); + } + if (len > 0) + tmp += len; + else + Dprintk("hm, readlink() returned %d.\n", len); + } + *tmp++ = '\r'; + *tmp++ = '\n'; + *tmp = 0; + + return tmp; +out_dput: + return NULL; +} + +static void 
ftp_do_ls_onefile (tux_req_t *req, int cachemiss) +{ + char string0[MAX_OBJECTNAME_LEN+200], *tmp; + + tmp = ftp_print_dir_line(req, string0, req->objectname, req->objectname_len, +DT_REG, req->dentry, req->dentry->d_inode); + if (!tmp) { + req_err(req); + add_req_to_workqueue(req); + return; + } + if (tmp - string0 >= MAX_OBJECTNAME_LEN+200) + BUG(); + __ftp_send_async_message(req, string0, 200, tmp - string0); +} + +static void ftp_lookup_listfile (tux_req_t *req, int cachemiss) +{ + unsigned int flag = cachemiss ? 0 : LOOKUP_ATOMIC; + struct dentry *dentry; + struct vfsmount *mnt = NULL; + int err; + + Dprintk("ftp_lookup_listfile(%p, %d, {%s})\n", req, cachemiss, req->objectname); + dentry = tux_lookup(req, req->objectname, flag, &mnt); + if (!dentry || IS_ERR(dentry)) { + if (PTR_ERR(dentry) == -EWOULDBLOCKIO) { + if (cachemiss) + TUX_BUG(); + add_tux_atom(req, ftp_lookup_listfile); + queue_cachemiss(req); + return; + } + goto out_err; + } + + if (S_ISDIR(dentry->d_inode->i_mode)) { + err = permission(dentry->d_inode, MAY_EXEC); + if (err) { + Dprintk("Directory permission error: %d.\n", err); + goto out_err_put; + } + install_req_dentry(req, dentry, mnt); + + add_tux_atom(req, ftp_do_ls_end); + if (!req->cwd_dentry) + TUX_BUG(); + add_tux_atom(req, list_directory); + } else { + install_req_dentry(req, dentry, mnt); + + add_tux_atom(req, ftp_do_ls_end); + add_tux_atom(req, ftp_do_ls_onefile); + } + + add_tux_atom(req, ftp_do_ls_start); + add_tux_atom(req, ftp_wait_syn); + add_tux_atom(req, ftp_flush_req); + ftp_send_async_message(req, WRITE_LIST, 200); + return; + +out_err_put: + dput(dentry); + mntput(mnt); +out_err: + ftp_send_async_message(req, BAD_FILENAME, 550); +} + +static void ftp_execute_command (tux_req_t *req, int cachemiss) +{ + if (!req->parsed_len) + TUX_BUG(); + trunc_headers(req); + req->keep_alive = 1; + + switch (req->ftp_command) { + +#define ABORTED \ + "226 Abort successful.\r\n" + + case FTP_COMM_ABOR: + { + zap_data_socket(req); + 
ftp_send_async_message(req, ABORTED, 226); + break; + } + + case FTP_COMM_PWD: + { + unsigned int str_len; + char *buf, *path; + + buf = (char *)__get_free_page(GFP_KERNEL); + if (!buf) { + req_err(req); + ftp_send_async_message(req, LIST_ERR_MEM, 200); + GOTO_ERR; + } + + if (!req->cwd_dentry) { + req->cwd_dentry = dget(req->docroot_dentry); + req->cwd_mnt = mntget(req->docroot_mnt); + } + +// "257 "/" is current directory.\r\n" + +#define PART_1 "257 \"" +#define PART_1_LEN (sizeof(PART_1)-1) + +#define PART_3 "\" is current directory.\r\n" +#define PART_3_LEN sizeof(PART_3) + + path = tux_print_path(req, req->cwd_dentry, req->cwd_mnt, + buf+PART_1_LEN, PAGE_SIZE - PART_3_LEN - PART_1_LEN); + + if (path < buf + PART_1_LEN) + BUG(); + + memcpy(path - PART_1_LEN, PART_1, PART_1_LEN); + memcpy(buf + PAGE_SIZE-PART_3_LEN-1, PART_3, PART_3_LEN); + str_len = buf + PAGE_SIZE-1 - (path - PART_1_LEN) - 1; + + __ftp_send_async_message(req, path - PART_1_LEN, 226, str_len); + free_page((unsigned long)buf); + break; + } + + case FTP_COMM_CDUP: + { + memcpy(req->objectname, "..", 3); + req->objectname_len = 2; + req->uri_str = req->objectname; + req->uri_len = req->objectname_len; + + // fall through to CWD: + } + case FTP_COMM_CWD: + { + ftp_chdir(req, cachemiss); + break; + } + + case FTP_COMM_NLST: + case FTP_COMM_LIST: + { + if (!req->data_sock) { + req_err(req); + ftp_send_async_message(req, LIST_ERR, 200); + GOTO_ERR; + } + if (req->dentry) + TUX_BUG(); + if (!req->cwd_dentry) { + req->cwd_dentry = dget(req->docroot_dentry); + req->cwd_mnt = mntget(req->docroot_mnt); + } + if (req->objectname_len) + ftp_lookup_listfile(req, cachemiss); + else { + dget(req->cwd_dentry); + mntget(req->cwd_mnt); + install_req_dentry(req, req->cwd_dentry, req->cwd_mnt); + if (!req->dentry) + TUX_BUG(); + add_tux_atom(req, ftp_do_ls_end); + if (!req->cwd_dentry) + TUX_BUG(); + add_tux_atom(req, list_directory); + add_tux_atom(req, ftp_do_ls_start); + add_tux_atom(req, ftp_wait_syn); + 
add_tux_atom(req, ftp_flush_req); + ftp_send_async_message(req, WRITE_LIST, 200); + } + break; + } + + case FTP_COMM_RETR: + { + if (!req->data_sock) { + req_err(req); + ftp_send_async_message(req, RETR_ERR, 200); + GOTO_ERR; + } + ftp_get_file(req, cachemiss); + break; + } + + case FTP_COMM_SIZE: + { + ftp_get_size(req, cachemiss); + break; + } + + case FTP_COMM_MDTM: + { + ftp_get_mdtm(req, cachemiss); + break; + } + + case FTP_COMM_PASV: + { + char buf [36 + 4*3 + 5 + 10]; + struct socket *data_sock; + struct sockaddr_in addr; + unsigned int str_len; + struct tcp_opt *tp; + u32 local_addr; + int err; + + if (req->data_sock) + zap_data_socket(req); + /* + * Create FTP data connection to client: + */ + err = sock_create(AF_INET, SOCK_STREAM, IPPROTO_IP, &data_sock); + if (err < 0) { + Dprintk("sock create err: %d\n", err); + req_err(req); + ftp_send_async_message(req, CLOSE, 500); + GOTO_ERR; + } + + local_addr = req->sock->sk->rcv_saddr; + addr.sin_family = AF_INET; + addr.sin_port = 0; + addr.sin_addr.s_addr = local_addr; + Dprintk("client address: (%d,%d,%d,%d).\n", + NIPQUAD(req->sock->sk->daddr)); + + data_sock->sk->reuse = 1; + data_sock->sk->urginline = 1; + data_sock->sk->linger = 0; + + err = data_sock->ops->bind(data_sock, + (struct sockaddr*)&addr, sizeof(addr)); + data_sock->sk->tp_pinfo.af_tcp.nonagle = 2; + Dprintk("PASV bind() ret: %d.\n", err); + if (err < 0) { + req_err(req); + sock_release(data_sock); + ftp_send_async_message(req, CLOSE, 500); + GOTO_ERR; + } + data_sock->sk->tp_pinfo.af_tcp.nonagle = 2; + + tp = &data_sock->sk->tp_pinfo.af_tcp; + tp->ack.pingpong = tux_ack_pingpong; + + if (!tux_keepalive_timeout) + tp->linger2 = 0; + else + tp->linger2 = tux_keepalive_timeout * HZ; + + err = data_sock->ops->listen(data_sock, 1); + Dprintk("PASV listen() ret: %d\n", err); + if (err) { + req_err(req); + sock_release(data_sock); + ftp_send_async_message(req, CLOSE, 500); + GOTO_ERR; + } + link_tux_data_socket(req, data_sock); + + Dprintk("FTP PASV 
listen sock state: %d, sk state: %d\n", + data_sock->state, data_sock->sk->state); + + str_len = sprintf(buf, + "227 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n", + NIPQUAD(local_addr), + ntohs(data_sock->sk->sport) / 256, + ntohs(data_sock->sk->sport) & 255 ); + Dprintk("PASV mess: {%s}\n", buf); + + add_tux_atom(req, ftp_accept_pasv); + add_tux_atom(req, ftp_flush_req); + __ftp_send_async_message(req, buf, 227, str_len); + break; + } + + case FTP_COMM_PORT: + { + struct socket *data_sock; + struct sockaddr_in addr; + kernel_cap_t saved_cap; + u32 local_addr; + int err; + + /* + * Create FTP data connection to client: + */ + err = sock_create(AF_INET, SOCK_STREAM, IPPROTO_IP, &data_sock); + if (err < 0) { + Dprintk("sock create err: %d\n", err); + req_err(req); + ftp_send_async_message(req, CLOSE, 500); + GOTO_ERR; + } + + local_addr = req->sock->sk->rcv_saddr; + addr.sin_family = AF_INET; + addr.sin_port = htons(20); + addr.sin_addr.s_addr = local_addr; + + Dprintk("data socket address: (%d,%d,%d,%d).\n", + NIPQUAD(local_addr)); + + data_sock->sk->reuse = 1; + data_sock->sk->urginline = 1; + data_sock->sk->linger = 0; + + saved_cap = current->cap_effective; + cap_raise (current->cap_effective, CAP_NET_BIND_SERVICE); + err = data_sock->ops->bind(data_sock, + (struct sockaddr*)&addr, sizeof(addr)); + current->cap_effective = saved_cap; + + Dprintk("ACTIVE bind() ret: %d.\n", err); + if (err) { + sock_release(data_sock); + req_err(req); + ftp_send_async_message(req, CLOSE, 500); + GOTO_ERR; + } + data_sock->sk->tp_pinfo.af_tcp.nonagle = 2; + + link_tux_data_socket(req, data_sock); + + addr.sin_family = AF_INET; + addr.sin_port = htons(req->ftp_user_port); + addr.sin_addr.s_addr = htonl(req->ftp_user_addr); + + err = data_sock->ops->connect(data_sock, (struct sockaddr *) &addr, sizeof(addr), O_RDWR|O_NONBLOCK); + if (err && (err != -EINPROGRESS)) { + Dprintk("connect error: %d\n", err); + zap_data_socket(req); + req_err(req); + ftp_send_async_message(req, CLOSE, 
500); + GOTO_ERR; + } + Dprintk("FTP data sock state: %d, sk state: %d\n", data_sock->state, data_sock->sk->state); + ftp_send_async_message(req, PORT_OK, 200); + break; + } + + case FTP_COMM_USER: + { + if (!strcmp(req->username, "ftp") + || !strcmp(req->username, "FTP") + || !strcmp(req->username, "anonymous") + || !strcmp(req->username, "ANONYMOUS")) { + unsigned int str_len; + char login_ok [200]; + + if (!tux_ftp_login_message) { + ftp_send_async_message(req, LOGIN_OK_PASS, 230); + break; + } + update_bandwidth(req, 0); /* get current bandwidth */ + if (nr_requests_used() == 1) + str_len = sprintf(login_ok, LOGIN_OK_ONE, + tux_max_connect, ftp_bandwidth); + else + str_len = sprintf(login_ok, LOGIN_OK, + nr_requests_used(), tux_max_connect, ftp_bandwidth); + __ftp_send_async_message(req, login_ok, 200, str_len); + } else { + clear_keepalive(req); + ftp_send_async_message(req, LOGIN_FORBIDDEN, 530); + } + break; + } + case FTP_COMM_PASS: + { + ftp_send_async_message(req, LOGIN_OK_PASS, 230); + break; + } + case FTP_COMM_SITE: + { + ftp_send_async_message(req, SITE, 214); + break; + } + case FTP_COMM_SYST: + { + ftp_send_async_message(req, LINUX_SYST, 200); + break; + } + case FTP_COMM_TYPE: + { + ftp_send_async_message(req, TYPE_OK, 200); + break; + } +#define EXTRA_FEATURES "211-Extensions supported:\r\n SIZE\r\n MDTM\r\n211 End\r\n" + + case FTP_COMM_FEAT: + { + ftp_send_async_message(req, EXTRA_FEATURES, 211); + break; + } + case FTP_COMM_HELP: + case FTP_COMM_CLNT: + case FTP_COMM_NOOP: + { + ftp_send_async_message(req, COMMAND_OK, 200); + break; + } + case FTP_COMM_REST: + { + ftp_send_async_message(req, REST_OK, 200); + break; + } + case FTP_COMM_QUIT: + { + clear_keepalive(req); + ftp_send_async_message(req, BYE, 200); + break; + } + + default: + { + req->keep_alive = 1; + ftp_send_async_message(req, CLOSE, 500); + break; + } + } + return; +error: + Dprintk("rejecting FTP session!\n"); + return; +} + + +static void ftp_timeout (tux_req_t *req, int 
cachemiss) +{ + Dprintk("called ftp_timeout(%p)\n", req); + if (req->error != TUX_ERROR_CONN_TIMEOUT) + TUX_BUG(); + ftp_send_async_message(req, CLOSE_TIMEOUT, 421); +} + +static void ftp_close (tux_req_t *req, int cachemiss) +{ + Dprintk("called ftp_close(%p)\n", req); + ftp_send_async_message(req, CLOSE, 500); +} + +static void ftp_pre_log (tux_req_t *req) +{ + if (tux_ftp_log_retr_only && (req->ftp_command != FTP_COMM_RETR)) + req->status = 0; + else + req->status = req->ftp_command; +} + +tux_proto_t tux_proto_ftp = { + defer_accept: 0, + can_redirect: 0, + got_request: ftp_got_request, + parse_message: parse_ftp_message, + illegal_request: ftp_close, + request_timeout: ftp_timeout, + pre_log: ftp_pre_log, + check_req_err: ftp_check_req_err, + print_dir_line: ftp_print_dir_line, + name: "ftp", +}; + --- linux/net/tux/proto_http.c.orig +++ linux/net/tux/proto_http.c @@ -0,0 +1,2193 @@ +/* + * TUX - Integrated Application Protocols Layer and Object Cache + * + * Copyright (C) 2000, 2001, Ingo Molnar + * + * proto_http.c: HTTP application protocol support + * + * Right now we detect simple GET headers, anything more + * subtle gets redirected to secondary server port. + */ + +#include +#include "parser.h" + +/**************************************************************** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + ****************************************************************/ + +/* + * Parse the HTTP message and put results into the request structure. + * CISAPI extensions do not see the actual message buffer. + * + * Any perceived irregularity is honored with a redirect to the + * secondary server - which in most cases should be Apache. So + * if TUX gets confused by some strange request we fall back + * to Apache to be RFC-correct. + * + * The parser is 'optimistic', ie. it's optimized for the case where + * the whole message is available and correct. The parser is also + * supposed to be 'robust', ie. it can be called multiple times with + * an incomplete message, as new packets arrive. + */ + +static inline int TOHEX (char c) +{ + switch (c) { + case '0' ... '9': c -= '0'; break; + case 'a' ... 'f': c -= 'a'-10; break; + case 'A' ... 'F': c -= 'A'-10; break; + default: + c = -1; + } + return c; +} + +/* + * This function determines whether the client supports + * gzip-type content-encoding. + */ +static int may_gzip (const char *str, int len) +{ + const char *tmp, *curr; + int i; + + if (len <= 4) + return 0; + tmp = str; + for (i = 0; i <= len-6; i++) { + Dprintk("gzip-checking: {%s}\n", tmp); + if (memcmp(tmp, " gzip", 5)) { + tmp++; + continue; + } + curr = tmp + 5; + + if (*curr == ',' || *curr == '\r') + return 1; + if (memcmp(curr, ";q=", 3)) + return 0; + curr += 3; + /* + * Every qvalue except explicitly zero is accepted. + * Zero values are "q=0.0", "q=0.00", "q=0.000". + * Parsing is optimized. + */ + if (*curr == '0') { + curr += 2; + if (*curr == '0') { + curr++; + if (*curr == ' ' || *curr == '\r') + return 0; + if (*curr == '0') { + curr++; + if (*curr == ' ' || *curr == '\r') + return 0; + if (*curr == '0') { + curr++; + if (*curr == ' ' || + *curr == '\r') + return 0; + } + } + } + } + return 1; + } + return 0; +} + +/* + * This function strips off 'strip_host_tail' number of hostname + * components from the tail of the hostname. + * + * Eg. 
with a value of '1', the "somesite.hosting.com" hostname gets + * transformed into the "somesite" string. + */ +static void strip_hostname(tux_req_t *req) +{ + int strip = strip_host_tail; + int left = req->host_len; + int component = 0; + + if (!strip || !left) + return; + + while (--left) { + if (req->host[left] != '.') + continue; + if (++component == strip) + break; + } + if (!left) + return; + req->host[left] = 0; + req->host_len = left; +} + +static void http_lookup_vhost (tux_req_t *req, int cachemiss); +static void http_process_message (tux_req_t *req, int cachemiss); + +int parse_http_message (tux_req_t *req, const int total_len) +{ + int hexhex = 0, hex_val_0 = 0, hex_val_1 = 0; + const char *curr, *uri, *message; + unsigned int objectname_len, left; + unsigned int have_r = 0; + char c; + + left = total_len; + message = req->headers; + Dprintk("parsing request:\n---\n%s\n---\n", message); +/* + * RFC 2616, 5.1: + * + * Request-Line = Method SP Request-URI SP HTTP-Version CRLF + */ + + if (!total_len) + TUX_BUG(); + + curr = message; + +#define GOTO_INCOMPLETE do { Dprintk("incomplete at %s:%d.\n", __FILE__, __LINE__); goto incomplete_message; } while (0) +#define GOTO_REDIR do { TDprintk("redirect secondary at %s:%d.\n", __FILE__, __LINE__); goto error; } while (0) + +#define PRINT_MESSAGE_LEFT \ + Dprintk("message left (%d) at %s:%d:\n--->{%s}<---\n", left, __FILE__, __LINE__, curr) + + switch (*curr) { + case 'G': + if (PARSE_METHOD(req,curr,GET,left)) + break; + GOTO_REDIR; + + case 'H': + if (PARSE_METHOD(req,curr,HEAD,left)) + break; + GOTO_REDIR; + + case 'P': + if (PARSE_METHOD(req,curr,POST,left)) + break; + if (PARSE_METHOD(req,curr,PUT,left)) + break; + GOTO_REDIR; + + default: + GOTO_REDIR; + } + + req->method_str = message; + req->method_len = curr-message-1; + + Dprintk("got method %d\n", req->method); + + PRINT_MESSAGE_LEFT; + + /* + * Ok, we got one of the methods we can handle, parse + * the URI: + */ + + { + // Do not allow leading "../" 
and intermediate "/../" + int dotdot = 1; + char *tmp = req->objectname; + int slashcheck = 1; + + req->uri_str = uri = curr; + + for (;;) { + c = get_c(curr,left); + if (slashcheck) { + if (c == '/') + continue; + slashcheck = 0; + } + + PRINT_MESSAGE_LEFT; + if (c == ' ' || ((c == '?') && (tux_ignore_query != 1)) || c == '\r' || c == '\n') + break; + if (c == '#') + GOTO_REDIR; + + Dprintk("hexhex: %d.\n", hexhex); + /* + * First handle HEX HEX encoding + */ + switch (hexhex) { + case 0: + if (c == '%') { + hexhex = 1; + goto continue_parsing; + } + break; + case 1: + hex_val_0 = TOHEX(c); + if (hex_val_0 < 0) + GOTO_REDIR; + hexhex = 2; + goto continue_parsing; + case 2: + hex_val_1 = TOHEX(c); + if (hex_val_1 < 0) + GOTO_REDIR; + c = (hex_val_0 << 4) | hex_val_1; + if (!c) + GOTO_REDIR; + hexhex = 0; + break; + default: + TUX_BUG(); + } + if (hexhex) + TUX_BUG(); + + switch (dotdot) { + case 0: + break; + case 1: + if (c == '.') + dotdot = 2; + else + dotdot = 0; + break; + case 2: + if (c == '.') + dotdot = 3; + else + dotdot = 0; + break; + case 3: + if (c == '/') + GOTO_REDIR; + else + dotdot = 0; + break; + default: + TUX_BUG(); + } + if (!dotdot && (c == '/')) + dotdot = 1; + + *(tmp++) = c; +continue_parsing: + if (curr - uri >= MAX_OBJECTNAME_LEN) + GOTO_REDIR; + } + PRINT_MESSAGE_LEFT; + *tmp = 0; + + // handle trailing "/.." + if (dotdot == 3) + GOTO_REDIR; + + objectname_len = tmp - req->objectname; + req->objectname_len = objectname_len; + } + Dprintk("got filename %s (%d)\n", req->objectname, req->objectname_len); + + PRINT_MESSAGE_LEFT; + + /* + * Parse optional query string. Copy until end-of-string or space. 
+ */ + if (c == '?') { + int query_len; + const char *query; + + req->query_str = query = curr; + + for (;;) { + c = get_c(curr,left); + if (c == ' ') + break; + if (c == '#') + GOTO_REDIR; + } + if (unlikely(tux_ignore_query == 2)) + req->query_str = NULL; + else { + query_len = curr-query-1; + req->query_len = query_len; + } + } + if (req->query_len) + Dprintk("got query string %s (%d)\n", req->query_str, req->query_len); + req->uri_len = curr-uri-1; + if (!req->uri_len) + GOTO_REDIR; + Dprintk("got URI %s (%d)\n", req->uri_str, req->uri_len); + + PRINT_MESSAGE_LEFT; + /* + * Parse the HTTP version field: + */ + req->version_str = curr; + if (!PARSE_TOKEN(curr,"HTTP/1.",left)) + GOTO_REDIR; + + switch (get_c(curr,left)) { + case '0': + req->version = HTTP_1_0; + break; + case '1': + req->version = HTTP_1_1; + break; + default: + GOTO_REDIR; + } + /* + * We default to keepalive in the HTTP/1.1 case and default + * to non-keepalive in the HTTP/1.0 case. If max_keepalives + * is 0 then we do no keepalives. 
+ */ + clear_keepalive(req); + if (tux_max_keepalives && (req->version == HTTP_1_1)) + req->keep_alive = 1; + req->version_len = curr - req->version_str; + + if (get_c(curr,left) != '\r') + GOTO_REDIR; + if (get_c(curr,left) != '\n') + GOTO_REDIR; + + Dprintk("got version %d [%d]\n", req->version, req->version_len); + PRINT_MESSAGE_LEFT; + + /* + * Now parse (optional) request header fields: + */ + for (;;) { + char c; + + c = get_c(curr,left); + switch (c) { + case '\r': + if (have_r) + GOTO_REDIR; + have_r = 1; + continue; + case '\n': + if (!have_r) + GOTO_REDIR; + goto out; + default: + if (have_r) + GOTO_REDIR; + } + +#define PARSE_STR_FIELD(char,field,str,len) \ + if (PARSE_TOKEN(curr,field,left)) { \ + req->str = curr; \ + SKIP_LINE(curr,left); \ + req->len = curr - req->str - 2; \ + Dprintk(char field "field: %s.\n", req->str); \ + break; \ + } + +#define ALLOW_UNKNOWN_FIELDS 1 +#ifdef ALLOW_UNKNOWN_FIELDS +# define UNKNOWN_FIELD { SKIP_LINE(curr,left); break; } +#else +# define UNKNOWN_FIELD GOTO_REDIR +#endif + + switch (c) { + case 'A': + PARSE_STR_FIELD("A","ccept: ", + accept_str,accept_len); + if (PARSE_TOKEN(curr,"ccept-Encoding: ",left)) { + const char *str = curr-1; + + req->accept_encoding_str = curr; + SKIP_LINE(curr,left); + req->accept_encoding_len = curr - req->accept_encoding_str - 2; + Dprintk("Accept-Encoding field: {%s}.\n", str); + + if (tux_compression && may_gzip(str,curr-str)) { + Dprintk("client accepts gzip!.\n"); + req->may_send_gzip = 1; + } + break; + } + PARSE_STR_FIELD("A","ccept-Charset: ", + accept_charset_str,accept_charset_len); + PARSE_STR_FIELD("A","ccept-Language: ", + accept_language_str,accept_language_len); + UNKNOWN_FIELD; + + case 'C': + if (PARSE_TOKEN(curr,"onnection: ",left)) { +next_token: + switch (get_c(curr,left)) { + case 'K': + if (!PARSE_TOKEN(curr,"eep-Alive",left)) + GOTO_REDIR; + if (tux_max_keepalives) + req->keep_alive = 1; + break; + + case 'C': + case 'c': + if (!PARSE_TOKEN(curr,"lose",left)) + 
GOTO_REDIR; + clear_keepalive(req); + break; + + case 'k': + if (!PARSE_TOKEN(curr,"eep-alive",left)) + GOTO_REDIR; + if (tux_max_keepalives) + req->keep_alive = 1; + break; + case 'T': + if (PARSE_TOKEN(curr,"E",left)) + break; + if (PARSE_TOKEN(curr,"railers",left)) + break; + if (PARSE_TOKEN(curr,"ransfer-Encoding",left)) + break; + GOTO_REDIR; + case 'P': + if (PARSE_TOKEN(curr,"roxy-Authenticate",left)) + break; + if (PARSE_TOKEN(curr,"roxy-Authorization",left)) + break; + GOTO_REDIR; + case 'U': + if (!PARSE_TOKEN(curr,"pgrade",left)) + GOTO_REDIR; + break; + case ' ': + PRINT_MESSAGE_LEFT; + goto next_token; + case ',': + PRINT_MESSAGE_LEFT; + goto next_token; + default: + GOTO_REDIR; + } + PRINT_MESSAGE_LEFT; + if (*curr != '\r') + goto next_token; + // allow other tokens. + SKIP_LINE(curr,left); + break; + } + + PARSE_STR_FIELD("C","ookie: ", + cookies_str,cookies_len); + PARSE_STR_FIELD("C","ontent-Type: ", + content_type_str,content_type_len); + + if (PARSE_TOKEN(curr,"ontent-Length: ",left) || + PARSE_TOKEN(curr,"ontent-length: ",left)) { + const char *tmp; + req->contentlen_str = curr; + SKIP_LINE(curr,left); + req->contentlen_len = curr - req->contentlen_str - 2; + if (req->contentlen_len) { + tmp = req->contentlen_str; + req->content_len = simple_strtoul(tmp, NULL, 10); + } + Dprintk("Content-Length field: %s [%d].\n", req->contentlen_str, req->contentlen_len); + Dprintk("Content-Length value: %d.\n", req->content_len); + break; + } + PARSE_STR_FIELD("C","ache-Control: ", + cache_control_str,cache_control_len); + UNKNOWN_FIELD; + + case 'H': + if (PARSE_TOKEN(curr,"ost: ",left)) { + const char *tmp = curr; + char *tmp2 = req->host; + + /* + * canonize the hostname: + * + * 1) strip off preceding 'www.' variants, + * 2) transform it to lowercase. 
+ * 3) strip trailing dots + * 4) potentially strip off tail + */ + +#define is_w(n) ((curr[n] == 'w') || (curr[n] == 'W')) + + if ((left > 4) && is_w(0) && is_w(1) && + is_w(2) && curr[3] == '.') { + curr += 4; + left -= 4; + tmp = curr; + } + + COPY_LINE_TOLOWER(curr, tmp2, left, req->host+MAX_HOST_LEN-2); + req->host_len = curr - tmp - 2; + while (req->host[req->host_len] == '.') { + if (!req->host_len) + break; + req->host_len--; + } + req->host[req->host_len] = 0; + if (strip_host_tail) + strip_hostname(req); + Dprintk("Host field: %s [%d].\n", req->host, req->host_len); + break; + } + UNKNOWN_FIELD; + + case 'I': + PARSE_STR_FIELD("I","f-None-Match: ", + if_none_match_str,if_none_match_len); + PARSE_STR_FIELD("I","f-Modified-Since: ", + if_modified_since_str,if_modified_since_len); + PARSE_STR_FIELD("I","f-Range: ", + if_range_str,if_range_len); + UNKNOWN_FIELD; + + case 'N': + PARSE_STR_FIELD("N","egotiate: ", + negotiate_str,negotiate_len); + UNKNOWN_FIELD; + + case 'P': + PARSE_STR_FIELD("P","ragma: ", + pragma_str,pragma_len); + UNKNOWN_FIELD; + + case 'R': + + PARSE_STR_FIELD("R","eferer: ", + referer_str,referer_len); + if (!PARSE_TOKEN(curr,"ange: bytes=",left)) + UNKNOWN_FIELD; + { + const char *tmp = curr; + char *tmp2 = (char *)curr; + unsigned int offset_start = 0, offset_end = 0; + + if (*tmp2 != '-') + offset_start = simple_strtoul(tmp2, &tmp2, 10); + if (*tmp2 == '-') { + tmp2++; + if (*tmp2 != '\r') + offset_end = simple_strtoul(tmp2, &tmp2, 10) +1; + } + curr = tmp2; + left -= tmp2-tmp; + + req->offset_start = offset_start; + req->offset_end = offset_end; + + SKIP_LINE(curr,left); + Dprintk("Range field: %s [%d] (%d-%d).\n", tmp, curr-tmp, offset_start, offset_end); + break; + } + + case 'U': + PARSE_STR_FIELD("U","ser-Agent: ", + user_agent_str,user_agent_len); + UNKNOWN_FIELD; + + default: + UNKNOWN_FIELD; + } + PRINT_MESSAGE_LEFT; + } +out: + /* + * POST data. 
+ */ + if ((req->method == METHOD_POST) && req->content_len) { + PRINT_MESSAGE_LEFT; + if (curr + req->content_len > message + total_len) + GOTO_INCOMPLETE; + req->post_data_str = curr; + req->post_data_len = req->content_len; + curr += req->content_len; + left -= req->content_len; + Dprintk("POST-ed data: {%s}\n", req->post_data_str); + } + + switch (req->method) { + default: + GOTO_REDIR; + case METHOD_GET: + case METHOD_HEAD: + case METHOD_POST: + case METHOD_PUT: + ; + } + +#define TUX_SCHEME "http://" +#define TUX_SCHEME_LEN (sizeof(TUX_SCHEME)-1) + + if (!memcmp(req->objectname, TUX_SCHEME, TUX_SCHEME_LEN)) { + + /* http://user:password@host:port/object */ + + const char *head, *tail, *end, *host, *port; + int host_len, objectname_len; + + head = req->objectname + TUX_SCHEME_LEN; + end = req->objectname + req->objectname_len; + + tail = memchr(head, '/', end - head); + if (!tail) + GOTO_REDIR; + host = memchr(head, '@', tail - head); + if (!host) + host = head; + else + host++; + if (!*host) + GOTO_REDIR; + port = memchr(host, ':', tail - host); + if (port) + host_len = port - host; + else + host_len = tail - host; + if (host_len >= MAX_HOST_LEN) + GOTO_REDIR; + memcpy(req->host, host, host_len); + req->host_len = host_len; + req->host[host_len] = 0; + + if (*tail != '/') + TUX_BUG(); + + req->uri_str = tail; + req->uri_len = end - tail; + + tail++; + while (*tail == '/') + tail++; + + objectname_len = end - tail; + memcpy(req->objectname, tail, objectname_len); + req->objectname_len = objectname_len; + req->objectname[objectname_len] = 0; + } else + if (req->uri_str[0] != '/') + GOTO_REDIR; + + if ((req->version == HTTP_1_1) && !req->host_len) + GOTO_REDIR; + if (req->objectname[0] == '/') + GOTO_REDIR; + /* + * Lets make sure nobody plays games with the host + * header in a virtual hosting environment: + */ + if (req->virtual && req->host_len) { + if (memchr(req->host, '/', req->host_len)) + GOTO_REDIR; + if (req->host[0] == '.') { + if (req->host_len == 1) 
+ GOTO_REDIR; + if ((req->host_len == 2) && (req->host[0] == '.')) + GOTO_REDIR; + } + } + /* + * From this point on the request is for the main TUX engine: + */ + Dprintk("ok, request accepted.\n"); + + if (req->keep_alive) { + req->nr_keepalives++; + if (req->nr_keepalives == -1) + req->nr_keepalives--; + INC_STAT(nr_keepalive_reqs); + } else + INC_STAT(nr_nonkeepalive_reqs); + INC_STAT(keepalive_hist[req->nr_keepalives]); + + PRINT_MESSAGE_LEFT; + req->parsed_len = curr-message; + if (req->dentry) + TUX_BUG(); + req->virtual = tux_virtual_server; + if (req->virtual) + add_tux_atom(req, http_lookup_vhost); + else { + req->docroot_dentry = dget(req->proto->main_docroot.dentry); + req->docroot_mnt = mntget(req->proto->main_docroot.mnt); + add_tux_atom(req, http_process_message); + } + + return req->parsed_len; + +incomplete_message: + Dprintk("incomplete message!\n"); + PRINT_MESSAGE_LEFT; + + return 0; + +error: + if (total_len > 0) + req->parsed_len = total_len; + else + req->parsed_len = 0; + PRINT_MESSAGE_LEFT; + if (tux_TDprintk) { + TDprintk("redirecting message to secondary server.\n"); + print_req(req); + } + return -1; +} + +static int lookup_url (tux_req_t *req, const unsigned int flag) +{ + /* + * -1 : no previous checks made + * 0 : previous check failed, do not check farther, + * 1 : previous check successed, check farther + */ + int not_modified = -1; + int perm = 0, i; + struct dentry *dentry = NULL; + struct vfsmount *mnt = NULL; + struct inode *inode; + const char *filename; + + /* + * Do not do any etag or last_modified header checking + * if both unset. 
+ */ + if (!tux_generate_etags && !tux_generate_last_mod) + not_modified = 0; + +repeat_lookup: + if (req->dentry) + TUX_BUG(); + + filename = req->objectname; + Dprintk("will look up {%s} (%d)\n", filename, req->objectname_len); + Dprintk("current->fsuid: %d, current->fsgid: %d, ngroups: %d\n", + current->fsuid, current->fsgid, current->ngroups); + for (i = 0; i < current->ngroups; i++) + Dprintk(".. group #%d: %d.\n", i, current->groups[i]); + + dentry = tux_lookup(req, filename, flag, &mnt); + +#define INDEX "/index.html" + + if (!dentry || IS_ERR(dentry)) { + if (PTR_ERR(dentry) == -EWOULDBLOCKIO) + goto cachemiss; + + if (tux_http_dir_indexing && (req->lookup_dir == 1)) { + // undo the index.html appending: + req->objectname_len -= sizeof(INDEX)-1; + req->objectname[req->objectname_len] = 0; + req->lookup_dir = 2; + goto repeat_lookup; + } + if (!req->lookup_404) { + int len = strlen(tux_404_page); + memcpy(req->objectname, tux_404_page, len); + req->objectname[len] = 0; + req->objectname_len = len; + req->lookup_404 = 1; + req->status = 404; + goto repeat_lookup; + } + TDprintk("abort - lookup error.\n"); + goto abort; + } + + Dprintk("SUCCESS, looked up {%s} == dentry %p (inode %p, count %d.)\n", filename, dentry, dentry->d_inode, atomic_read(&dentry->d_count)); + inode = dentry->d_inode; + + /* + * At this point we have a real, non-negative dentry. 
+ */ + perm = tux_permission(inode); + + if ((perm < 0) || (!S_ISDIR(dentry->d_inode->i_mode) + && !S_ISREG(dentry->d_inode->i_mode))) { + Dprintk("FAILED trusted dentry %p permission %d.\n", dentry, perm); + req->status = 403; + goto abort; + } + if ((req->lookup_dir != 2) && S_ISDIR(dentry->d_inode->i_mode)) { + if (req->lookup_dir || (req->objectname_len + + sizeof(INDEX) >= MAX_OBJECTNAME_LEN)) { + req->status = 403; + goto abort; + } + if (req->objectname_len && (req->objectname[req->objectname_len-1] != '/')) { + dput(dentry); + mntput(mnt); + req->lookup_dir = 0; + return 2; + } + memcpy(req->objectname + req->objectname_len, + INDEX, sizeof(INDEX)); + req->objectname_len += sizeof(INDEX)-1; + req->lookup_dir = 1; + dput(dentry); + mntput(mnt); + mnt = NULL; + dentry = NULL; + goto repeat_lookup; + } + if (tux_max_object_size && (inode->i_size > tux_max_object_size)) { + TDprintk("too big object, %Ld bytes.\n", inode->i_size); + req->status = 403; + goto abort; + } + req->total_file_len = inode->i_size; + req->mtime = inode->i_mtime; + + { + loff_t num = req->total_file_len; + int nr_digits = 0; + unsigned long modulo; + char * etag_p = req->etag; + char digits [30]; + + do { + modulo = do_div(num, 10); + digits[nr_digits++] = '0' + modulo; + } while (num); + + req->lendigits = nr_digits; + req->etaglen = nr_digits; + + while (nr_digits) + *etag_p++ = digits[--nr_digits]; + + *etag_p++ = '-'; + num = req->mtime; + nr_digits = 0; + + do { + digits[nr_digits++] = 'a' + num % 16; + num /= 16; + } while (num); + req->etaglen += nr_digits+1; + while (nr_digits) + *etag_p++ = digits[--nr_digits]; + *etag_p = 0; + } + + if ((req->if_none_match_len >= req->etaglen) && (abs(not_modified) == 1)) { + + char * etag_p = req->etag; + const char * match_p = req->if_none_match_str; + int pos = req->etaglen - 1; + int matchpos = req->etaglen - 1; + + do { + while (etag_p[matchpos--] == match_p[pos--]) + if (matchpos < 0) + break; + if (matchpos < 0) + pos = 
req->if_none_match_len; + else { + if (match_p[pos+1] == ',') + pos += req->etaglen + 2; + else + pos += req->etaglen-matchpos; + matchpos = req->etaglen - 1; + } + } while (pos < req->if_none_match_len); + + if (matchpos < 0) { + not_modified = 1; + TDprintk("Etag matched.\n"); + } else + not_modified = 0; + } + + if ((req->if_modified_since_len >= 24) && (abs(not_modified) == 1)) { + if (parse_time(req->if_modified_since_str, req->if_modified_since_len) >= req->mtime ) { + not_modified = 1; + Dprintk("Last-Modified matched.\n"); + } else + not_modified = 0; + } + + if (not_modified == 1) { + req->status = 304; + goto abort; + } + + Dprintk("looked up cached dentry %p, (count %d.)\n", dentry, dentry ? atomic_read(&dentry->d_count) : -1 ); + + url_hist_hit(req->total_file_len); +out: + install_req_dentry(req, dentry, mnt); + req->lookup_dir = 0; + return 0; + +cachemiss: + return 1; + +abort: + if (dentry) { + if (!IS_ERR(dentry)) + dput(dentry); + dentry = NULL; + } + if (mnt) { + if (!IS_ERR(mnt)) + mntput(mnt); + mnt = NULL; + } +#if CONFIG_TUX_DEBUG + if (!not_modified) { + TDprintk("req %p has lookup errors!\n", req); + if (tux_TDprintk) + print_req(req); + } +#endif + req_err(req); + goto out; +} + +int handle_gzip_req (tux_req_t *req, unsigned int flags) +{ + char *curr = req->objectname + req->objectname_len; + struct dentry *dentry; + struct vfsmount *mnt = NULL; + struct inode *inode, *orig_inode; + loff_t size, orig_size; + + *curr++ = '.'; + *curr++ = 'g'; + *curr++ = 'z'; + *curr++ = 0; + req->objectname_len += 3; + + dentry = tux_lookup(req, req->objectname, flags, &mnt); + + req->objectname_len -= 3; + req->objectname[req->objectname_len] = 0; + + if (!dentry) + return 0; + if (IS_ERR(dentry)) { + if (PTR_ERR(dentry) == -EWOULDBLOCKIO) { + release_req_dentry(req); + return 1; + } + return 0; + } + + inode = dentry->d_inode; + size = inode->i_size; + orig_inode = req->dentry->d_inode; + orig_size = orig_inode->i_size; + + if (!tux_permission(inode) + 
&& (size < orig_size) + && (inode->i_mtime >= orig_inode->i_mtime)) { + + release_req_dentry(req); + install_req_dentry(req, dentry, mnt); + req->total_file_len = req->output_len = size; + Dprintk("content WILL be gzipped!\n"); + req->content_gzipped = 1; + } else { + dput(dentry); + mntput(mnt); + } + + return 0; +} + +static spinlock_t mimetypes_lock = SPIN_LOCK_UNLOCKED; + +static LIST_HEAD(mimetypes_head); + +static mimetype_t default_mimetype = { type: "text/plain", type_len: 10, expire_str: "", expire_str_len: 0 }; + +#define MAX_MIMETYPE_LEN 128 +#define MAX_CACHE_CONTROL_AGE_LEN 30 + +void add_mimetype (char *new_ext, char *new_type, char *new_expire) +{ + int type_len = strlen(new_type); + int ext_len = strlen(new_ext); + int expire_len = strlen(new_expire); + mimetype_t *mime; + char *ext, *type, *expire; + + if (type_len > MAX_MIMETYPE_LEN) + type_len = MAX_MIMETYPE_LEN; + if (ext_len > MAX_URI_LEN) + ext_len = MAX_URI_LEN; + if (expire_len > MAX_CACHE_CONTROL_AGE_LEN) + expire_len = MAX_CACHE_CONTROL_AGE_LEN; + + mime = tux_kmalloc(sizeof(*mime)); + memset(mime, 0, sizeof(*mime)); + ext = tux_kmalloc(ext_len + 1); + type = tux_kmalloc(type_len + 1); + expire = tux_kmalloc(expire_len + 1); + + strncpy(ext, new_ext, ext_len); + strncpy(type, new_type, type_len); + strncpy(expire, new_expire, expire_len); + + // in case one of the above parameters was too long : + + ext[ext_len] = '\0'; + type[type_len] = '\0'; + expire[expire_len] = '\0'; + + mime->ext = ext; + mime->ext_len = ext_len; + + mime->type = type; + mime->type_len = type_len; + + mime->expire_str = expire; + mime->expire_str_len = expire_len; + + mime->special = NORMAL_MIME_TYPE; + if (!strcmp(type, "TUX/redirect")) + mime->special = MIME_TYPE_REDIRECT; + if (!strcmp(type, "TUX/CGI")) + mime->special = MIME_TYPE_CGI; + if (!strcmp(type, "TUX/module")) + mime->special = MIME_TYPE_MODULE; + + spin_lock(&mimetypes_lock); + list_add(&mime->list, &mimetypes_head); + spin_unlock(&mimetypes_lock); +} 
+ +static inline int ext_matches (char *file, int len, char *ext, int extlen) +{ + int i; + char *tmp = file + len-1; + char *tmp2 = ext + extlen-1; + + if (len < extlen) + return 0; + + for (i = 0; i < extlen; i++) { + if (*tmp != *tmp2) + return 0; + tmp--; + tmp2--; + } + return 1; +} + +/* + * Overhead is not a problem, we cache the MIME type + * in the dentry. + */ +static mimetype_t * lookup_mimetype (tux_req_t *req) +{ + char *objectname = req->objectname; + int len = req->objectname_len; + mimetype_t *mime = NULL; + struct list_head *head, *tmp, *tmp1, *tmp2, *tmp3; + + if (!memchr(objectname, '.', len)) + goto out; + + spin_lock(&mimetypes_lock); + head = &mimetypes_head; + tmp = head->next; + + while (tmp != head) { + mime = list_entry(tmp, mimetype_t, list); + if (ext_matches(objectname, len, mime->ext, mime->ext_len)) { + /* + * Percolate often-used mimetypes up: + */ + if (tmp->prev != &mimetypes_head) { + tmp1 = tmp; + tmp2 = tmp->prev; + tmp3 = tmp->prev->prev; + list_del(tmp1); + list_del(tmp2); + list_add(tmp, tmp3); + list_add(tmp2, tmp); + } + break; + } else + mime = NULL; + tmp = tmp->next; + } + spin_unlock(&mimetypes_lock); + +out: + if (!mime) + mime = &default_mimetype; + return mime; +} + +void free_mimetypes (void) +{ + struct list_head *head, *tmp, *next; + mimetype_t *mime; + + spin_lock(&mimetypes_lock); + head = &mimetypes_head; + tmp = head->next; + + while (tmp != head) { + next = tmp->next; + mime = list_entry(tmp, mimetype_t, list); + list_del(tmp); + + kfree(mime->ext); + mime->ext = NULL; + kfree(mime->type); + mime->type = NULL; + kfree(mime); + + tmp = next; + } + spin_unlock(&mimetypes_lock); +} + +/* + * Various constant HTTP responses: + */ + +static const char forbidden[] = + "HTTP/1.1 403 Forbidden\r\n" + "Connection: Keep-Alive\r\n" \ + "Content-Length: 24\r\n\r\n" + "<HTML> Forbidden </HTML>"; + +static const char not_found[] = + "HTTP/1.1 404 Not Found\r\n" + "Connection: Keep-Alive\r\n" \ + "Content-Length: 29\r\n\r\n" + "<HTML> Page 
Not Found </HTML>"; + +#define NOTMODIFIED_1 \ + "HTTP/1.1 304 Not Modified\r\n" \ + "Connection: Keep-Alive\r\n" \ + "Date: " + +#define NOTMODIFIED_1_LEN (sizeof(NOTMODIFIED_1) - 1) + +#define NOTMODIFIED_2 \ + "\r\nETag: \"" + +#define NOTMODIFIED_2_LEN (sizeof(NOTMODIFIED_2) - 1) + +#define NOTMODIFIED_3 \ + "\"\r\n\r\n" + +#define NOTMODIFIED_3_LEN (sizeof(NOTMODIFIED_3) - 1) + +#define REDIRECT_1 \ + "HTTP/1.1 301 Moved Permanently\r\n" \ + "Location: http://" + +#define REDIRECT_1_LEN (sizeof(REDIRECT_1) - 1) + +#define REDIRECT_2 \ + "/\r\nContent-Length: 36\r\n" \ + "Connection: Keep-Alive\r\n" \ + "Content-Type: text/html\r\n\r\n" \ + "<HTML> 301 Moved Permanently </HTML>" + +#define REDIRECT_2_LEN (sizeof(REDIRECT_2) - 1) + +void send_async_err_forbidden (tux_req_t *req) +{ + send_async_message(req, forbidden, 403, 1); +} + +void send_async_err_not_found (tux_req_t *req) +{ + send_async_message(req, not_found, 404, 1); +} + +static void send_ret_notmodified (tux_req_t *req) +{ + char *buf; + int size; + + size = NOTMODIFIED_1_LEN + DATE_LEN - 1 + NOTMODIFIED_2_LEN + req->etaglen + NOTMODIFIED_3_LEN; + buf = get_abuf(req, size); + memcpy(buf, NOTMODIFIED_1, NOTMODIFIED_1_LEN); + buf += NOTMODIFIED_1_LEN; + memcpy(buf, tux_date, DATE_LEN-1); + buf += DATE_LEN-1; + memcpy(buf, NOTMODIFIED_2, NOTMODIFIED_2_LEN); + buf += NOTMODIFIED_2_LEN; + memcpy(buf, &req->etag, req->etaglen); + buf += req->etaglen; + memcpy(buf, NOTMODIFIED_3, NOTMODIFIED_3_LEN); + buf += NOTMODIFIED_3_LEN; + + req->status = 304; + send_abuf(req, size, MSG_DONTWAIT); + add_req_to_workqueue(req); +} + +static void send_ret_redirect (tux_req_t *req, int cachemiss) +{ + char *buf; + unsigned int size; + unsigned int uts_len = 0; + + size = REDIRECT_1_LEN; + if (req->host_len) + size += req->host_len; + else { + down_read(&uts_sem); + uts_len = strlen(system_utsname.nodename); + size += uts_len; + } + if (req->objectname[0] != '/') + size++; + size += req->objectname_len; + size += REDIRECT_2_LEN; + + if (size
> PAGE_SIZE) { + req->error = TUX_ERROR_CONN_CLOSE; + zap_request(req, cachemiss); + return; + } + + buf = get_abuf(req, size); + + memcpy(buf, REDIRECT_1, REDIRECT_1_LEN); + buf += REDIRECT_1_LEN; + + Dprintk("req %p, host: %s, host_len: %d.\n", req, req->host, req->host_len); + if (req->host_len) { + memcpy(buf, req->host, req->host_len); + buf += req->host_len; + } else { + memcpy(buf, system_utsname.nodename, uts_len); + up_read(&uts_sem); + buf += uts_len; + } + if (req->objectname[0] != '/') { + buf[0] = '/'; + buf++; + } + + memcpy(buf, req->objectname, req->objectname_len); + buf += req->objectname_len; + + memcpy(buf, REDIRECT_2, REDIRECT_2_LEN); + buf += REDIRECT_2_LEN; + + req->status = 301; + send_abuf(req, size, MSG_DONTWAIT); + add_req_to_workqueue(req); +} + +static void http_got_request (tux_req_t *req) +{ + req->host[0] = 0; + req->host_len = 0; + add_tux_atom(req, parse_request); + add_req_to_workqueue(req); +} + + +tux_attribute_t * lookup_tux_attribute (tux_req_t *req) +{ + tux_attribute_t *attr; + struct inode *inode; + mimetype_t *mime; + + attr = tux_kmalloc(sizeof(*attr)); + memset(attr, 0, sizeof(*attr)); + + mime = lookup_mimetype(req); + + inode = req->dentry->d_inode; + if (!inode->i_uid && !inode->i_gid) { + if (mime->special == MIME_TYPE_MODULE) { + attr->tcapi = lookup_tuxmodule(req->objectname); + if (!attr->tcapi) { + req_err(req); + mime = &default_mimetype; + } + } + } else { + if (mime->special && (mime->special != MIME_TYPE_REDIRECT)) + mime = &default_mimetype; + } + attr->mime = mime; + + return attr; +} + +static void handle_range(tux_req_t *req) +{ + if (req->if_range_len) { + time_t range_time; + + range_time = parse_time(req->if_range_str, req->if_range_len); + + /* + * If the file is newer then we send the whole file. 
+ */ + if (range_time < req->mtime ) + goto out_no_range; + } + /* if no offset_end was specified then default to 'end of file': */ + if (!req->offset_end) + req->offset_end = req->total_file_len; + /* + * Sanity checks: + * + * - is the range between 0...file_len-1 ? + * - is offset_end after offset_start? + * + * (note that offset_end is higher by 1) + */ + if ((req->offset_end > req->total_file_len) || + (req->offset_start >= req->total_file_len) || + (req->offset_end <= req->offset_start)) + goto out_no_range; + /* + * If the range is 0...file_len-1 then send the whole file: + */ + if (!req->offset_start && (req->offset_end == req->total_file_len)) + goto out_no_range; + + /* ok, the range is valid, use it: */ + + req->output_len = req->offset_end - req->offset_start; + req->in_file.f_pos = req->offset_start; + return; + +out_no_range: + req->offset_start = 0; + req->offset_end = 0; +} + +static void http_pre_header (tux_req_t *req, int push); +static void http_post_header (tux_req_t *req, int cachemiss); +static void http_send_body (tux_req_t *req, int cachemiss); + +#define DIRLIST_HEAD_1 "\ +\ +Index of %s\ +

Index of %s


\n%s" + +#define DIRLIST_HEAD_2 "\ + Parent Directory\n" + +#define DIRLIST_HEAD_SIZE (sizeof(DIRLIST_HEAD_1) + sizeof(DIRLIST_HEAD_2)) + +static void http_dirlist_head (tux_req_t *req, int cachemiss) +{ + char *buf1, *buf2, *path; + int len; + + buf1 = (char *)__get_free_page(GFP_KERNEL); + buf2 = (char *)__get_free_page(GFP_KERNEL); + if (!buf1 || !buf2) + goto out; + path = tux_print_path(req, req->dentry, req->mnt, buf1, PAGE_SIZE); + if (path[0] == '/' && path[1] == '/' && !path[3]) + path = "/"; + if (2*strlen(path) + DIRLIST_HEAD_SIZE >= PAGE_SIZE) + goto out; + len = sprintf(buf2, DIRLIST_HEAD_1, path, path, req->dentry == req->docroot_dentry ? "" : DIRLIST_HEAD_2); + __send_async_message(req, buf2, 200, len, 0); + +out: + if (buf1) + free_page((unsigned long)buf1); + if (buf2) + free_page((unsigned long)buf2); +} + +#define DIRLIST_TAIL "\ +

Powered by Linux 2.4, TUX 2.0
\n" + +static void http_dirlist_tail (tux_req_t *req, int cachemiss) +{ + __send_async_message(req, DIRLIST_TAIL, 200, sizeof(DIRLIST_TAIL)-1, 1); +} + +static void http_dirlist (tux_req_t *req, int cachemiss) +{ + int head = (req->method == METHOD_HEAD); + + req->lookup_dir = 3; + clear_keepalive(req); + if (!head) { + add_tux_atom(req, http_dirlist_tail); + add_tux_atom(req, list_directory); + add_tux_atom(req, http_dirlist_head); + } + http_pre_header(req, head); + add_req_to_workqueue(req); +} + +static char *host_path_hash(tux_req_t *req, char *tmp) +{ + if (req->host_len < 2) + return NULL; + + switch (mass_hosting_hash) { + default: + case 0: + return req->host; + case 1: + + // www.ABCDEFG.com => A/ABCDEFG.com + + tmp[0] = req->host[0]; + tmp[1] = '/'; + memcpy(tmp + 2, req->host, req->host_len); + tmp[req->host_len + 2] = 0; + + return tmp; + case 2: + // www.ABCDEFG.com => A/AB/ABCDEFG.com + + tmp[0] = req->host[0]; + tmp[1] = '/'; + tmp[2] = req->host[0]; + tmp[3] = req->host[1]; + tmp[4] = '/'; + memcpy(tmp + 5, req->host, req->host_len); + tmp[req->host_len + 5] = 0; + + return tmp; + case 3: + // www.ABCDEFG.com => A/AB/ABC/ABCDEFG.com + + tmp[0] = req->host[0]; + tmp[1] = '/'; + tmp[2] = req->host[0]; + tmp[3] = req->host[1]; + tmp[4] = '/'; + tmp[5] = req->host[0]; + tmp[6] = req->host[1]; + tmp[7] = req->host[2]; + tmp[8] = '/'; + memcpy(tmp + 9, req->host, req->host_len); + tmp[req->host_len + 9] = 0; + + return tmp; + } +} + +static struct dentry * vhost_lookup (tux_req_t *req, struct nameidata* base, struct vfsmount **mnt) +{ + struct dentry *dentry = NULL; + // 255.255.255.255 + char ip [3+1+3+1+3+1+3 + 2]; + + if (req->virtual >= TUX_VHOST_IP) { + sprintf(ip, "%d.%d.%d.%d", NIPQUAD(req->sock->sk->rcv_saddr)); + dentry = __tux_lookup (req, ip, base, mnt); + if (!dentry || IS_ERR(dentry)) { + if (PTR_ERR(dentry) == -EWOULDBLOCKIO) + return dentry; + base->dentry = dget(req->proto->main_docroot.dentry); + base->mnt = 
mntget(req->proto->main_docroot.mnt); + goto lookup_default; + } + if (req->virtual == TUX_VHOST_IP) + goto done; + + // fall through in mixed mode: + } + + if (!req->host_len) { +lookup_default: + *mnt = NULL; + dentry = __tux_lookup (req, tux_default_vhost, base, mnt); + } else { + char tmp [MAX_HOST_LEN*2]; + char *host_path; + + host_path = host_path_hash(req, tmp); + Dprintk("host path hash returned: {%s}\n", host_path); + + dentry = NULL; + if (host_path) { + *mnt = NULL; + dentry = __tux_lookup (req, host_path, base, mnt); + } + if (!dentry || IS_ERR(dentry)) { + if (PTR_ERR(dentry) == -EWOULDBLOCKIO) + return dentry; + base->dentry = dget(req->proto->main_docroot.dentry); + base->mnt = mntget(req->proto->main_docroot.mnt); + if (req->virtual >= TUX_VHOST_IP) { + *mnt = NULL; + dentry = __tux_lookup (req, ip, base, mnt); + if (!dentry || IS_ERR(dentry)) { + if (PTR_ERR(dentry) == -EWOULDBLOCKIO) + return dentry; + base->dentry = dget(req->proto->main_docroot.dentry); + base->mnt = mntget(req->proto->main_docroot.mnt); + } + } + goto lookup_default; + } + } +done: + return dentry; +} + +static void http_lookup_vhost (tux_req_t *req, int cachemiss) +{ + struct dentry *dentry; + struct nameidata base; + struct vfsmount *mnt = NULL; + unsigned int flag = cachemiss ? 
0 : LOOKUP_ATOMIC; + + Dprintk("http_lookup_vhost(%p, %d, virtual: %d, host: %s (%d).)\n", req, flag, req->virtual, req->host, req->host_len); + + base.flags = LOOKUP_POSITIVE|LOOKUP_FOLLOW|flag; + base.last_type = LAST_ROOT; + base.dentry = dget(req->proto->main_docroot.dentry); + base.mnt = mntget(req->proto->main_docroot.mnt); + + dentry = vhost_lookup(req, &base, &mnt); + + Dprintk("looked up dentry %p.\n", dentry); + + if (dentry && !IS_ERR(dentry) && !dentry->d_inode) + TUX_BUG(); + + if (!dentry || IS_ERR(dentry)) { + if (PTR_ERR(dentry) == -EWOULDBLOCKIO) { + add_tux_atom(req, http_lookup_vhost); + queue_cachemiss(req); + return; + } + goto abort; + } + + req->docroot_dentry = dentry; + req->docroot_mnt = mnt; + + add_tux_atom(req, http_process_message); + add_req_to_workqueue(req); + return; +abort: + if (dentry) { + if (!IS_ERR(dentry)) + dput(dentry); + dentry = NULL; + } + if (mnt) { + if (!IS_ERR(mnt)) + mntput(mnt); + mnt = NULL; + } + req_err(req); + add_req_to_workqueue(req); +} + +static void http_process_message (tux_req_t *req, int cachemiss) +{ + tux_attribute_t *attr; + int missed; + unsigned int lookup_flag = cachemiss ? 0 : LOOKUP_ATOMIC; + + Dprintk("handling req %p, cachemiss: %d.\n", req, cachemiss); + + /* + * URL redirection support - redirect all valid requests + * to the first userspace module. 
+ */ + if (tux_all_userspace) { + tcapi_template_t *tcapi = get_first_usermodule(); + if (tcapi) { + req->usermode = 1; + req->usermodule_idx = tcapi->userspace_id; + goto usermode; + } + } + missed = lookup_url(req, lookup_flag); + if (missed == 2) { + if (req->query_str) { + req->error = TUX_ERROR_REDIRECT; + goto error; + } + send_ret_redirect(req, cachemiss); + return; + } + if (req->error) + goto error; + if (missed) { +cachemiss: + if (cachemiss) + TUX_BUG(); + Dprintk("uncached request.\n"); + INC_STAT(static_lookup_cachemisses); + if (req->dentry) + TUX_BUG(); + add_tux_atom(req, http_process_message); + queue_cachemiss(req); + return; + } + /* + * HTML directory indexing. + */ + if (S_ISDIR(req->dentry->d_inode->i_mode)) + return http_dirlist(req, cachemiss); + if (!S_ISREG(req->dentry->d_inode->i_mode)) + TUX_BUG(); + + + attr = req->dentry->d_extra_attributes; + if (!attr) { + attr = lookup_tux_attribute(req); + if (!attr) + TUX_BUG(); + req->dentry->d_extra_attributes = attr; + } + if (attr->mime) + Dprintk("using MIME type %s:%s, %d.\n", attr->mime->type, attr->mime->ext, attr->mime->special); + if (attr->tcapi) { + req->usermode = 1; + req->usermodule_idx = attr->tcapi->userspace_id; + if (req->module_dentry) + TUX_BUG(); + req->module_dentry = dget(req->dentry); + release_req_dentry(req); + goto usermode; + } + + switch (attr->mime->special) { + case MIME_TYPE_MODULE: + req->usermode = 1; + goto usermode; + + case MIME_TYPE_REDIRECT: + req->error = TUX_ERROR_REDIRECT; + goto error; + + case MIME_TYPE_CGI: +#if CONFIG_TUX_EXTCGI + Dprintk("CGI request %p.\n", req); + query_extcgi(req); + return; +#endif + + default: + if (req->query_str) { + req->error = TUX_ERROR_REDIRECT; + goto error; + } + } + req->attr = attr; + switch (req->method) { + case METHOD_GET: + case METHOD_HEAD: + break; + default: + req->error = TUX_ERROR_REDIRECT; + goto error; + } + if (req->usermode) + TUX_BUG(); + + req->output_len = req->total_file_len; + /* + * Do range 
calculations. + */ + if (req->offset_end || req->offset_start) + handle_range(req); + + if (req->may_send_gzip && !req->offset_start && !req->offset_end) { + if (handle_gzip_req(req, lookup_flag)) + goto cachemiss; + if ((tux_compression >= 2) && !req->content_gzipped) { + tux_gzip_start(req); + req->content_gzipped = 2; + } + } + if (req->parsed_len) + trunc_headers(req); + + if (req->error) + goto error; + + add_tux_atom(req, http_send_body); + add_tux_atom(req, http_post_header); + + http_pre_header(req, req->method == METHOD_HEAD); + + add_req_to_workqueue(req); + return; + +error: + if (req->error) + zap_request(req, cachemiss); + return; + +usermode: + add_req_to_workqueue(req); +} + +static void http_post_header (tux_req_t *req, int cachemiss) +{ +#if CONFIG_TUX_DEBUG + req->bytes_expected = req->output_len; +#endif + req->bytes_sent = 0; // data comes now. + + add_req_to_workqueue(req); +} + +static void http_send_body (tux_req_t *req, int cachemiss) +{ + int ret; + + Dprintk("SEND req %p <%p> (sock %p, sk %p) (keepalive: %d, status: %d)\n", req, __builtin_return_address(0), req->sock, req->sock->sk, req->keep_alive, req->status); + + SET_TIMESTAMP(req->output_timestamp); + + if (req->error) { +#if CONFIG_TUX_DEBUG + req->bytes_expected = 0; +#endif + req->in_file.f_pos = 0; + /* + * We are in the middle of a file transfer, + * zap it immediately: + */ + TDprintk("req->error = TUX_ERROR_CONN_CLOSE.\n"); + req->error = TUX_ERROR_CONN_CLOSE; + zap_request(req, cachemiss); + return; + } + +repeat: + ret = 0; + if (!req->status) + req->status = 200; + if (req->method != METHOD_HEAD) + ret = generic_send_file(req, req->sock, cachemiss); + else { +#if CONFIG_TUX_DEBUG + req->bytes_expected = 0; +#endif + } + + switch (ret) { + case -5: + add_tux_atom(req, http_send_body); + output_timeout(req); + break; + case -4: + add_tux_atom(req, http_send_body); + if (add_output_space_event(req, req->sock)) { + del_tux_atom(req); + goto repeat; + } + break; + case -3: + 
INC_STAT(static_sendfile_cachemisses); + add_tux_atom(req, http_send_body); + queue_cachemiss(req); + break; + case -1: + break; + default: + req->in_file.f_pos = 0; + add_req_to_workqueue(req); + break; + } +} + +#define DEFAULT_DATE "Wed, 01 Jan 1970 00:00:01 GMT" + +char tux_date [DATE_LEN] = DEFAULT_DATE; + +/* + * HTTP header + */ + +#define HEADER_PART1A \ + "HTTP/1.1 200 OK\r\n" \ + "Content-Type: " + +#define HEADER_PART1B \ + "HTTP/1.1 200 OK" + +#define HEADER_PART1AP \ + "HTTP/1.1 206 Partial Content\r\n" \ + "Content-Type: " + +#define HEADER_PART1BP \ + "HTTP/1.1 206 Partial Content" + +#define HEADER_PART1C \ + "HTTP/1.1 404 Page Not Found\r\n" \ + "Content-Type: " + +#define HEADER_PART1D \ + "HTTP/1.1 200 OK\r\n" \ + "Content-Type: text/html\r\n" \ + "Connection: close\r\n" + +#define HEADER_PART2_keepalive "\r\nConnection: Keep-Alive\r\nDate: " + +#define HEADER_PART2_close "\r\nConnection: close\r\nDate: " + +#define HEADER_PART2_none "\r\nDate: " + +// date "%s" + +#define HEADER_PART3A "\r\nContent-Encoding: gzip" +#define HEADER_PART3BX "\r\nContent-Length: " + +/* + * Please acknowledge our hard work by not changing this define, or + * at least please acknowledge us by leaving "TUX/2.0 (Linux)" in + * the ID string. Thanks! 
:-) + */ +#define HEADER_PART3BY "\r\nServer: TUX/2.0 (Linux)\r\nContent-Length: " +#define HEADER_PART3C "\r\nETag: \"" +#define HEADER_PART3ACC "\r\nAccept-Ranges: bytes" +#define HEADER_PART3L "\r\nLast-Modified: " +#define HEADER_PART3P "\r\nContent-Range: bytes " +#define HEADER_PART3CA "\r\nCache-Control: max-age=" +#define HEADER_PART4 "\r\n\r\n" + +#define MAX_OUT_HEADER_LEN (sizeof(HEADER_PART1AP) + MAX_MIMETYPE_LEN + \ + sizeof(HEADER_PART2_keepalive) + DATE_LEN + \ + sizeof(HEADER_PART3A) + sizeof(HEADER_PART3BY) + \ + 12 + sizeof(HEADER_PART3C) + 21 + sizeof(HEADER_PART3L) + \ + sizeof(HEADER_PART3P) + 32 + \ + DATE_LEN + sizeof(HEADER_PART4) + sizeof(tux_extra_html_header) \ + + sizeof(HEADER_PART3CA) + MAX_CACHE_CONTROL_AGE_LEN) + +static void http_pre_header (tux_req_t *req, int head) +{ + int partial = req->offset_start | req->offset_end; + unsigned long flags; + char *buf, *curr; + mimetype_t *mime = NULL; + int size; + + + if (MAX_OUT_HEADER_LEN > PAGE_SIZE) + TUX_BUG(); + if ((req->attr && req->attr->tcapi) || req->usermode) + TUX_BUG(); + +#define COPY_STATIC_PART(nr,curr) \ + do { \ + memcpy(curr, HEADER_PART##nr, sizeof(HEADER_PART##nr)-1); \ + curr += sizeof(HEADER_PART##nr)-1; \ + } while (0) + + buf = curr = get_abuf(req, MAX_OUT_HEADER_LEN); + + if (req->lookup_dir) { + COPY_STATIC_PART(1D, curr); + goto dir_next; + } + mime = req->attr->mime; + if (!mime) + TUX_BUG(); + + if (req->status == 404) { + COPY_STATIC_PART(1C, curr); + memcpy(curr, mime->type, mime->type_len); + curr += mime->type_len; + } else { + if (tux_noid && (mime == &default_mimetype)) { + if (partial) + COPY_STATIC_PART(1BP, curr); + else + COPY_STATIC_PART(1B, curr); + } else { + if (partial) + COPY_STATIC_PART(1AP, curr); + else + COPY_STATIC_PART(1A, curr); + memcpy(curr, mime->type, mime->type_len); + curr += mime->type_len; + } + } + + if (tux_generate_cache_control && mime->expire_str_len) { + COPY_STATIC_PART(3CA, curr); + memcpy(curr, mime->expire_str, 
mime->expire_str_len); + curr += mime->expire_str_len; + } + + if (req->keep_alive /* && (req->version == HTTP_1_0) */) + COPY_STATIC_PART(2_keepalive, curr); + else if (!req->keep_alive && (req->version == HTTP_1_1)) + COPY_STATIC_PART(2_close, curr); + else + // HTTP/1.0 default means close + COPY_STATIC_PART(2_none, curr); + +dir_next: + memcpy(curr, tux_date, DATE_LEN-1); + curr += DATE_LEN-1; + + if (req->content_gzipped) + COPY_STATIC_PART(3A, curr); + + /* + * Content-Length: + */ + if (!req->lookup_dir) { + if (tux_noid) + COPY_STATIC_PART(3BX, curr); + else + COPY_STATIC_PART(3BY, curr); + + if (partial) + curr += sprintf(curr, "%Ld", req->output_len); + else { + // "%d" req->total_file_len + memcpy(curr, &req->etag, req->lendigits); + curr += req->lendigits; + } + if (tux_generate_etags && (req->status != 404)) + { + COPY_STATIC_PART(3C, curr); + memcpy(curr, &req->etag, req->etaglen); + curr += req->etaglen; + curr[0] = '"'; + curr++; + } + if (tux_generate_last_mod || tux_generate_etags) + COPY_STATIC_PART(3ACC, curr); + } + if (tux_generate_last_mod && (req->status != 404)) { + COPY_STATIC_PART(3L, curr); + last_mod_time(curr, req->mtime); + curr += DATE_LEN-1; + } + if (partial) { + COPY_STATIC_PART(3P, curr); + curr += sprintf(curr, "%Ld-%Ld/%Ld", req->offset_start, + req->offset_end-1, req->total_file_len); + } + COPY_STATIC_PART(4, curr); + /* + * Possibly add an extra HTML header: + */ + if (tux_extra_html_header_size && mime && !strcmp(mime->type, "text/html")) { + unsigned int len = tux_extra_html_header_size; + + memcpy(curr, tux_extra_html_header, len); + curr += len; + } + + size = curr-buf; + +#if CONFIG_TUX_DEBUG + *curr = 0; + Dprintk("{%s} [%d/%d]\n", buf, size, strlen(buf)); +#endif + + flags = MSG_DONTWAIT; + if (!head) + flags |= MSG_MORE; + send_abuf(req, size, flags); +} + +void http_illegal_request (tux_req_t *req, int cachemiss) +{ + if (req->status == 304) + send_ret_notmodified(req); + else { + if (req->status == 403) + 
send_async_err_forbidden(req); + else + send_async_err_not_found(req); + } +} + +static int http_check_req_err (tux_req_t *req, int cachemiss) +{ + if ((req->sock->sk->state <= TCP_SYN_RECV) && + !req->sock->sk->tp_pinfo.af_tcp.urg_data) + return 0; +#if CONFIG_TUX_DEBUG + req->bytes_expected = 0; +#endif + req->in_file.f_pos = 0; + req->error = TUX_ERROR_CONN_CLOSE; + zap_request(req, cachemiss); + + return 1; +} + +#define COPY_STR(str) \ + do { memcpy(tmp, str, sizeof(str)-1); \ + tmp += sizeof(str)-1; } while (0) + +static char * http_print_dir_line (tux_req_t *req, char *tmp, char *d_name, int d_len, int d_type, struct dentry *dentry, struct inode *inode) +{ + int len, spaces; + loff_t size; + + switch (d_type) { + case DT_DIR: + COPY_STR("\"[DIR]\""); + break; + case DT_REG: + if ((d_len >= 3) && + (d_name[d_len-3] == '.') && + (d_name[d_len-2] == 'g') && + (d_name[d_len-1] == 'z')) + COPY_STR("\"["); + else + if ((d_len >= 4) && + (d_name[d_len-4] == '.') && + (d_name[d_len-3] == 't') && + (d_name[d_len-2] == 'g') && + (d_name[d_len-1] == 'z')) + COPY_STR("\"["); + else + if ((d_len >= 4) && + (d_name[d_len-4] == '.') && + (d_name[d_len-3] == 't') && + (d_name[d_len-2] == 'x') && + (d_name[d_len-1] == 't')) + COPY_STR("\"["); + else + if ((d_len >= 4) && + (d_name[d_len-4] == '.') && + (d_name[d_len-3] == 'b') && + (d_name[d_len-2] == 'z') && + (d_name[d_len-1] == '2')) + COPY_STR("\"["); + else + if ((d_len >= 4) && + (d_name[d_len-4] == '.') && + (d_name[d_len-3] == 'z') && + (d_name[d_len-2] == 'i') && + (d_name[d_len-1] == 'p')) + COPY_STR("\"["); + else + COPY_STR("\"["); + break; + case DT_LNK: + COPY_STR("\"[LNK]\""); + break; + default: + if (tux_hide_unreadable) + goto out_dput; + COPY_STR("\"["); + break; + } + +#define LIST_1 " " +#define LIST_2_DIR "/\">" +#define LIST_3 " " + + COPY_STR(LIST_1); + memcpy(tmp, d_name, d_len); + tmp += d_len; + if (d_type == DT_DIR) + COPY_STR(LIST_2_DIR); + else + COPY_STR(LIST_2); + spaces = 0; + len = d_len; + 
+ if (len > 25) + len = 25; + memcpy(tmp, d_name, len); + tmp += len; + if (len != d_len) { + *tmp++ = '.'; + *tmp++ = '.'; + } else { + if (d_type == DT_DIR) + *tmp++ = '/'; + else + spaces++; + spaces++; + } + COPY_STR(LIST_3); + while (spaces) { + *tmp++ = ' '; + spaces--; + } +#define FILL 25 + if (d_len < FILL) { + memset(tmp, ' ', FILL-d_len); + tmp += FILL-d_len; + } + + tmp += time_unix2ls(inode->i_mtime, tmp); + *tmp++ = ' '; + + if (d_type != DT_REG) { + COPY_STR(" - "); + goto out_size; + } + size = inode->i_size >> 10; + if (size < 1024) { + tmp += sprintf(tmp, "%8Lik ", size); + goto out_size; + } + size >>= 10; + if (size < 1024) { + tmp += sprintf(tmp, "%8LiM ", size); + goto out_size; + } + size >>= 10; + if (size < 1024) { + tmp += sprintf(tmp, "%8LiG ", size); + goto out_size; + } + size >>= 10; + if (size < 1024) { + tmp += sprintf(tmp, "%8LiT ", size); + goto out_size; + } + size >>= 10; + tmp += sprintf(tmp, "%8LiT ", size); + +out_size: + *tmp++ = '\n'; + *tmp = 0; + + return tmp; +out_dput: + return NULL; +} + +tux_proto_t tux_proto_http = { + defer_accept: 1, + can_redirect: 1, + got_request: http_got_request, + parse_message: parse_http_message, + illegal_request: http_illegal_request, + check_req_err: http_check_req_err, + print_dir_line: http_print_dir_line, + name: "http", +}; + --- linux/net/tux/redirect.c.orig +++ linux/net/tux/redirect.c @@ -0,0 +1,154 @@ +/* + * TUX - Integrated Application Protocols Layer and Object Cache + * + * Copyright (C) 2000, 2001, Ingo Molnar + * + * redirect.c: redirect requests to other server sockets (such as Apache). + */ + +#include + +/**************************************************************** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + ****************************************************************/ + +static void dummy_destructor(struct open_request *req) +{ +} + +static struct or_calltable dummy = +{ + 0, + NULL, + NULL, + &dummy_destructor, + NULL +}; + +static int redirect_sock (tux_req_t *req, const int port) +{ + struct socket *sock = req->sock; + struct open_request *tcpreq; + struct sock *sk, *oldsk; + int err = -1; + + /* + * Look up (optional) listening user-space socket. + */ + local_bh_disable(); + sk = tcp_v4_lookup_listener(INADDR_ANY, port, 0); + /* + * Look up localhost listeners as well. + */ + if (!sk) { + u32 daddr; + ((unsigned char *)&daddr)[0] = 127; + ((unsigned char *)&daddr)[1] = 0; + ((unsigned char *)&daddr)[2] = 0; + ((unsigned char *)&daddr)[3] = 1; + sk = tcp_v4_lookup_listener(daddr, port, 0); + } + local_bh_enable(); + + /* No secondary server found */ + if (!sk) + goto out; + + /* + * Requeue the 'old' socket as an accept-socket of + * the listening socket. This way we can shuffle + * a socket around. Since we've read the input data + * via the non-destructive MSG_PEEK, the secondary + * server can be used transparently. 
+ */ + oldsk = sock->sk; + lock_sock(sk); + + if (sk->state != TCP_LISTEN) + goto out_unlock; + + tcpreq = tcp_openreq_alloc(); + if (!tcpreq) + goto out_unlock; + + unlink_tux_socket(req); + + sock->sk = NULL; + sock->state = SS_UNCONNECTED; + + tcpreq->class = &dummy; + write_lock_irq(&oldsk->callback_lock); + oldsk->socket = NULL; + oldsk->sleep = NULL; + write_unlock_irq(&oldsk->callback_lock); + + oldsk->tp_pinfo.af_tcp.nonagle = 0; + + tcp_acceptq_queue(sk, tcpreq, oldsk); + + sk->data_ready(sk, 0); + + /* + * It's now completely up to the secondary + * server to handle this request. + */ + sock_release(req->sock); + req->sock = NULL; + req->parsed_len = 0; + err = 0; + Dprintk("req %p redirected to secondary server!\n", req); + +out_unlock: + release_sock(sk); + sock_put(sk); +out: + if (err) + Dprintk("NO secondary server for req %p!\n", req); + return err; +} + +void redirect_request (tux_req_t *req, int cachemiss) +{ + if (tux_TDprintk && (req->status != 304)) { + TDprintk("trying to redirect req %p, req->error: %d, req->status: %d.\n", req, req->error, req->status); + print_req(req); + } + + if (cachemiss) + TUX_BUG(); + if (req->error == TUX_ERROR_CONN_CLOSE) + goto out_flush; + if (!req->sock) + TUX_BUG(); + + if (!req->status) + req->status = -1; + if (!req->proto->can_redirect || (req->status == 304) || redirect_sock(req, tux_clientport)) { + if (req->parsed_len) + trunc_headers(req); + req->proto->illegal_request(req, cachemiss); + return; + } else { + if (req->data_sock) + BUG(); + } +out_flush: + clear_keepalive(req); + if (!tux_redirect_logging) + req->status = 0; + flush_request(req, cachemiss); +} + --- linux/net/tux/times.c.orig +++ linux/net/tux/times.c @@ -0,0 +1,392 @@ +/* + * TUX - Integrated Application Protocols Layer and Object Cache + * + * Copyright (C) 2000, 2001, Ingo Molnar + * + * times.c: time conversion routines. 
+ * + * Original time conversion code Copyright (C) 1999 by Arjan van de Ven + */ + +/**************************************************************** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + ****************************************************************/ + +#include +#include +#include +#include + + +#include "times.h" + +char *dayName[7] = { + "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" +}; + +static char *monthName[12] = { + "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" +}; + +char itoa_h[60]={'0','0','0','0','0','0','0','0','0','0', + '1','1','1','1','1','1','1','1','1','1', + '2','2','2','2','2','2','2','2','2','2', + '3','3','3','3','3','3','3','3','3','3', + '4','4','4','4','4','4','4','4','4','4', + '5','5','5','5','5','5','5','5','5','5'}; + +char itoa_l[60]={'0','1','2','3','4','5','6','7','8','9', + '0','1','2','3','4','5','6','7','8','9', + '0','1','2','3','4','5','6','7','8','9', + '0','1','2','3','4','5','6','7','8','9', + '0','1','2','3','4','5','6','7','8','9', + '0','1','2','3','4','5','6','7','8','9'}; + +int time_unix2ls(time_t zulu, char *buf) +{ + int Y=0,M=0,D=0; + int H=0,Min=0,S=0,WD=0; + int I,I2; + time_t rest, delta; + + if (zulu > xtime.tv_sec) + zulu = xtime.tv_sec; + + I=0; + while (I<KHTTPD_NUMYEARS) { + if (TimeDays[I][0]>zulu) + break; + I++; + } + + Y=--I; + if (I<0) { + Y=0; + 
goto BuildYear; + } + I2=0; + while (I2<=12) { + if (TimeDays[I][I2]>zulu) + break; + I2++; + } + + M=I2-1; + + rest=zulu - TimeDays[Y][M]; + WD=WeekDays[Y][M]; + D=rest/86400; + rest=rest%86400; + WD+=D; + WD=WD%7; + H=rest/3600; + rest=rest%3600; + Min=rest/60; + rest=rest%60; + S=rest; + +BuildYear: + Y+=KHTTPD_YEAROFFSET; + + + /* Format: Day, 01 Mon 1999 01:01:01 GMT */ + + delta = xtime.tv_sec - zulu; + if (delta > 6*30*24*60) + // "May 23 2000" + return sprintf( buf, "%s %02i %04i", monthName[M], D+1, Y); + else + // "May 23 10:14" + return sprintf( buf, "%s %02i %02i:%02i", + monthName[M], D+1, H, Min); +} + +static int MonthHash[32] = + {0,0,7,0,0,0,0,0,0,0,0,3,0,0,0,2,6,0,5,0,9,8,4,0,0,11,1,10,0,0,0,0}; + +#define is_digit(c) ((c) >= '0' && (c) <= '9') + +static inline int skip_atoi(char **s) +{ + int i=0; + + while (is_digit(**s)) + i = i*10 + *((*s)++) - '0'; + return i; +} + +time_t mimetime_to_unixtime(char *Q) +{ + int Y,M,D,H,Min,S; + unsigned int Hash; + time_t Temp; + char *s,**s2; + + s=Q; + s2=&s; + + if (strlen(s)<30) return 0; + if (s[3]!=',') return 0; + if (s[19]!=':') return 0; + + s+=5; /* Skip day of week */ + D = skip_atoi(s2); /* Day of month */ + s++; + Hash = (char)s[0]+(char)s[2]; + Hash = (Hash<<1) + (char)s[1]; + Hash = (Hash&63)>>1; + M = MonthHash[Hash]; + s+=4; + Y = skip_atoi(s2); /* Year */ + s++; + H = skip_atoi(s2); /* Hour */ + s++; + Min = skip_atoi(s2); /* Minutes */ + s++; + S = skip_atoi(s2); /* Seconds */ + s++; + if ((s[0]!='G')||(s[1]!='M')||(s[2]!='T')) + { + return 0; /* No GMT */ + } + + if (Y<KHTTPD_YEAROFFSET) Y = KHTTPD_YEAROFFSET; + if (Y>KHTTPD_YEAROFFSET+9) Y = KHTTPD_YEAROFFSET+9; + + Temp = TimeDays[Y-KHTTPD_YEAROFFSET][M]; + Temp += D*86400+H*3600+Min*60+S; + + return Temp; +} + +// writes the full http date, corresponding to time_t received + +void last_mod_time(char * curr, const time_t t) +{ + int day, tod, year, wday, mon, hour, min, sec; + + tod = t % 86400; + day = t / 86400; + if (tod < 0) { + tod += 86400; + --day; + } + + hour = tod / 3600; + tod 
%= 3600; + min = tod / 60; + sec = tod % 60; + + wday = (day + 4) % 7; + if (wday < 0) + wday += 7; + + day -= 11017; + /* day 0 is march 1, 2000 */ + year = 5 + day / 146097; + day = day % 146097; + if (day < 0) { + day += 146097; + --year; + } + /* from now on, day is nonnegative */ + year *= 4; + if (day == 146096) { + year += 3; + day = 36524; + } else { + year += day / 36524; + day %= 36524; + } + year *= 25; + year += day / 1461; + day %= 1461; + year *= 4; + if (day == 1460) { + year += 3; + day = 365; + } else { + year += day / 365; + day %= 365; + } + + day *= 10; + mon = (day + 5) / 306; + day = day + 5 - 306 * mon; + day /= 10; + if (mon >= 10) { + ++year; + mon -= 10; + } else + mon += 2; + + sprintf(curr, "%s, %.2d %s %d %.2d:%.2d:%.2d GMT", dayName[wday], + day+1, monthName[mon], year, hour, min, sec); +} + +// writes the full date in ISO8601 format, +// corresponding to time_t received +// example: 20011126224910 + +int mdtm_time(char * curr, const time_t t) +{ + int day, tod, year, wday, mon, hour, min, sec; + + tod = t % 86400; + day = t / 86400; + if (tod < 0) { + tod += 86400; + --day; + } + + hour = tod / 3600; + tod %= 3600; + min = tod / 60; + sec = tod % 60; + + wday = (day + 4) % 7; + if (wday < 0) + wday += 7; + + day -= 11017; + /* day 0 is march 1, 2000 */ + year = 5 + day / 146097; + day = day % 146097; + if (day < 0) { + day += 146097; + --year; + } + /* from now on, day is nonnegative */ + year *= 4; + if (day == 146096) { + year += 3; + day = 36524; + } else { + year += day / 36524; + day %= 36524; + } + year *= 25; + year += day / 1461; + day %= 1461; + year *= 4; + if (day == 1460) { + year += 3; + day = 365; + } else { + year += day / 365; + day %= 365; + } + + day *= 10; + mon = (day + 5) / 306; + day = day + 5 - 306 * mon; + day /= 10; + if (mon >= 10) { + ++year; + mon -= 10; + } else + mon += 2; + + return sprintf(curr, "213 %.4d%.2d%.2d%.2d%.2d%.2d\r\n", + year, mon+1, day+1, hour, min, sec); +} + +static inline int 
make_num(const char *s) +{ + if (*s >= '0' && *s <= '9') + return 10 * (*s - '0') + *(s + 1) - '0'; + else + return *(s + 1) - '0'; +} + +static inline int make_month(const char *s) +{ + int i; + + for (i = 0; i < 12; i++) + if (!strncmp(monthName[i], s, 3)) + return i+1; + return 0; +} + +time_t parse_time(const char *str, const int str_len) +{ + int hour; + int min; + int sec; + int mday; + int mon; + int year; + + if (str[3] == ',') { + /* Thu, 09 Jan 1993 01:29:59 GMT */ + + if (str_len < 29) + return -1; + + mday = make_num(str+5); + mon = make_month(str + 8); + year = 100 * make_num(str + 12) + make_num(str + 14); + hour = make_num(str + 17); + min = make_num(str + 20); + sec = make_num(str + 23); + } + else { + const char *s; + s = strchr(str, ','); + if (!s || (str_len - (s - str) < 24)) { + /* Wed Jun 9 01:29:59 1993 */ + + if (str_len < 24) + return -1; + + mon = make_month(str+4); + mday = make_num(str+8); + hour = make_num(str+11); + min = make_num(str+14); + sec = make_num(str+17); + year = make_num(str+20)*100 + make_num(str+22); + } + else { + /* Thursday, 10-Jun-93 01:29:59 GMT */ + + mday = make_num(s + 2); + mon = make_month(s + 5); + year = make_num(s + 9) + 1900; + if (year < 1970) + year += 100; + hour = make_num(s + 12); + min = make_num(s + 15); + sec = make_num(s + 18); + } + } + + if (sec < 0 || sec > 59) + return -1; + if (min < 0 || min > 59) + return -1; + if (hour < 0 || hour > 23) + return -1; + if (mday < 1 || mday > 31) + return -1; + if (mon < 1 || mon > 12) + return -1; + if (year < 1970 || year > 2020) + return -1; + + return mktime(year, mon, mday, hour, min, sec); +} --- linux/net/tux/times.h.orig +++ linux/net/tux/times.h @@ -0,0 +1,26 @@ +static time_t TimeDays[10][13] = { + { 852073200, 854751600, 857170800, 859849200, 862441200, 865119600, 867711600, 870390000, 873068400, 875660400, 878338800, 880930800, 883609200 } , + { 883609200, 886287600, 888706800, 891385200, 893977200, 896655600, 899247600, 901926000, 904604400, 
907196400, 909874800, 912466800, 915145200 } , + { 915145200, 917823600, 920242800, 922921200, 925513200, 928191600, 930783600, 933462000, 936140400, 938732400, 941410800, 944002800, 946681200 } , + { 946681200, 949359600, 951865200, 954543600, 957135600, 959814000, 962406000, 965084400, 967762800, 970354800, 973033200, 975625200, 978303600 } , + { 978303600, 980982000, 983401200, 986079600, 988671600, 991350000, 993942000, 996620400, 999298800, 1001890800, 1004569200, 1007161200, 1009839600 } , + { 1009839600, 1012518000, 1014937200, 1017615600, 1020207600, 1022886000, 1025478000, 1028156400, 1030834800, 1033426800, 1036105200, 1038697200, 1041375600 } , + { 1041375600, 1044054000, 1046473200, 1049151600, 1051743600, 1054422000, 1057014000, 1059692400, 1062370800, 1064962800, 1067641200, 1070233200, 1072911600 } , + { 1072911600, 1075590000, 1078095600, 1080774000, 1083366000, 1086044400, 1088636400, 1091314800, 1093993200, 1096585200, 1099263600, 1101855600, 1104534000 } , + { 1104534000, 1107212400, 1109631600, 1112310000, 1114902000, 1117580400, 1120172400, 1122850800, 1125529200, 1128121200, 1130799600, 1133391600, 1136070000 } , + { 1136070000, 1138748400, 1141167600, 1143846000, 1146438000, 1149116400, 1151708400, 1154386800, 1157065200, 1159657200, 1162335600, 1164927600, 1167606000 } +}; +static int WeekDays[10][13] = { + { 3, 6, 6, 2, 4, 0, 2, 5, 1, 3, 6, 1, 4 } , + { 4, 0, 0, 3, 5, 1, 3, 6, 2, 4, 0, 2, 5 } , + { 5, 1, 1, 4, 6, 2, 4, 0, 3, 5, 1, 3, 6 } , + { 6, 2, 3, 6, 1, 4, 6, 2, 5, 0, 3, 5, 1 } , + { 1, 4, 4, 0, 2, 5, 0, 3, 6, 1, 4, 6, 2 } , + { 2, 5, 5, 1, 3, 6, 1, 4, 0, 2, 5, 0, 3 } , + { 3, 6, 6, 2, 4, 0, 2, 5, 1, 3, 6, 1, 4 } , + { 4, 0, 1, 4, 6, 2, 4, 0, 3, 5, 1, 3, 6 } , + { 6, 2, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4, 0 } , + { 0, 3, 3, 6, 1, 4, 6, 2, 5, 0, 3, 5, 1 } +}; +#define KHTTPD_YEAROFFSET 1997 +#define KHTTPD_NUMYEARS 10 --- linux/net/tux/userspace.c.orig +++ linux/net/tux/userspace.c @@ -0,0 +1,27 @@ +/* + * TUX - Integrated Application 
Protocols Layer and Object Cache + * + * Copyright (C) 2000, 2001, Ingo Molnar + * + * userspace.c: handle userspace-module requests + */ + +#include + +/**************************************************************** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + ****************************************************************/ + --- linux/drivers/block/loop.c.orig +++ linux/drivers/block/loop.c @@ -285,7 +285,7 @@ static int lo_receive(struct loop_device spin_lock_irq(&lo->lo_lock); file = lo->lo_backing_file; spin_unlock_irq(&lo->lo_lock); - do_generic_file_read(file, &pos, &desc, lo_read_actor); + do_generic_file_read(file, &pos, &desc, lo_read_actor, 0); return desc.error; } --- linux/drivers/video/vgacon.c.orig +++ linux/drivers/video/vgacon.c @@ -50,6 +50,7 @@ #include #include #include +#include #include --- linux/arch/i386/kernel/entry.S.orig +++ linux/arch/i386/kernel/entry.S @@ -626,7 +626,15 @@ ENTRY(sys_call_table) .long SYMBOL_NAME(sys_madvise) .long SYMBOL_NAME(sys_getdents64) /* 220 */ .long SYMBOL_NAME(sys_fcntl64) - .long SYMBOL_NAME(sys_ni_syscall) /* reserved for TUX */ +#ifdef CONFIG_TUX + .long SYMBOL_NAME(__sys_tux) +#else +# ifdef CONFIG_TUX_MODULE + .long SYMBOL_NAME(sys_tux) +# else + .long SYMBOL_NAME(sys_ni_syscall) +# endif +#endif .long SYMBOL_NAME(sys_ni_syscall) /* Reserved for Security */ 
.long SYMBOL_NAME(sys_gettid) .long SYMBOL_NAME(sys_readahead) /* 225 */ --- linux/arch/i386/kernel/i386_ksyms.c.orig +++ linux/arch/i386/kernel/i386_ksyms.c @@ -177,6 +177,7 @@ EXPORT_SYMBOL(atomic_dec_and_lock); extern int is_sony_vaio_laptop; EXPORT_SYMBOL(is_sony_vaio_laptop); +EXPORT_SYMBOL_GPL(show_stack); #ifdef CONFIG_MULTIQUAD EXPORT_SYMBOL(xquad_portio); --- linux/arch/alpha/kernel/alpha_ksyms.c.orig +++ linux/arch/alpha/kernel/alpha_ksyms.c @@ -157,15 +157,10 @@ EXPORT_SYMBOL(alpha_write_fp_reg_s); /* In-kernel system calls. */ EXPORT_SYMBOL(kernel_thread); EXPORT_SYMBOL(sys_open); -EXPORT_SYMBOL(sys_dup); EXPORT_SYMBOL(sys_exit); -EXPORT_SYMBOL(sys_write); -EXPORT_SYMBOL(sys_read); -EXPORT_SYMBOL(sys_lseek); EXPORT_SYMBOL(__kernel_execve); EXPORT_SYMBOL(sys_setsid); EXPORT_SYMBOL(sys_sync); -EXPORT_SYMBOL(sys_wait4); /* Networking helper routines. */ EXPORT_SYMBOL(csum_tcpudp_magic); --- linux/arch/alpha/kernel/entry.S.orig +++ linux/arch/alpha/kernel/entry.S @@ -993,7 +993,15 @@ sys_call_table: .quad alpha_ni_syscall .quad alpha_ni_syscall /* 220 */ .quad alpha_ni_syscall +#ifdef CONFIG_TUX + .quad __sys_tux +#else +# ifdef CONFIG_TUX_MODULE + .quad sys_tux +# else .quad alpha_ni_syscall +# endif +#endif .quad alpha_ni_syscall .quad alpha_ni_syscall .quad alpha_ni_syscall /* 225 */ --- linux/arch/x86_64/kernel/x8664_ksyms.c.orig +++ linux/arch/x86_64/kernel/x8664_ksyms.c @@ -199,11 +199,7 @@ EXPORT_SYMBOL(copy_user_generic); /* Export kernel syscalls */ EXPORT_SYMBOL(sys_wait4); EXPORT_SYMBOL(sys_exit); -EXPORT_SYMBOL(sys_write); -EXPORT_SYMBOL(sys_read); EXPORT_SYMBOL(sys_open); -EXPORT_SYMBOL(sys_lseek); -EXPORT_SYMBOL(sys_dup); EXPORT_SYMBOL(sys_delete_module); EXPORT_SYMBOL(sys_sync); EXPORT_SYMBOL(sys_pause); --- linux/Makefile.orig +++ linux/Makefile @@ -91,8 +91,14 @@ export MODLIB CPPFLAGS := -D__KERNEL__ -I$(HPATH) +ifeq "$(CONFIG_TUX_DEBUG)" "" CFLAGS := $(CPPFLAGS) -Wall -Wstrict-prototypes -Wno-trigraphs -O2 \ - -fno-strict-aliasing 
-fno-common + -fomit-frame-pointer -fno-strict-aliasing -fno-common +else +CFLAGS := $(CPPFLAGS) -Wall -Wstrict-prototypes -Wno-trigraphs -O2 \ + -fno-omit-frame-pointer -fno-strict-aliasing -fno-common -g +endif + ifndef CONFIG_FRAME_POINTER CFLAGS += -fomit-frame-pointer endif