--- linux/kernel/time.c.orig Sun Dec 12 15:48:01 1999 +++ linux/kernel/time.c Sun Dec 12 15:49:17 1999 @@ -98,7 +98,7 @@ if (get_user(value, tptr)) return -EFAULT; write_lock_irq(&xtime_lock); - xtime.tv_sec = value; + vsys_data->tv_sec = xtime.tv_sec = value; xtime.tv_usec = 0; time_adjust = 0; /* stop active adjtime() */ time_status |= STA_UNSYNC; @@ -145,6 +145,7 @@ { write_lock_irq(&xtime_lock); xtime.tv_sec += sys_tz.tz_minuteswest * 60; + vsys_data->tv_sec = xtime.tv_sec; write_unlock_irq(&xtime_lock); } --- linux/kernel/sched.c.orig Sun Dec 12 17:09:35 1999 +++ linux/kernel/sched.c Sun Dec 12 17:09:55 1999 @@ -1104,7 +1104,7 @@ case TIME_INS: if (xtime.tv_sec % 86400 == 0) { - xtime.tv_sec--; + vsys_data->tv_sec--, xtime.tv_sec--; time_state = TIME_OOP; printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n"); } @@ -1112,7 +1112,7 @@ case TIME_DEL: if ((xtime.tv_sec + 1) % 86400 == 0) { - xtime.tv_sec++; + vsys_data->tv_sec++, xtime.tv_sec++; time_state = TIME_WAIT; printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n"); } @@ -1242,7 +1242,7 @@ if (xtime.tv_usec >= 1000000) { xtime.tv_usec -= 1000000; - xtime.tv_sec++; + vsys_data->tv_sec++, xtime.tv_sec++; second_overflow(); } } --- linux/include/asm-i386/processor.h.orig Sun Dec 12 03:35:24 1999 +++ linux/include/asm-i386/processor.h Sun Dec 12 03:55:56 1999 @@ -114,7 +114,9 @@ #define cpu_has_pae \ (boot_cpu_data.x86_capability & X86_FEATURE_PAE) #define cpu_has_tsc \ - (cpu_data[smp_processor_id()].x86_capability & X86_FEATURE_TSC) + (boot_cpu_data.x86_capability & X86_FEATURE_TSC) +#define cpu_has_fastcall \ + (boot_cpu_data.x86_capability & X86_FEATURE_SEP) extern char ignore_irq13; --- linux/include/asm-i386/bugs.h.orig Sun Dec 12 03:35:17 1999 +++ linux/include/asm-i386/bugs.h Sun Dec 12 03:56:08 1999 @@ -398,10 +398,18 @@ * If we configured ourselves for a TSC, we'd better have one! */ #ifdef CONFIG_X86_TSC - if (!(boot_cpu_data.x86_capability & X86_FEATURE_TSC)) + if (!cpu_has_tsc) panic("Kernel compiled for Pentium+, requires TSC"); #endif +/* + * If we configured ourselves for a 686 CPU, we'd better have + * SYSENTER/SYSEXIT! + */ +#ifdef CONFIG_X86_FASTCALL + if (!cpu_has_fastcall) + panic("Kernel compiled for Pentium+, requires fastcall"); +#endif /* * If we were told we had a good APIC for SMP, we'd better be a PPro */ --- linux/include/asm-i386/vsyscall.h.orig Sun Dec 12 04:07:04 1999 +++ linux/include/asm-i386/vsyscall.h Sun Dec 12 17:03:59 1999 @@ -0,0 +1,32 @@ +/* + * vsyscall.h: Virtual System Call Interface support + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1999 Ingo Molnar + */ + +#ifndef _ASM_VSYSCALL_H +#define _ASM_VSYSCALL_H + +extern void vsyscall_init (void); + +/* + * Virtual System Call entry at 0xffff0000. We reserve 124k space for + * future expansion. (this is not the upper limit at all, we can use + * lower fixmap addresses as well) Although we should not grow out of + * this space too soon. 
+ */ +#define NR_VSYSCALL_FIXPAGES 15 +#define VSYSCALL_CODE (fix_to_virt(FIX_VSYSCALL_END)) +#define VSYSCALL_DATA (fix_to_virt(FIX_VSYSCALL_END-2)) + +typedef struct vsys_data_struct { + unsigned int tv_sec; +} vsys_data_t; + +#define vsys_data ((vsys_data_t *)VSYSCALL_DATA) + +#endif --- linux/include/asm-i386/fixmap.h.orig Sun Dec 12 06:17:24 1999 +++ linux/include/asm-i386/fixmap.h Sun Dec 12 17:04:12 1999 @@ -17,6 +17,7 @@ #include #include #include +#include #ifdef CONFIG_HIGHMEM #include #include @@ -48,6 +49,11 @@ * fix-mapped? */ enum fixed_addresses { + /* + * Dont move these! User-space relies on it. + */ + FIX_VSYSCALL_START = 0, + FIX_VSYSCALL_END = FIX_VSYSCALL_START + NR_VSYSCALL_FIXPAGES-1, #ifdef CONFIG_X86_LOCAL_APIC FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ #endif @@ -69,6 +75,7 @@ }; extern void set_fixmap (enum fixed_addresses idx, unsigned long phys); +extern void set_fixmap_ro (enum fixed_addresses idx, unsigned long phys); /* * used by vmalloc.c. --- linux/arch/i386/boot/compressed/Makefile.orig Sun Dec 12 08:58:01 1999 +++ linux/arch/i386/boot/compressed/Makefile Sun Dec 12 09:48:34 1999 @@ -37,6 +37,7 @@ tmppiggy=_tmp_$$$$piggy; \ rm -f $$tmppiggy $$tmppiggy.gz $$tmppiggy.lnk; \ $(OBJCOPY) $(SYSTEM) $$tmppiggy; \ + cp $$tmppiggy 1; \ gzip -f -9 < $$tmppiggy > $$tmppiggy.gz; \ echo "SECTIONS { .data : { input_len = .; LONG(input_data_end - input_data) input_data = .; *(.data) input_data_end = .; }}" > $$tmppiggy.lnk; \ $(LD) -r -o piggy.o -b binary $$tmppiggy.gz -b elf32-i386 -T $$tmppiggy.lnk; \ --- linux/arch/i386/mm/init.c.orig Sun Dec 12 06:02:38 1999 +++ linux/arch/i386/mm/init.c Sun Dec 12 07:22:33 1999 @@ -239,20 +239,25 @@ extern char _text, _etext, _edata, __bss_start, _end; extern char __init_begin, __init_end; -static void set_pte_phys (unsigned long vaddr, unsigned long phys) +static void set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot) { - pgprot_t prot; pgd_t *pgd; pmd_t *pmd; pte_t *pte; + printk("set_pte_phys(%08lx, phys:%08lx, prot:%08lx)\n", + vaddr, phys, pgprot_val(prot)); + pgd = swapper_pg_dir + __pgd_offset(vaddr); +pgd_ERROR(*pgd); pmd = pmd_offset(pgd, vaddr); +pmd_ERROR(*pmd); pte = pte_offset(pmd, vaddr); - prot = PAGE_KERNEL; +pte_ERROR(*pte); if (boot_cpu_data.x86_capability & X86_FEATURE_PGE) pgprot_val(prot) |= _PAGE_GLOBAL; set_pte(pte, mk_pte_phys(phys, prot)); +pte_ERROR(*pte); /* * It's enough to flush this one mapping. 
@@ -260,6 +265,17 @@ __flush_tlb_one(vaddr); } +void set_fixmap_ro (enum fixed_addresses idx, unsigned long phys) +{ + unsigned long address = __fix_to_virt(idx); + + if (idx >= __end_of_fixed_addresses) { + printk("Invalid set_fixmap\n"); + return; + } + set_pte_phys(address, phys, PAGE_READONLY); +} + void set_fixmap (enum fixed_addresses idx, unsigned long phys) { unsigned long address = __fix_to_virt(idx); @@ -268,7 +284,7 @@ printk("Invalid set_fixmap\n"); return; } - set_pte_phys(address,phys); + set_pte_phys(address, phys, PAGE_KERNEL); } static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t *pgd_base) @@ -297,7 +313,7 @@ for (; (j < PTRS_PER_PMD) && start; pmd++, j++) { if (pmd_none(*pmd)) { pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); - set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte))); + set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); if (pte != pte_offset(pmd, 0)) BUG(); } --- linux/arch/i386/kernel/entry.S.orig Sun Dec 12 03:35:30 1999 +++ linux/arch/i386/kernel/entry.S Sun Dec 12 03:35:57 1999 @@ -2,6 +2,8 @@ * linux/arch/i386/entry.S * * Copyright (C) 1991, 1992 Linus Torvalds + * + * implemented fast system call support, Ingo Molnar */ /* @@ -45,6 +47,7 @@ #include #define ASSEMBLY #include +#include EBX = 0x00 ECX = 0x04 @@ -95,7 +98,7 @@ movl %dx,%ds; \ movl %dx,%es; -#define RESTORE_ALL \ +#define RESTORE_ALL_REGS\ popl %ebx; \ popl %ecx; \ popl %edx; \ @@ -106,12 +109,71 @@ 1: popl %ds; \ 2: popl %es; \ addl $4,%esp; \ -3: iret; \ .section .fixup,"ax"; \ 4: movl $0,(%esp); \ jmp 1b; \ 5: movl $0,(%esp); \ jmp 2b; \ +.previous; \ +.section __ex_table,"a";\ + .align 4; \ + .long 1b,4b; \ + .long 2b,5b; \ +.previous; + +#define RESTORE_ALL \ + RESTORE_ALL_REGS \ +3: iret; \ +.section .fixup,"ax"; \ +6: pushl %ss; \ + popl %ds; \ + pushl %ss; \ + popl %es; \ + pushl $11; \ + call do_exit; \ +.previous; \ +.section __ex_table,"a";\ + .align 4; \ + .long 3b,6b; \ +.previous + +#define SAVE_ALL_FASTSYS \ + pushl $__USER_DS; \ + pushl %ebp; \ + pushfl; \ + cld; \ + pushl $__USER_CS; \ + pushl %edi; \ + pushl %eax; \ + subl $8, %esp; \ + pushl %eax; \ + pushl %ebp; \ + pushl %edi; \ + pushl %esi; \ + pushl %edx; \ + pushl %ecx; \ + pushl %ebx; \ + movl $(__KERNEL_DS),%edx; \ + movl %dx,%ds; \ + movl %dx,%es; + +#define RESTORE_ALL_REGS_FASTSYS\ + movl $(__USER_DS),%eax; \ + movl %ax,%ds; \ + movl %ax,%es; \ + popl %ebx; \ + addl $8,%esp; \ + popl %esi; \ + popl %edx; \ + popl %ecx; \ + popl %eax; \ + addl $20,%esp; \ + popfl; + +#define RESTORE_ALL_FASTSYS \ + RESTORE_ALL_REGS_FASTSYS \ +3: sysexit; \ +.section .fixup,"ax"; \ 6: pushl %ss; \ popl %ds; \ pushl %ss; \ @@ -121,15 +183,88 @@ .previous; \ .section __ex_table,"a";\ .align 4; \ - .long 1b,4b; \ - .long 2b,5b; \ .long 3b,6b; \ .previous + #define GET_CURRENT(reg) \ movl %esp, reg; \ andl $-8192, reg; +#ifdef CONFIG_X86_FASTCALL + +ENTRY(fast_system_call) + +/* + * %ebp has the caller's stack, first entry on the stack is the + * return eip ... a bit ugly but we have to do it this way due + * to the (only?) 5 parameter system call select(). + * + * alternatively, if we could restrict all x86 system calls to be + * 4-parameter only, and send 5-6 parameter system calls through + * special stubs which fetch additional parameters from the stack, + * then we could further speed up the common path. + */ + + sti # ugh! 
SYSENTER disables IRQs + SAVE_ALL_FASTSYS + + GET_CURRENT(%ebx) + cmpl $(NR_syscalls),%eax + jae badfastsys + testl $0x20,flags(%ebx) # PF_TRACESYS + jne tracefastsys + call *SYMBOL_NAME(fastsys_call_table)(,%eax,4) + movl %eax,EAX(%esp) # save the return value + .globl ret_from_fastsys_call +ret_from_fastsys_call: + movl SYMBOL_NAME(bh_mask),%eax + andl SYMBOL_NAME(bh_active),%eax + jne handle_bottom_half_fastsys +ret_with_reschedule_fastsys: + cmpl $0,need_resched(%ebx) + jne reschedule_fastsys + cmpl $0,sigpending(%ebx) + jne signal_return_fastsys +restore_all_fastsys: + RESTORE_ALL_FASTSYS + + ALIGN +handle_bottom_half_fastsys: + call SYMBOL_NAME(do_bottom_half) + jmp ret_with_reschedule_fastsys; + + ALIGN +reschedule_fastsys: + call SYMBOL_NAME(schedule) + jmp ret_with_reschedule_fastsys; + +signal_return_fastsys: + sti # we can get here from an interrupt handler + movl %esp,%eax + xorl %edx,%edx + call SYMBOL_NAME(do_signal) + jmp restore_all_fastsys + +tracefastsys: + movl $-ENOSYS,EAX(%esp) + call SYMBOL_NAME(syscall_trace) + movl ORIG_EAX(%esp),%eax + cmpl $(NR_syscalls),%eax + jae tracefastsys_exit + call *SYMBOL_NAME(fastsys_call_table)(,%eax,4) + movl %eax,EAX(%esp) # save the return value +tracefastsys_exit: + call SYMBOL_NAME(syscall_trace) + jmp ret_from_fastsys_call +badfastsys: + movl $-ENOSYS,EAX(%esp) + jmp ret_from_fastsys_call + + ALIGN + +#endif /* CONFIG_X86_FASTCALL */ + ENTRY(lcall7) pushfl # We get a different stack layout with call gates, pushl %eax # which has to be cleaned up later.. @@ -173,15 +308,14 @@ jmp ret_from_sys_call - ALIGN - .globl ret_from_fork -ret_from_fork: +ENTRY(ret_from_fork) pushl %ebx call SYMBOL_NAME(schedule_tail) addl $4, %esp GET_CURRENT(%ebx) jmp ret_from_sys_call + /* * Return to user mode is not as complex as all this looks, * but we want the default path for a system call return to @@ -269,7 +403,7 @@ ALIGN reschedule: - call SYMBOL_NAME(schedule) # test + call SYMBOL_NAME(schedule) jmp ret_from_sys_call ENTRY(divide_error) @@ -399,6 +533,215 @@ jmp error_code .data +#ifdef CONFIG_X86_FASTCALL +ENTRY(fastsys_call_table) + .long SYMBOL_NAME(sys_ni_syscall) /* 0 - old "setup()" system call*/ + .long SYMBOL_NAME(sys_exit) + .long SYMBOL_NAME(sys_fork) + .long SYMBOL_NAME(sys_read) + .long SYMBOL_NAME(sys_write) + .long SYMBOL_NAME(sys_open) /* 5 */ + .long SYMBOL_NAME(sys_close) + .long SYMBOL_NAME(sys_waitpid) + .long SYMBOL_NAME(sys_creat) + .long SYMBOL_NAME(sys_link) + .long SYMBOL_NAME(sys_unlink) /* 10 */ + .long SYMBOL_NAME(sys_execve) + .long SYMBOL_NAME(sys_chdir) + .long SYMBOL_NAME(sys_time) + .long SYMBOL_NAME(sys_mknod) + .long SYMBOL_NAME(sys_chmod) /* 15 */ + .long SYMBOL_NAME(sys_lchown) + .long SYMBOL_NAME(sys_ni_syscall) /* old break syscall holder */ + .long SYMBOL_NAME(sys_stat) + .long SYMBOL_NAME(sys_lseek) + .long SYMBOL_NAME(sys_getpid) /* 20 */ + .long SYMBOL_NAME(sys_mount) + .long SYMBOL_NAME(sys_oldumount) + .long SYMBOL_NAME(sys_setuid) + .long SYMBOL_NAME(sys_getuid) + .long SYMBOL_NAME(sys_stime) /* 25 */ + .long SYMBOL_NAME(sys_ptrace) + .long SYMBOL_NAME(sys_alarm) + .long SYMBOL_NAME(sys_fstat) + .long SYMBOL_NAME(sys_pause) + .long SYMBOL_NAME(sys_utime) /* 30 */ + .long SYMBOL_NAME(sys_ni_syscall) /* old stty syscall holder */ + .long SYMBOL_NAME(sys_ni_syscall) /* old gtty syscall holder */ + .long SYMBOL_NAME(sys_access) + .long SYMBOL_NAME(sys_nice) + .long SYMBOL_NAME(sys_ni_syscall) /* 35 */ /* old ftime syscall holder */ + .long SYMBOL_NAME(sys_sync) + .long SYMBOL_NAME(sys_kill) + .long 
SYMBOL_NAME(sys_rename) + .long SYMBOL_NAME(sys_mkdir) + .long SYMBOL_NAME(sys_rmdir) /* 40 */ + .long SYMBOL_NAME(sys_dup) + .long SYMBOL_NAME(sys_pipe) + .long SYMBOL_NAME(sys_times) + .long SYMBOL_NAME(sys_ni_syscall) /* old prof syscall holder */ + .long SYMBOL_NAME(sys_brk) /* 45 */ + .long SYMBOL_NAME(sys_setgid) + .long SYMBOL_NAME(sys_getgid) + .long SYMBOL_NAME(sys_signal) + .long SYMBOL_NAME(sys_geteuid) + .long SYMBOL_NAME(sys_getegid) /* 50 */ + .long SYMBOL_NAME(sys_acct) + .long SYMBOL_NAME(sys_umount) /* recycled never used phys() */ + .long SYMBOL_NAME(sys_ni_syscall) /* old lock syscall holder */ + .long SYMBOL_NAME(sys_ioctl) + .long SYMBOL_NAME(sys_fcntl) /* 55 */ + .long SYMBOL_NAME(sys_ni_syscall) /* old mpx syscall holder */ + .long SYMBOL_NAME(sys_setpgid) + .long SYMBOL_NAME(sys_ni_syscall) /* old ulimit syscall holder */ + .long SYMBOL_NAME(sys_olduname) + .long SYMBOL_NAME(sys_umask) /* 60 */ + .long SYMBOL_NAME(sys_chroot) + .long SYMBOL_NAME(sys_ustat) + .long SYMBOL_NAME(sys_dup2) + .long SYMBOL_NAME(sys_getppid) + .long SYMBOL_NAME(sys_getpgrp) /* 65 */ + .long SYMBOL_NAME(sys_setsid) + .long SYMBOL_NAME(sys_sigaction) + .long SYMBOL_NAME(sys_sgetmask) + .long SYMBOL_NAME(sys_ssetmask) + .long SYMBOL_NAME(sys_setreuid) /* 70 */ + .long SYMBOL_NAME(sys_setregid) + .long SYMBOL_NAME(sys_sigsuspend) + .long SYMBOL_NAME(sys_sigpending) + .long SYMBOL_NAME(sys_sethostname) + .long SYMBOL_NAME(sys_setrlimit) /* 75 */ + .long SYMBOL_NAME(sys_old_getrlimit) + .long SYMBOL_NAME(sys_getrusage) + .long SYMBOL_NAME(sys_gettimeofday) + .long SYMBOL_NAME(sys_settimeofday) + .long SYMBOL_NAME(sys_getgroups) /* 80 */ + .long SYMBOL_NAME(sys_setgroups) + .long SYMBOL_NAME(old_select) + .long SYMBOL_NAME(sys_symlink) + .long SYMBOL_NAME(sys_lstat) + .long SYMBOL_NAME(sys_readlink) /* 85 */ + .long SYMBOL_NAME(sys_uselib) + .long SYMBOL_NAME(sys_swapon) + .long SYMBOL_NAME(sys_reboot) + .long SYMBOL_NAME(old_readdir) + .long SYMBOL_NAME(old_mmap) /* 90 */ + .long SYMBOL_NAME(sys_munmap) + .long SYMBOL_NAME(sys_truncate) + .long SYMBOL_NAME(sys_ftruncate) + .long SYMBOL_NAME(sys_fchmod) + .long SYMBOL_NAME(sys_fchown) /* 95 */ + .long SYMBOL_NAME(sys_getpriority) + .long SYMBOL_NAME(sys_setpriority) + .long SYMBOL_NAME(sys_ni_syscall) /* old profil syscall holder */ + .long SYMBOL_NAME(sys_statfs) + .long SYMBOL_NAME(sys_fstatfs) /* 100 */ + .long SYMBOL_NAME(sys_ioperm) + .long SYMBOL_NAME(sys_socketcall) + .long SYMBOL_NAME(sys_syslog) + .long SYMBOL_NAME(sys_setitimer) + .long SYMBOL_NAME(sys_getitimer) /* 105 */ + .long SYMBOL_NAME(sys_newstat) + .long SYMBOL_NAME(sys_newlstat) + .long SYMBOL_NAME(sys_newfstat) + .long SYMBOL_NAME(sys_uname) + .long SYMBOL_NAME(sys_iopl) /* 110 */ + .long SYMBOL_NAME(sys_vhangup) + .long SYMBOL_NAME(sys_ni_syscall) /* old "idle" system call */ + .long SYMBOL_NAME(sys_vm86old) + .long SYMBOL_NAME(sys_wait4) + .long SYMBOL_NAME(sys_swapoff) /* 115 */ + .long SYMBOL_NAME(sys_sysinfo) + .long SYMBOL_NAME(sys_ipc) + .long SYMBOL_NAME(sys_fsync) + .long SYMBOL_NAME(sys_sigreturn) + .long SYMBOL_NAME(sys_clone) /* 120 */ + .long SYMBOL_NAME(sys_setdomainname) + .long SYMBOL_NAME(sys_newuname) + .long SYMBOL_NAME(sys_modify_ldt) + .long SYMBOL_NAME(sys_adjtimex) + .long SYMBOL_NAME(sys_mprotect) /* 125 */ + .long SYMBOL_NAME(sys_sigprocmask) + .long SYMBOL_NAME(sys_create_module) + .long SYMBOL_NAME(sys_init_module) + .long SYMBOL_NAME(sys_delete_module) + .long SYMBOL_NAME(sys_get_kernel_syms) /* 130 */ + .long SYMBOL_NAME(sys_quotactl) + .long 
SYMBOL_NAME(sys_getpgid) + .long SYMBOL_NAME(sys_fchdir) + .long SYMBOL_NAME(sys_bdflush) + .long SYMBOL_NAME(sys_sysfs) /* 135 */ + .long SYMBOL_NAME(sys_personality) + .long SYMBOL_NAME(sys_ni_syscall) /* for afs_syscall */ + .long SYMBOL_NAME(sys_setfsuid) + .long SYMBOL_NAME(sys_setfsgid) + .long SYMBOL_NAME(sys_llseek) /* 140 */ + .long SYMBOL_NAME(sys_getdents) + .long SYMBOL_NAME(sys_select) + .long SYMBOL_NAME(sys_flock) + .long SYMBOL_NAME(sys_msync) + .long SYMBOL_NAME(sys_readv) /* 145 */ + .long SYMBOL_NAME(sys_writev) + .long SYMBOL_NAME(sys_getsid) + .long SYMBOL_NAME(sys_fdatasync) + .long SYMBOL_NAME(sys_sysctl) + .long SYMBOL_NAME(sys_mlock) /* 150 */ + .long SYMBOL_NAME(sys_munlock) + .long SYMBOL_NAME(sys_mlockall) + .long SYMBOL_NAME(sys_munlockall) + .long SYMBOL_NAME(sys_sched_setparam) + .long SYMBOL_NAME(sys_sched_getparam) /* 155 */ + .long SYMBOL_NAME(sys_sched_setscheduler) + .long SYMBOL_NAME(sys_sched_getscheduler) + .long SYMBOL_NAME(sys_sched_yield) + .long SYMBOL_NAME(sys_sched_get_priority_max) + .long SYMBOL_NAME(sys_sched_get_priority_min) /* 160 */ + .long SYMBOL_NAME(sys_sched_rr_get_interval) + .long SYMBOL_NAME(sys_nanosleep) + .long SYMBOL_NAME(sys_mremap) + .long SYMBOL_NAME(sys_setresuid) + .long SYMBOL_NAME(sys_getresuid) /* 165 */ + .long SYMBOL_NAME(sys_vm86) + .long SYMBOL_NAME(sys_query_module) + .long SYMBOL_NAME(sys_poll) + .long SYMBOL_NAME(sys_nfsservctl) + .long SYMBOL_NAME(sys_setresgid) /* 170 */ + .long SYMBOL_NAME(sys_getresgid) + .long SYMBOL_NAME(sys_prctl) + .long SYMBOL_NAME(sys_rt_sigreturn) + .long SYMBOL_NAME(sys_rt_sigaction) + .long SYMBOL_NAME(sys_rt_sigprocmask) /* 175 */ + .long SYMBOL_NAME(sys_rt_sigpending) + .long SYMBOL_NAME(sys_rt_sigtimedwait) + .long SYMBOL_NAME(sys_rt_sigqueueinfo) + .long SYMBOL_NAME(sys_rt_sigsuspend) + .long SYMBOL_NAME(sys_pread) /* 180 */ + .long SYMBOL_NAME(sys_pwrite) + .long SYMBOL_NAME(sys_chown) + .long SYMBOL_NAME(sys_getcwd) + .long SYMBOL_NAME(sys_capget) + .long SYMBOL_NAME(sys_capset) /* 185 */ + .long SYMBOL_NAME(sys_sigaltstack) + .long SYMBOL_NAME(sys_sendfile) + .long SYMBOL_NAME(sys_ni_syscall) /* streams1 */ + .long SYMBOL_NAME(sys_ni_syscall) /* streams2 */ + .long SYMBOL_NAME(sys_vfork) /* 190 */ + .long SYMBOL_NAME(sys_getrlimit) + .long SYMBOL_NAME(sys_mmap2_4arg) + .long SYMBOL_NAME(sys_truncate64) + .long SYMBOL_NAME(sys_ftruncate64) + /* 195 */ + + /* + * NOTE!! This doesn't have to be exact - we just have + * to make sure we have _enough_ of the "sys_ni_syscall" + * entries. Don't panic if you notice that this hasn't + * been shrunk every time we add a new system call. 
+ */ + .rept NR_syscalls-194 + .long SYMBOL_NAME(sys_ni_syscall) + .endr +#endif /* CONFIG_X86_FASTCALL */ ENTRY(sys_call_table) .long SYMBOL_NAME(sys_ni_syscall) /* 0 - old "setup()" system call*/ .long SYMBOL_NAME(sys_exit) --- linux/arch/i386/kernel/setup.c.orig Sun Dec 12 03:35:30 1999 +++ linux/arch/i386/kernel/setup.c Sun Dec 12 08:42:19 1999 @@ -560,7 +560,7 @@ init_mm.start_code = (unsigned long) &_text; init_mm.end_code = (unsigned long) &_etext; init_mm.end_data = (unsigned long) &_edata; - init_mm.brk = (unsigned long) &_end; + init_mm.brk = (unsigned long) (&_end) + 8192; code_resource.start = virt_to_bus(&_text); code_resource.end = virt_to_bus(&_etext)-1; @@ -585,7 +585,7 @@ * partially used pages are not usable - thus * we are rounding upwards: */ - start_pfn = PFN_UP(__pa(&_end)); + start_pfn = PFN_UP(__pa(&_end) + 8192); /* * Find the highest page frame number we have available @@ -1479,6 +1479,36 @@ return p - buffer; } +#ifdef CONFIG_X86_FASTCALL +/* + * SYSENTER/SYSEXIT fast system call support on PPro+ CPUs. + * K6+ CPUs have this too - only the MSRs differ. + * + * This functions sets up the MSRs which show the kernel entrypoint + * for fast syscalls. See entry.S's fast_system_call for more. + */ +static inline void fastsys_init (void) +{ + extern char fast_system_call; + + if (cpu_has_fastcall && + (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && + (boot_cpu_data.x86 == 6) && + (boot_cpu_data.x86_model < 3) && + (boot_cpu_data.x86_mask < 3)) { + + printk("oops, this early PPro does not support fast system calls.\n"); + return; + } + + printk("fastsys init on CPU#%d.\n", smp_processor_id()); + + wrmsr(0x174, __KERNEL_CS, 0); + wrmsr(0x175, 0, 0); + wrmsr(0x176, &fast_system_call, 0); +} +#endif + int cpus_initialized = 0; unsigned long cpu_initialized = 0; @@ -1488,7 +1518,7 @@ * and IDT. We reload them nevertheless, this function acts as a * 'CPU state barrier', nothing should get across. */ -void cpu_init (void) +void __init cpu_init (void) { int nr = smp_processor_id(); struct tss_struct * t = &init_tss[nr]; @@ -1538,4 +1568,8 @@ current->flags &= ~PF_USEDFPU; current->used_math = 0; stts(); + +#ifdef CONFIG_X86_FASTCALL + fastsys_init(); +#endif } --- linux/arch/i386/kernel/sys_i386.c.orig Sun Dec 12 03:35:30 1999 +++ linux/arch/i386/kernel/sys_i386.c Sun Dec 12 03:35:57 1999 @@ -78,6 +78,26 @@ return do_mmap2(addr, len, prot, flags, fd, pgoff); } +asmlinkage long sys_mmap2_4arg(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long * userstack) +{ + int err; + unsigned long flags; + unsigned long fd; + unsigned long pgoff; + + if (verify_area(VERIFY_READ, userstack, 3*sizeof(long))) + return -EFAULT; + + err = 0; + err |= __get_user(flags, userstack++); + err |= __get_user(fd, userstack++); + err |= __get_user(pgoff, userstack); + if (err) + return -EFAULT; + + return do_mmap2(addr, len, prot, flags, fd, pgoff); +} /* * Perform the select(nd, in, out, ex, tv) and mmap() system * calls. Linux/i386 didn't use to be able to handle more than --- linux/arch/i386/kernel/process.c.orig Sun Dec 12 03:35:26 1999 +++ linux/arch/i386/kernel/process.c Sun Dec 12 03:35:57 1999 @@ -547,6 +547,15 @@ : /* no output */ \ :"r" (thread->debugreg[register])) +#if CONFIG_X86_FASTCALL +#define LOAD_FASTSYS(p) \ + __asm__ __volatile__("wrmsr" : : \ + "c" (0x175), "a" ((unsigned int)(p) + 2*PAGE_SIZE), "d" (0)) +#else +#define LOAD_FASTSYS(p) \ + do { } while (0) +#endif + /* * switch_to(x,yn) should switch tasks from x to y. 
  *
@@ -579,6 +588,7 @@
 
 	unlazy_fpu(prev_p);
 
+	LOAD_FASTSYS(next_p);
 	/*
 	 * Reload esp0, LDT and the page table pointer:
 	 */
--- linux/arch/i386/kernel/vsyscall.c.orig	Sun Dec 12 03:50:48 1999
+++ linux/arch/i386/kernel/vsyscall.c	Sun Dec 12 17:10:41 1999
@@ -0,0 +1,57 @@
+/*
+ * linux/arch/i386/kernel/vsyscall.c
+ *
+ * x86 Virtual System Call Interface & support routines
+ *
+ * Copyright (C) 1999 Ingo Molnar
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+static char *vsyscall_code_page, *vsyscall_data_page;
+extern char _edata;
+
+/*
+ * Make the Virtual System Call address visible to user-space:
+ */
+void vsyscall_init (void)
+{
+	vsyscall_code_page = (char *) alloc_bootmem_pages(PAGE_SIZE);
+	/* the shared data page backs vsys_data and must be allocated too: */
+	vsyscall_data_page = (char *) alloc_bootmem_pages(PAGE_SIZE);
+	printk("vsyscall_code_page: %p\n", vsyscall_code_page);
+	printk("vsyscall_begin: %p\n", &_edata);
+
+	memcpy(vsyscall_code_page, &_edata, PAGE_SIZE);
+#if 1
+	vsyscall_code_page[0x100] = 1;
+	vsyscall_code_page[0x101] = 2;
+	vsyscall_code_page[0x102] = 3;
+	vsyscall_code_page[0x103] = 4;
+#endif
+
+	printk("VSYSCALL_CODE: %08lx\n", VSYSCALL_CODE);
+	printk("VSYSCALL_DATA: %08lx\n", VSYSCALL_DATA);
+	printk("FIX_VSYSCALL_START: %d\n", FIX_VSYSCALL_START);
+	printk("FIX_VSYSCALL_END: %d\n", FIX_VSYSCALL_END);
+	set_fixmap_ro(FIX_VSYSCALL_END, __pa(vsyscall_code_page));
+	/*
+	 * Read-only data page for user-space:
+	 */
+	set_fixmap_ro(FIX_VSYSCALL_END-1, __pa(vsyscall_data_page));
+	/*
+	 * Read-write alias to data page for kernel-space:
+	 */
+	set_fixmap(FIX_VSYSCALL_END-2, __pa(vsyscall_data_page));
+}
+
--- linux/arch/i386/kernel/Makefile.orig	Sun Dec 12 03:55:18 1999
+++ linux/arch/i386/kernel/Makefile	Sun Dec 12 10:17:02 1999
@@ -14,7 +14,8 @@
 O_TARGET := kernel.o
 O_OBJS	:= process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
-		ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_i386.o
+		ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_i386.o \
+		vsyscall.o
 
 OX_OBJS	:= i386_ksyms.o
 MX_OBJS	:=
--- linux/arch/i386/kernel/vsys.h.orig	Sun Dec 12 16:18:40 1999
+++ linux/arch/i386/kernel/vsys.h	Sun Dec 12 16:18:38 1999
@@ -0,0 +1,8 @@
+
+#include
+#include
+#include
+#define ASSEMBLY
+#include
+#include
+
--- linux/arch/i386/kernel/traps.c.orig	Sun Dec 12 04:05:57 1999
+++ linux/arch/i386/kernel/traps.c	Sun Dec 12 04:09:28 1999
@@ -36,6 +36,7 @@
 
 #include
 #include
+#include
 
 #ifdef CONFIG_X86_VISWS_APIC
 #include
@@ -819,4 +820,5 @@
 	lithium_init();
 	cobalt_init();
 #endif
+	vsyscall_init();
 }
--- linux/arch/i386/kernel/vsys.S.orig	Sun Dec 12 07:33:17 1999
+++ linux/arch/i386/kernel/vsys.S	Sun Dec 12 17:18:05 1999
@@ -0,0 +1,66 @@
+/*
+ * linux/arch/i386/kernel/vsys.S
+ *
+ * x86 Virtual System Call Interface assembly trampoline
+ *
+ * Copyright (C) 1999 Ingo Molnar
+ */
+
+#include "vsys.h"
+
+.section .text.vsyscall, "a"
+
+ENTRY(stext)
+ENTRY(vsyscall_entry)
+
+	pushl %ebp
+	movzbl %bl, %ebp
+	jmp *vsys_class_table(,%ebp,4)
+
+ENTRY(vsys_time)
+	popl %ebp
+	movl tv_sec, %eax
+	ret
+
+ENTRY(vsys_gettimeofday)
+	popl %ebp
+	movl $-1, %eax
+	ret
+
+ENTRY(vsys_getpid)
+	popl %ebp
+	movl $20, %eax
+	int $0x80
+	ret
+
+ENTRY(vsys_call_table_class0)
+	.long vsys_time
+	.long vsys_gettimeofday
+
+ENTRY(vsys_call_class0)
+	movzbl %bh, %ebp
+	jmp *vsys_call_table_class0(,%ebp,4)
+
+ENTRY(vsys_call_table_class1)
+	.long vsys_getpid
+
+ENTRY(vsys_call_table_class2)
+	.long vsys_getpid
+
+ENTRY(vsys_call_class1)
+	movzbl %bh, %ebp
+	jmp *vsys_call_table_class1(,%ebp,4)
+
+ENTRY(vsys_call_class2)
+	movzbl %bh, %ebp
+	jmp
*vsys_call_table_class2(,%ebp,4) + +ENTRY(vsys_class_table) + .long vsys_call_class0 + .long vsys_call_class1 + .long vsys_call_class2 + +.org 4096 +ENTRY(tv_sec) + + --- linux/arch/i386/config.in.orig Sun Dec 12 03:35:30 1999 +++ linux/arch/i386/config.in Sun Dec 12 03:35:57 1999 @@ -35,6 +35,7 @@ fi if [ "$CONFIG_M686" = "y" ]; then define_bool CONFIG_X86_GOOD_APIC y + define_bool CONFIG_X86_FASTCALL y fi if [ "$CONFIG_MK7" = "y" ]; then define_bool CONFIG_X86_TSC y --- linux/arch/i386/vmlinux.lds.orig Sun Dec 12 03:57:01 1999 +++ linux/arch/i386/vmlinux.lds Sun Dec 12 15:44:15 1999 @@ -6,8 +6,9 @@ ENTRY(_start) SECTIONS { + . = 0xC0000000 + 0x100000; - _text = .; /* Text and read-only data */ + _text = .; /* Text and read-only data */ .text : { *(.text) *(.fixup) @@ -34,11 +35,12 @@ } _edata = .; /* End of data section */ + . = . + 8192; /* vsyscall code and data */ . = ALIGN(8192); /* init_task */ .data.init_task : { *(.data.init_task) } - . = ALIGN(4096); /* Init code and data */ + . = ALIGN(4096); /* Init code and data */ __init_begin = .; .text.init : { *(.text.init) } .data.init : { *(.data.init) } --- linux/arch/i386/vsys.lds.orig Sun Dec 12 10:16:26 1999 +++ linux/arch/i386/vsys.lds Sun Dec 12 10:16:26 1999 @@ -0,0 +1,25 @@ +/* + * Virtual System Call area ld script + * Ingo Molnar + */ +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(_start_vsys) +SECTIONS +{ + + . = 0xFFFF0000; + __vsyscall_begin = .; /* Virtual System Call page(s) */ + .text.vsyscall : { *(.text.vsyscall) } + . = ALIGN(4096); /* Init code and data */ + __vsyscall_end = .; + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } +} --- linux/arch/i386/Makefile.orig Sun Dec 12 08:20:53 1999 +++ linux/arch/i386/Makefile Sun Dec 12 11:18:53 1999 @@ -84,32 +84,44 @@ vmlinux: arch/i386/vmlinux.lds +vsys: vmlinux $(CONFIGURATION) arch/i386/kernel/vsys.o arch/i386/vsys.lds + $(LD) -T $(TOPDIR)/arch/i386/vsys.lds $(LDFLAGS) arch/i386/kernel/vsys.o -o vsys + objcopy --remove-section=.text vsys + objcopy --remove-section=.data vsys + objcopy --remove-section=.bss vsys + objcopy -O binary -S vsys + objcopy --remove-section=.text.vsyscall vmlinux + objcopy --add-section=.text.vsyscall=vsys vmlinux + objcopy --set-section-flags=.text.vsyscall=contents,alloc,load,data vmlinux + ld -m elf_i386 -T $(TOPDIR)/arch/i386/vmlinux.lds -e stext vmlinux -o vmlinux2 + cp vmlinux2 vmlinux + FORCE: ; .PHONY: zImage bzImage compressed zlilo bzlilo zdisk bzdisk install \ clean archclean archmrproper archdep -zImage: vmlinux +zImage: vmlinux vsys @$(MAKEBOOT) zImage -bzImage: vmlinux +bzImage: vmlinux vsys @$(MAKEBOOT) bzImage compressed: zImage -zlilo: vmlinux +zlilo: vmlinux vsys @$(MAKEBOOT) BOOTIMAGE=zImage zlilo -bzlilo: vmlinux +bzlilo: vmlinux vsys @$(MAKEBOOT) BOOTIMAGE=bzImage zlilo -zdisk: vmlinux +zdisk: vmlinux vsys @$(MAKEBOOT) BOOTIMAGE=zImage zdisk -bzdisk: vmlinux +bzdisk: vmlinux vsys @$(MAKEBOOT) BOOTIMAGE=bzImage zdisk -install: vmlinux +install: vmlinux vsys @$(MAKEBOOT) BOOTIMAGE=bzImage install archclean: --- linux/Documentation/syscall.txt.orig Sun Dec 12 03:50:15 1999 +++ linux/Documentation/syscall.txt Sun Dec 12 17:50:27 1999 @@ -0,0 +1,61 @@ + +'x86 Virtual System Call Interface', Ingo Molnar + +The point is to have an extensible system call interface. 
The kernel
+provides a generic entry point, which is a read-only, user-visible page
+range. User-space does not know about the internals of this entry
+point; it is a simple, generic function call.
+
+The parameter is _not_ the system call ID, but rather a more abstract
+(class ID, nr) pair. This interface can be extended at any time, either
+by implementing individual classes differently or by adding new
+classes.
+
+The entry point is 0xFFFF0000. Arguments are placed into registers.
+The first 3 parameters are in regparm order: %eax, %edx, %ecx.
+%ebx is the class identifier. %esi, %edi, %ebp are parameters 4-6.
+(Future classes might define more parameters.) The kernel guarantees
+to preserve all registers during virtual system calls, except the flags.
+
+NOTE: it is completely up to the kernel how it proceeds further!
+User-space must not assume anything about the particular way the
+kernel executes the virtual system call. The kernel might service
+certain functions purely in user-space, or it might execute a 'real'
+system call via SYSENTER or via the int $0x80 interface.
+
+	%ebx - class descriptor:
+		%bl - class ID
+		%bh - function #
+
+
+	%eax - virtual argument 1
+	%edx - virtual argument 2
+	%ecx - virtual argument 3
+
+	%esi - virtual argument 4
+	%edi - virtual argument 5
+	%ebp - virtual argument 6
+
+	%esp
+	%eip
+
+Class IDs:
+
+	ID 0 : 'constant syscalls': system calls which can be executed purely
+	       in user-space. Examples are sys_time() and sys_gettimeofday().
+	       In certain cases the kernel might choose to execute a real
+	       system call though.
+
+	ID 1 : 0-parameter syscalls
+	ID 2 : 1-parameter syscalls
+	ID 3 : 2-parameter syscalls
+	ID 4 : 3-parameter syscalls
+
+	ID 5 : 4-6 parameter syscalls
+
+call virtsyscall_EIP[class]
+
+syscall_EIP	# user-space trampoline
+
+The system-call entry address is 0xffff0000 - this is cast into stone.
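
To make the calling convention concrete, here is a minimal user-space
sketch of how a libc-style wrapper could reach the virtual entry point.
It follows the register layout documented above (class ID in %bl,
function number in %bh, arguments in %eax/%edx/%ecx) and the class-0 /
class-1 numbering used by vsys.S; the wrapper names and the inline-asm
packaging are illustration only, not part of the patch. Since the
documentation says all registers except the flags are preserved, only
"memory" and "cc" are declared as clobbers.

    /* user-space sketch, GNU C inline asm */
    #include <sys/time.h>

    #define VSYSCALL_ENTRY	0xffff0000UL

    /* class 0, function 0: time() - may be serviced without entering
     * the kernel at all (vsys_time just loads the shared tv_sec) */
    static inline long vsys_time_call(void)
    {
    	long ret;

    	__asm__ __volatile__(
    		"call *%1"
    		: "=a" (ret)
    		: "r" (VSYSCALL_ENTRY), "b" (0x00)	/* %bl = 0, %bh = 0 */
    		: "memory", "cc");
    	return ret;
    }

    /* class 0, function 1: gettimeofday(tv, tz), args in %eax, %edx
     * (the vsys.S stub currently just returns -1) */
    static inline long vsys_gettimeofday_call(struct timeval *tv,
    					      struct timezone *tz)
    {
    	long ret;

    	__asm__ __volatile__(
    		"call *%1"
    		: "=a" (ret)
    		: "r" (VSYSCALL_ENTRY), "0" (tv), "d" (tz),
    		  "b" (0x0100)				/* %bl = 0, %bh = 1 */
    		: "memory", "cc");
    	return ret;
    }

    /* class 1, function 0: getpid() - currently falls back to int $0x80 */
    static inline long vsys_getpid_call(void)
    {
    	long ret;

    	__asm__ __volatile__(
    		"call *%1"
    		: "=a" (ret)
    		: "r" (VSYSCALL_ENTRY), "b" (0x01)	/* %bl = 1, %bh = 0 */
    		: "memory", "cc");
    	return ret;
    }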
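
The time() fast path works because the kernel mirrors xtime.tv_sec into
the shared vsyscall data page: the settimeofday(), leap-second and timer
hunks in kernel/time.c and kernel/sched.c above update vsys_data->tv_sec
whenever xtime.tv_sec changes, and vsys_time in vsys.S simply loads that
word. A sketch of the user-visible side, assuming the read-only data
page ends up at 0xffff1000 - one page above the entry point, which is
where the fixmap indices and the .org 4096 in vsys.S place it; the
struct and helper names are hypothetical:

    /* hypothetical user-space mirror of vsys_data_t from asm/vsyscall.h;
     * only tv_sec is exported by this patch */
    struct vsys_data_user {
    	unsigned int tv_sec;
    };

    #define VSYSCALL_USER_DATA \
    	((const volatile struct vsys_data_user *) 0xffff1000UL)

    static inline long user_time(long *tloc)
    {
    	/* A single aligned 32-bit load is atomic on the i386, so the
    	 * reader needs no lock; the writer updates tv_sec under
    	 * xtime_lock.  Sub-second fields would need a sequence count,
    	 * which is presumably why only tv_sec is exported so far. */
    	long now = (long) VSYSCALL_USER_DATA->tv_sec;

    	if (tloc)
    		*tloc = now;
    	return now;
    }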
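
On the SYSENTER side, the magic numbers written by fastsys_init() in
setup.c and by LOAD_FASTSYS() in process.c are the three Intel SYSENTER
MSRs. A kernel-context sketch with the names spelled out - the helper
and its arguments are mine, the values written are the ones the patch
uses (CS and EIP set once per CPU, ESP reloaded on every context switch
so SYSENTER lands on the incoming task's 8K kernel stack):

    #define MSR_SYSENTER_CS		0x174	/* ring-0 code segment for SYSENTER */
    #define MSR_SYSENTER_ESP	0x175	/* ring-0 stack pointer loaded by SYSENTER */
    #define MSR_SYSENTER_EIP	0x176	/* kernel entry point: fast_system_call */

    static inline void fastsys_load_msrs(struct task_struct *next)
    {
    	extern char fast_system_call;

    	/* set once at cpu_init() time: */
    	wrmsr(MSR_SYSENTER_CS, __KERNEL_CS, 0);
    	wrmsr(MSR_SYSENTER_EIP, (unsigned int) &fast_system_call, 0);

    	/* reloaded in switch_to() for the incoming task: */
    	wrmsr(MSR_SYSENTER_ESP, (unsigned int) next + 2*PAGE_SIZE, 0);
    }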