--- ./arch/i386/kernel/Makefile%KMD Sun Dec 15 18:07:48 2002 +++ ./arch/i386/kernel/Makefile Mon Dec 16 10:55:38 2002 @@ -29,6 +29,7 @@ obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_EDD) += edd.o obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_KMSGDUMP) += kmsgdump.o EXTRA_AFLAGS := -traditional --- ./arch/i386/kernel/process.c%KMD Mon Dec 16 10:54:33 2002 +++ ./arch/i386/kernel/process.c Mon Dec 16 11:41:48 2002 @@ -468,9 +468,9 @@ if (chkpnt<100) { unsigned long l, r; chkpnt=100; - cli(); + local_irq_disable(); chkpnt++; -#if __SMP__ +#ifdef CONFIG_SMP /* * Stop all CPUs and turn off local APICs and the IO-APIC, so * other OSs see a clean IRQ state. --- ./arch/i386/kernel/kmsgdump.S%KMD Mon Dec 16 10:55:01 2002 +++ ./arch/i386/kernel/kmsgdump.S Mon Dec 16 13:39:40 2002 @@ -235,7 +235,7 @@ jnz 5f call probeprinter jmp waitevent -5: cmpb $'i', %al /* 'T' stands for 'prinTer' */ +5: cmpb $'i', %al /* 'I' stands for 'Info' (help/about) */ jnz 6f movw $MKWORD(HELPLINE+2,0), %cx /* first line of the messages window */ pushw %cx /* save this, it will serve us later */ @@ -530,7 +530,7 @@ /* * do_dump(): * This function dumps the message buffer onto a disk. CF is set if an error - * occured. + * occurred. * */ do_dump: @@ -734,7 +734,7 @@ /* * dumpdata(): * writes bytes of data on drive DRIVENUM, from logical sector %si. - * CF is set if an error occured. + * CF is set if an error occurred. * */ dumpdata: @@ -893,5 +893,5 @@ .ascii "successfully (print, dump), you should hear 3 short beeps.\n" .ascii "When dumping data onto a floppy, be warned that FLOPPY CONTENTS WILL BE \377L\377O\377S\377T.\n" .asciz "Hit <\377U\377p> key to list kernel messages again." -/* just a public label at the end of the code so we know its lenght. */ +/* just a public label at the end of the code so we know its length. 
*/ kmsgdump_end: --- ./arch/i386/Kconfig%KMD Mon Dec 16 10:54:33 2002 +++ ./arch/i386/Kconfig Mon Dec 16 11:59:33 2002 @@ -1612,7 +1612,7 @@ by writing to /proc/sys/kernel/kmsgdump if CONFIG_SYSCTL is enabled. Don't say Y unless you really want to hack your kernel and/or help - developpers to debug it. This isn't a toy, you have been warned ! + developers to debug it. This isn't a toy, you have been warned ! config KMSGDUMP_FAT bool "floppy as FAT by default" @@ -1630,7 +1630,7 @@ supported, the contents of the diskette would not be lost, which is false. It's better if people remember that the diskette is unusable after a dump. On the other hand, accessing the file after a crash is - easier when the diskette is formated as FAT. One other advantage of + easier when the diskette is formatted as FAT. One other advantage of FAT is that the boot sector of the diskette is filled with a boot redirector which makes the system boot from the first hard disk even if the bios tried to boot from the floppy. If unsure, say Y. --- ./include/asm-i386/kmsgdump.h%KMD Mon Dec 16 10:54:33 2002 +++ ./include/asm-i386/kmsgdump.h Mon Dec 16 12:14:48 2002 @@ -9,7 +9,7 @@ /* LOG_BUF_LEN : should match 's */ #ifndef LOG_BUF_LEN -#define LOG_BUF_LEN (16384) +#define LOG_BUF_LEN (32768) #endif #define CODEORIGIN 0x0700 --- ./Documentation/kmsgdump.txt%KMD Mon Dec 16 10:54:33 2002 +++ ./Documentation/kmsgdump.txt Mon Dec 16 12:14:15 2002 @@ -18,7 +18,7 @@ There are two ways of getting a dump : - by pressing SysRQ+D (RightAlt - PrintScrn - D together) ; - - after a kernel panic has occured, a dump may be automatically + - after a kernel panic has occurred, a dump may be automatically generated. Before anything else, you MUST KNOW that in order to get maximal @@ -26,7 +26,7 @@ real mode and disk accesses are made via the Bios. 
This ensures that even if kernel memory is really corrupted, the dump still has chances to work, but this also implies that after a dump has -occured, it is IMPOSSIBLE TO CONTINUE TO WORK WITH THE CURRENT +occurred, it is IMPOSSIBLE TO CONTINUE TO WORK WITH THE CURRENT KERNEL. You will have to REBOOT. So when your kernel still responds, you'd better get a similar dump by entering one of the following commands : @@ -52,7 +52,7 @@ Manual mode (or interactive mode) is always entered if you hit SysRQ+D. But it is also entered during a kernel panic if the current mode is set -to "manual". This mode is recommended for a developper's workstation, +to "manual". This mode is recommended for a developer's workstation, or a kernel running under an emulator such as vmware. It's recommended to disable interactive mode on servers which may crash when nobody is near to reboot them. @@ -62,9 +62,9 @@ even kmsgdump can cause recursive crashes (this has been reported to me once). For this reason I've added a checkpoint mechanism to the code : every little part of code is checkpointed, and if a crash occurs again, the same part is -not executed again, to prevent loopings. So there are more chances to get +not executed again, to prevent looping. So there are more chances to get to the reset routine which will, in the worst case, reboot the system, but -not let it loop undefinetely. +not let it loop indefinitely. 3.1. Manual mode ~~~~~~~~~~~~~~~~ @@ -103,7 +103,7 @@ T : select next available prinTer. The system tests if a printer is connected at the other end of the cable, and skips the empty ports. U : change drive Unit. Although dump is possible on hard disks, they are - never proposed in the interface to avoid dramatical mistakes. + never proposed in the interface to avoid dramatic mistakes. Other keys are simply ignored. @@ -116,7 +116,7 @@ Automated operation is performed by the system only when a kernel panic occurs. 
In this case, the system waits for the "panic_timeout" delay -to let you a few seconds if you want to try to play with SysRQ (sync, +to give you a few seconds if you want to try to play with SysRQ (sync, unmount filesystems, ...). This delay is configurable by entering a number of seconds in "/proc/sys/kernel/panic". @@ -137,7 +137,7 @@ 3.2.2. End of operation ~~~~~~~~~~~~~~~~~~~~~~~ After completion of an automatic dump, or when a dump is aborted, the system -can either halt or reboot. In case of redundant servers, you may prefer halt +can either halt or reboot. In case of redundant servers, you may prefer to halt a buggy system, because another one ensures the service continues to work. But in other cases, you may prefer rebooting to quickly restart services. This is also configurable (read section 4). @@ -148,7 +148,7 @@ 4.1. Kernel options ~~~~~~~~~~~~~~~~~~~ -First, choose the kernel compilation options which matches better your +First, choose the kernel compilation options which closely match your situation. This may seem obvious, but you can reduce the risks of crash by not enabling drivers designated for hardware you don't have. Specially on servers, use only a reduced feature set, because you know exactly what @@ -165,7 +165,7 @@ so 14 seconds are won. If you have changed your messages buffer size (which is 16 kB by default), -you should accord the size in "include/asm/kmsgdump.h", parameter LOG_BUG_LEN. +you should modify the size in "include/asm/kmsgdump.h", parameter LOG_BUF_LEN. Some people required 32 kB. But you shouldn't exceed 60 kB since the dump is done in real mode (16 bits). @@ -173,7 +173,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~ If your kernel supports SYSCTL, you can adjust KMSGDUMP parameters by -writing a string to /proc/sys/kernel/kmsgdump. This string consists in +writing a string to /proc/sys/kernel/kmsgdump. This string consists of a concatenation of flags. Most of them are only booleans. 
For each boolean, a complementary flag exists to avoid any ambiguous interpretation. For the moment, the flags are : @@ -230,7 +230,7 @@ FAT mode dump. On the other hand, when a RAW dump is done at the beginning of the disk, it -cannot be used again as a "safe kmsgdump disk". Moreover, letting it in the +cannot be used again as a "safe kmsgdump disk". Moreover, leaving it in the drive when rebooting will cause the system to hang if the bios tries to boot from the floppy first. @@ -241,7 +241,7 @@ your bios to boot from hard disk or anything but the floppy first because the bios will find anything but a bootable system on this floppy. The problem is with older systems on which the boot sequence cannot be changed. For this -reason, when a diskette is formated in FAT mode, a small code is inserted on +reason, when a diskette is formatted in FAT mode, a small code is inserted on the boot sector which tries to redirect the boot to the first hard disk seen by the bios. This is *generally* the bootable disk, but this may not be the right on specific systems, so you may have to do some tests before considering @@ -249,16 +249,16 @@ If your system is a server, you may reduce the time the bios tests the PC to ensure quick reboot. On some systems, you can turn on the option "Quick -power-on self test", and disable testings of memory above 1MB. +power-on self test", and disable testing of memory above 1MB. 5. Reading the messages back ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -5.1. FAT-formated disks +5.1. FAT-formatted disks ~~~~~~~~~~~~~~~~~~~~~~~ -If the disk has been formated as FAT12, you'll find on it a file -named "MESSAGES.TXT" which contains all messages buffer. If the +If the disk has been formatted as FAT12, you'll find on it a file +named "MESSAGES.TXT" which contains all of the 'messages' buffer. If the buffer is not full, the end of the file is filled with zeroes, so it's better to delete them using "tr" under linux. 
@@ -284,7 +284,7 @@ Raw disks will be readable under linux by using the utility DD. By default, the dump will be performed from the first sector of the disk. -Example with 16 kB messages : +Example with 16 kB messages buffer: # dd if=/dev/fd0 bs=512 count=32 | tr -d '\000' @@ -308,16 +308,16 @@ 6. Other speed improvements ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Here are some advices to make a system reboot faster, especially if +Here is some advice to make a system reboot faster, especially if you don't use filesystem journalling. -When a file server crashes, it may FSCK during a long time. There are good docs +When a file server crashes, it may FSCK for a long time. There are good docs about how to dramatically reduce FSCK time, but at least consider these methods : - in /etc/fstab, set the sixth field (fs_passno) to 1 for the root fs, and 2 for every other fs. FSCK will know it what it can parallelize - depending on hardware dependencies. In the better case, you can devide + depending on hardware dependencies. In the better case, you can divide the total time by the number of physical disks. (man fstab and man fsck for more info). @@ -331,12 +331,15 @@ # mount -ro remount /mount/point - - change the number of bytes by inode and the block size when formating + - change the number of bytes by inode and the block size when formatting your FS. I personnaly use 16384 bytes/inode, a block size of 4096 bytes, the sparse flag set (reduces the number of superblocks). This makes me waste about 1% space, but total mount time is about 1 second for a total of 8 FS's, 11 gigs on 5 separate disks and the total FSCK time after a loosy power-off is less than 3 minutes. + + - use a Linux journaling file system, such as ext3fs, IBM JFS, or + reiserfs in Linux 2.4, or any of those or SGI XFS in Linux 2.5. And of course, don't start services you don't need ! Sendmail itself can take a long time if it cannot resolve the domain name.
--- ./kernel/panic.c%KMD Mon Dec 16 10:54:33 2002 +++ ./kernel/panic.c Mon Dec 16 11:39:31 2002 @@ -81,7 +81,7 @@ */ printk(KERN_EMERG "Dumping messages in %d seconds : last chance for Alt-SysRq...", panic_timeout); - sti(); + local_irq_enable(); for(panic_timeout*=10; panic_timeout>0; panic_timeout--) { CHECK_EMERGENCY_SYNC; mdelay(100);