diff -Nur linux-2.4.19-lb/arch/alpha/kernel/machine_kexec.c linux-2.4.19-lb.kexec/arch/alpha/kernel/machine_kexec.c
--- linux-2.4.19-lb/arch/alpha/kernel/machine_kexec.c	1970-01-01 08:00:00.000000000 +0800
+++ linux-2.4.19-lb.kexec/arch/alpha/kernel/machine_kexec.c	2002-09-06 16:38:45.000000000 +0800
@@ -0,0 +1,241 @@
+#include <linux/mm.h>
+#include <linux/kexec.h>
+#include <linux/reboot.h>
+#include <linux/string.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/system.h>
+#include <asm/mmu_context.h>
+#include <asm/io.h>
+#include <asm/hwrpb.h>
+#include "proto.h"
+
+#if ((PAGE_SHIFT != 13) || (PTRS_PER_PTE != PTRS_PER_PMD) || (PTRS_PER_PMD != PTRS_PER_PGD))
+#error unexpected page table constants
+#endif
+
+/*
+ * Hard-coded page tables giving a 1:1 mapping of virtual to
+ * physical addresses.
+ * =====================================================================
+ */
+static void build_pte_page(pte_t *ptep, unsigned long addr)
+{
+        unsigned long end_addr;
+        addr &= PAGE_MASK;
+        end_addr = addr + (PTRS_PER_PTE << PAGE_SHIFT);
+        while(addr < end_addr) {
+                *ptep = mk_pte_phys(addr, PAGE_KERNEL);
+                addr += PAGE_SIZE;
+                ptep++;
+        }
+}
+
+static unsigned long build_pmd_page(
+        unsigned long address, unsigned long end_address,
+        pmd_t *pmdp, unsigned long addr)
+{
+        unsigned long end_addr;
+        addr &= PAGE_MASK;
+        end_addr = addr + (PTRS_PER_PMD << PMD_SHIFT);
+        while((address < end_address) && (addr < end_addr)) {
+                pte_t *ptep;
+                ptep = (void *)address;
+                address += PAGE_SIZE;
+                build_pte_page(ptep, addr);
+                pmd_set(pmdp, ptep);
+                addr += (PTRS_PER_PTE << PAGE_SHIFT);
+                pmdp++;
+        }
+        while(addr < end_addr) {
+                pmd_clear(pmdp);
+                addr += (PTRS_PER_PTE << PAGE_SHIFT);
+                pmdp++;
+        }
+        return address;
+}
+
+
+static pgd_t *build_pgd_page(unsigned long address, unsigned long end_address)
+{
+        unsigned long addr;
+        unsigned long end_addr;
+        pgd_t *pgdp, *table;
+
+        if (address < end_address) {
+                pgdp = (void *)address;
+                address += PAGE_SIZE;
+        }
+        else {
+                printk(KERN_EMERG "build_pgd_page no pages!\n");
+                return 0;
+        }
+
+        table = pgdp;
+        addr = 0;
+        end_addr = addr + (PTRS_PER_PGD << PGDIR_SHIFT);
+        while((address < end_address) && (addr < end_addr)) {
+                pmd_t *pmdp;
+                pmdp = (void *)address;
+                address += PAGE_SIZE;
+                address = build_pmd_page(address, end_address, pmdp, addr);
+                pgd_set(pgdp, pmdp);
+                addr += (PTRS_PER_PMD << PMD_SHIFT);
+                pgdp++;
+        }
+        /* clear the unused entries */
+        while(addr < end_addr) {
+                pgd_clear(pgdp);
+                addr += (PTRS_PER_PMD << PMD_SHIFT);
+                pgdp++;
+        }
+        pgd_set(pgdp - 1, (pmd_t *)table);
+        return table;
+}
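
The three helpers above carve a run of pre-allocated physical pages into a three-level identity map. As a rough sketch of the arithmetic (a hypothetical helper, not part of the patch), the page count mirrors the KEXEC_PGTABLE_PAGES formula that include/asm-alpha/kexec.h defines later in this patch:

        /* Hypothetical helper, illustration only: how many pages the
         * identity map needs.  Assumes 8K pages and 1024 entries per
         * table level, which the #error check above enforces.
         * "pages" is the number of physical pages to map (max_mapnr
         * in the kernel proper).
         */
        static unsigned long kexec_pgtable_pages(unsigned long pages)
        {
                unsigned long pte_pages = (pages + (1 << 10) - 1) >> 10; /* one pte page per 1024 pages */
                unsigned long pmd_pages = (pages + (1 << 20) - 1) >> 20; /* one pmd page per 1024 pte pages */
                return pte_pages + pmd_pages + 1;                        /* plus the single pgd page */
        }
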
printk(KERN_EMERG "machine_kexec\n"); + /* Build the page tables doing a + * 1-1 mapping of physical to virtual memory + */ + start_pgtable = image->reboot_code_buffer; + end_pgtable = start_pgtable + (KEXEC_PGTABLE_PAGES << PAGE_SHIFT); + pgdp = build_pgd_page(start_pgtable, end_pgtable); + + printk(KERN_EMERG "machine_kexec 1\n"); + /* Build the new PCB */ + kexec_pcb.ksp = 0; + kexec_pcb.usp = 0; + kexec_pcb.ptbr = (unsigned long)virt_to_phys(pgdp) >> PAGE_SHIFT; + kexec_pcb.asn = 0; + kexec_pcb.pcc = 0; + kexec_pcb.unique = 0; + kexec_pcb.flags = 1; + kexec_pcb.res1 = 0; + kexec_pcb.res2 = 0; + + printk(KERN_EMERG "machine_kexec 2\n"); + /* Interrupts aren't acceptable while we reboot */ + cli(); + + printk(KERN_EMERG "machine_kexec 3\n"); + /* Do the final architecture shutdown */ + if (alpha_mv.kill_arch) + alpha_mv.kill_arch(LINUX_REBOOT_CMD_KEXEC); + + printk(KERN_EMERG "machine_kexec 4\n"); + /* compute the virtual address of various things */ + reboot_code_buffer = end_pgtable; + indirection_page = __va(image->head & PAGE_MASK); + rnk = reboot_addr(reboot_code_buffer, relocate_new_kernel); + pcb = reboot_addr(reboot_code_buffer, kexec_pcb); + + printk(KERN_EMERG "machine_kexec 5\n"); + /* Copy the reboot code and data to a safe location */ + memcpy(reboot_code_buffer, &__start__text_kexec, + &__stop__text_kexec - &__start__text_kexec); + + printk(KERN_EMERG "machine_kexec 6\n"); + /* Switch to the new virtual address space */ + tbia(); + wrvptptr(0xfffffffe00000000); + hwrpb->vptb = 0xfffffffe00000000; + printk(KERN_EMERG "machine_kexec 7\n"); + hwrpb_update_checksum(hwrpb); + printk(KERN_EMERG "machine_kexec 8\n"); + set_PCB(pcb); + tbia(); + printk(KERN_EMERG "machine_kexec 9\n"); + + /* now do everything in physical addresses */ + rnk = (void *)virt_to_phys(rnk); + + printk(KERN_EMERG "machine_kexec 10\n"); + /* now relocate the new kernel */ + (*rnk)(__pa(indirection_page), __pa(reboot_code_buffer), + image->header, image->start); +} + +/* On alpha the only fixed addresses are virtual, so we + * need some way to find the hwrpb, when dealing with just + * physical addresses. + * + * The whole virtual address interface is crazy, for loading + * a standalone operating system. For a lot of extra complexity + * you gain the ability to work around bad ram, which you cannot + * detect reliably. + * + * This is especially crazy when you consider moder chipsets let + * you enable/disable individual sticks of RAM. So detected bad + * ram can be made to look as if it didn't exist. + * + * A slightly better case is made virtually mapped tables on a NUMA + * box when you want the tables in localram. In that case having + * firmware tables live at fixed offset into localram, should be just + * as easy to compute. + * + * The case against virtually mapped firmware tables is that + * physically mapped tables are both easier to construct and deal + * with. As well as they can trivally be found after an arbitrary + * chain of bootloaders. + * + * So please don't copy the alpha poorly designed firmware. 
+ */ +unsigned long get_elf_boot_notes(void **notes_p, unsigned long *note_count_p) +{ + static struct arch_notes { + Elf_Nhdr hwrpb; + unsigned char hwrpb_name[8]; + unsigned long hwrpb_ptr; + } notes = { + .hwrpb = { + .n_namesz = 6, + .n_descsz = sizeof(notes.hwrpb_ptr), + .n_type = LBN_HWRPB, + }, + .hwrpb_name = "Linux", + .hwrpb_ptr = 0, + }; + notes.hwrpb_ptr = __pa(hwrpb); + *note_count_p = 1; + *notes_p = ¬es; + return sizeof(notes); +} diff -Nur linux-2.4.19-lb/arch/alpha/kernel/relocate_kernel.S linux-2.4.19-lb.kexec/arch/alpha/kernel/relocate_kernel.S --- linux-2.4.19-lb/arch/alpha/kernel/relocate_kernel.S 1970-01-01 08:00:00.000000000 +0800 +++ linux-2.4.19-lb.kexec/arch/alpha/kernel/relocate_kernel.S 2002-09-06 16:38:45.000000000 +0800 @@ -0,0 +1,133 @@ +.set noat +.set noreorder + +/* Return value */ +#define v0 $0 +/* temporaries */ +#define t0 $1 +#define t1 $2 +#define t2 $3 +#define t3 $4 +#define t4 $5 +#define t5 $6 +#define t6 $7 +#define t7 $8 +#define t8 $22 +#define t9 $23 +#define t10 $24 +#define t11 $25 +#define t12 $27 +/* Saved registers */ +#define s0 $9 +#define s1 $10 +#define s2 $11 +#define s3 $12 +#define s4 $13 +#define s5 $14 +#define s6 $15 +/* Frame pointer */ +#define fp $15 +/* Argument registers */ +#define a0 $16 +#define a1 $17 +#define a2 $18 +#define a3 $19 +#define a4 $20 +#define a5 $21 +/* return address */ +#define ra $26 +/* Procedure value */ +#define pv $27 +/* Assember temporary */ +#define at $28 +/* Global pointer */ +#define gp $29 +/* Stack Pointer */ +#define sp $30 +/* zero */ +#define zero $31 + +.section .text.kexec,"ax",@progbits + .align 5 + + /* Must be relocatable PIC code callable as a C function, that once + * it starts can not use the previous processes stack. + */ + + /* Arguments are passed starting in $16 */ + /* Return values are passed in $0 */ + /* Temporaries start at $1 */ + /* The stack pointer is in $30 */ + /* $31 is always zero */ + /* The return address is in $26 */ + + .globl relocate_new_kernel +relocate_new_kernel: + /* All of my arguments are passed registers, good */ + /* a0 indirection_page */ + /* a1 reboot_code_buffer */ + /* a2 header */ + /* a3 start_address */ + + /* Copy the arguments to a safe place */ + bis a0, a0, s0 + bis a1, a1, s1 + bis a2, a2, s2 + bis a3, a3, s3 + + /* set a new stack, just in cases.. */ + lda sp, 8192(s1) + + /* Do the copies */ + + /* Load the initial register values */ + /* t4 is the destination pointer */ + /* s0 is the indirection_pointer */ + /* t5 is the page mask value */ + /* t2 is the source pointer */ + /* t1 is the indirection_entry */ + /* t3 & t0 are general purpose temporarys */ + bis zero, zero, t4 + lda t5, -8192(zero) + br zero, $top + .align 4 + +$srcp: + /* Is it the source indicator? */ + and t1, 8, t0 + beq t0, $top + and t1, t5, t2 + lda t3, 8192(t2) + + .align 4 +$cpy: + ldq t1, 0(t2) + addq t2, 8, t2 + stq t1, 0(t4) + addq t4, 8, t4 + cmpult t2, t3, t0 + bne t0, $cpy + +$top: + /* top, read another word for the indirection page into t1 */ + ldq t1, 0(s0) + addq s0, 0x8, s0 + + /* Is it a destination page? */ + blbc t1, $indirectp + and t1, t5, t4 + br zero, $top +$indirectp: + /* Is it a an indirection page? */ + and t1, 2, t0 + beq t0, $donep + and t1, t5, s0 + br zero, $top +$donep: + /* Is it the done indicator? 
+$donep:
+        /* Is it the done indicator? */
+        and     t1, 4, t0
+        beq     t0, $srcp
+        bis     s3, s3, pv
+        bis     s2, s2, a0
+        jmp     ra, (pv)
+
diff -Nur linux-2.4.19-lb/arch/i386/config.in linux-2.4.19-lb.kexec/arch/i386/config.in
--- linux-2.4.19-lb/arch/i386/config.in	2002-09-04 16:51:20.000000000 +0800
+++ linux-2.4.19-lb.kexec/arch/i386/config.in	2002-09-06 16:38:47.000000000 +0800
@@ -269,6 +269,7 @@
 tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT
 tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF
 tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC
+bool 'Kernel execing kernel support' CONFIG_KEXEC
 
 bool 'Power Management support' CONFIG_PM
 
diff -Nur linux-2.4.19-lb/arch/i386/kernel/Makefile linux-2.4.19-lb.kexec/arch/i386/kernel/Makefile
--- linux-2.4.19-lb/arch/i386/kernel/Makefile	2002-09-04 16:51:20.000000000 +0800
+++ linux-2.4.19-lb.kexec/arch/i386/kernel/Makefile	2002-09-06 16:39:47.000000000 +0800
@@ -39,6 +39,7 @@
 obj-$(CONFIG_SMP)		+= smp.o smpboot.o trampoline.o
 obj-$(CONFIG_X86_LOCAL_APIC)	+= mpparse.o apic.o nmi.o
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o acpitable.o
+obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
 obj-$(CONFIG_X86_VISWS_APIC)	+= visws_apic.o
 
 include $(TOPDIR)/Rules.make
diff -Nur linux-2.4.19-lb/arch/i386/kernel/machine_kexec.c linux-2.4.19-lb.kexec/arch/i386/kernel/machine_kexec.c
--- linux-2.4.19-lb/arch/i386/kernel/machine_kexec.c	1970-01-01 08:00:00.000000000 +0800
+++ linux-2.4.19-lb.kexec/arch/i386/kernel/machine_kexec.c	2002-09-06 16:38:47.000000000 +0800
@@ -0,0 +1,137 @@
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/kexec.h>
+#include <asm/pgalloc.h>
+#include <asm/segment.h>
+#include <asm/io.h>
+
+
+/*
+ * machine_kexec
+ * =======================
+ */
+
+
+static void set_idt(void *newidt, __u16 limit)
+{
+        unsigned char curidt[6];
+
+        /* ia32 supports unaligned loads & stores */
+        (*(__u16 *)(curidt)) = limit;
+        (*(__u32 *)(curidt +2)) = (unsigned long)(newidt);
+
+        __asm__ __volatile__ (
+                "lidt %0\n"
+                : "=m" (curidt)
+        );
+};
+
+
+static void set_gdt(void *newgdt, __u16 limit)
+{
+        unsigned char curgdt[6];
+
+        /* ia32 supports unaligned loads & stores */
+        (*(__u16 *)(curgdt)) = limit;
+        (*(__u32 *)(curgdt +2)) = (unsigned long)(newgdt);
+
+        __asm__ __volatile__ (
+                "lgdt %0\n"
+                : "=m" (curgdt)
+        );
+};
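
set_idt() and set_gdt() build the 6-byte pseudo-descriptor that lidt/lgdt expect by hand in a char array. The equivalent layout as a packed struct — just an illustration of what those byte stores construct, not part of the patch:

        /* The 6-byte operand of lidt/lgdt, written as a packed struct
         * instead of byte stores; illustration only.
         */
        struct pseudo_descriptor {
                __u16 limit;    /* size of the table minus one */
                __u32 base;     /* linear address of the table */
        } __attribute__((packed));
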
+ */ + pgd_t *pgd; + pmd_t *pmd; + struct mm_struct *mm = current->mm; + + pgd = pgd_offset(mm, address); + pmd = pmd_alloc(mm, pgd, address); + + if (pmd) { + pte_t *pte = pte_alloc(mm, pmd, address); + if (pte) { + set_pte(pte, + mk_pte(virt_to_page(phys_to_virt(address)), + PAGE_SHARED)); + __flush_tlb_one(address); + } + } +} + + +typedef void (*relocate_new_kernel_t)( + unsigned long indirection_page, unsigned long reboot_code_buffer, + unsigned long header, unsigned long start_address); + +const extern unsigned char relocate_new_kernel[]; +extern void relocate_new_kernel_end(void); +const extern unsigned int relocate_new_kernel_size; + +void machine_kexec(struct kimage *image) +{ + unsigned long *indirection_page; + void *reboot_code_buffer; + relocate_new_kernel_t rnk; + + /* Interrupts aren't acceptable while we reboot */ + cli(); + reboot_code_buffer = image->reboot_code_buffer; + indirection_page = phys_to_virt(image->head & PAGE_MASK); + + identity_map_page(virt_to_phys(reboot_code_buffer)); + + /* copy it out */ + memcpy(reboot_code_buffer, relocate_new_kernel, + relocate_new_kernel_size); + + /* The segment registers are funning things, they are + * automatically loaded from a table, in memory wherever you + * set them to a specific selector, but this table is never + * accessed again you set the segment to a different selector. + * + * The more common model is are caches where the behide + * the scenes work is done, but is also dropped at arbitrary + * times. + * + * I take advantage of this here by force loading the + * segments, before I zap the gdt with an invalid value. + */ + load_segments(); + /* The gdt & idt are now invalid. + * If you want to load them you must set up your own idt & gdt. + */ + set_gdt(phys_to_virt(0),0); + set_idt(phys_to_virt(0),0); + + /* now call it */ + rnk = (relocate_new_kernel_t) virt_to_phys(reboot_code_buffer); + (*rnk)(virt_to_phys(indirection_page), virt_to_phys(reboot_code_buffer), + image->header, image->start); +} + diff -Nur linux-2.4.19-lb/arch/i386/kernel/relocate_kernel.S linux-2.4.19-lb.kexec/arch/i386/kernel/relocate_kernel.S --- linux-2.4.19-lb/arch/i386/kernel/relocate_kernel.S 1970-01-01 08:00:00.000000000 +0800 +++ linux-2.4.19-lb.kexec/arch/i386/kernel/relocate_kernel.S 2002-09-06 16:38:47.000000000 +0800 @@ -0,0 +1,85 @@ +#include +#include + + /* Must be relocatable PIC code callable as a C function, that once + * it starts can not use the previous processes stack. + * + */ +ENTRY(relocate_new_kernel) + /* read the arguments and say goodbye to the stack */ + movl 4(%esp), %ebx /* indirection_page */ + movl 8(%esp), %ebp /* reboot_code_buffer */ + movl 12(%esp), %eax /* header */ + movl 16(%esp), %edx /* start address */ + + /* set a new stack at the bottom of our page... */ + lea 4096(%ebp), %esp + + /* store the parameters back on the stack */ + pushl %edx /* store header address */ + pushl %eax /* store the start address */ + + /* Store register value + pushl + + /* zero out flags */ + pushl $0 + popfl + cli + + /* Turn off paging, leave protection turned on */ + movl %cr0, %eax /* Turn off paging (bit 31 in CR0) */ + andl $0x7FFFFFFF, %eax + movl %eax, %cr0 + jmp 1f +1: + + /* Flush the TLB (needed?) 
diff -Nur linux-2.4.19-lb/arch/i386/kernel/relocate_kernel.S linux-2.4.19-lb.kexec/arch/i386/kernel/relocate_kernel.S
--- linux-2.4.19-lb/arch/i386/kernel/relocate_kernel.S	1970-01-01 08:00:00.000000000 +0800
+++ linux-2.4.19-lb.kexec/arch/i386/kernel/relocate_kernel.S	2002-09-06 16:38:47.000000000 +0800
@@ -0,0 +1,85 @@
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+        /* Must be relocatable PIC code callable as a C function, that once
+         * it starts can not use the previous process's stack.
+         *
+         */
+ENTRY(relocate_new_kernel)
+        /* read the arguments and say goodbye to the stack */
+        movl    4(%esp), %ebx /* indirection_page */
+        movl    8(%esp), %ebp /* reboot_code_buffer */
+        movl    12(%esp), %eax /* header */
+        movl    16(%esp), %edx /* start address */
+
+        /* set a new stack at the bottom of our page... */
+        lea     4096(%ebp), %esp
+
+        /* store the parameters back on the stack */
+        pushl   %edx /* store the start address */
+        pushl   %eax /* store the header address */
+
+        /* zero out flags */
+        pushl   $0
+        popfl
+        cli
+
+        /* Turn off paging, leave protection turned on */
+        movl    %cr0, %eax /* Turn off paging (bit 31 in CR0) */
+        andl    $0x7FFFFFFF, %eax
+        movl    %eax, %cr0
+        jmp     1f
+1:
+
+        /* Flush the TLB (needed?) */
+        xorl    %eax, %eax
+        movl    %eax, %cr3
+
+        /* Do the copies */
+        cld
+0:      /* top, read another word from the indirection page */
+        movl    %ebx, %ecx
+        movl    (%ebx), %ecx
+        addl    $4, %ebx
+        testl   $0x1, %ecx /* is it a destination page */
+        jz      1f
+        movl    %ecx, %edi
+        andl    $0xfffff000, %edi
+        jmp     0b
+1:
+        testl   $0x2, %ecx /* is it an indirection page */
+        jz      1f
+        movl    %ecx, %ebx
+        andl    $0xfffff000, %ebx
+        jmp     0b
+1:
+        testl   $0x4, %ecx /* is it the done indicator */
+        jz      1f
+        jmp     2f
+1:
+        testl   $0x8, %ecx /* is it the source indicator */
+        jz      0b /* Ignore it otherwise */
+        movl    %ecx, %esi /* For every source page do a copy */
+        andl    $0xfffff000, %esi
+
+        movl    $1024, %ecx
+        rep ; movsl
+        jmp     0b
+
+2:
+        /* leave %esp alone */
+        xorl    %ecx, %ecx
+        xorl    %edx, %edx
+        xorl    %esi, %esi
+        xorl    %edi, %edi
+        xorl    %ebp, %ebp
+        movl    $0x0E1FB007, %eax /* magic number ... */
+        popl    %ebx
+        ret
+relocate_new_kernel_end:
+
+ENTRY(relocate_new_kernel_size)
+        .long relocate_new_kernel_end - SYMBOL_NAME(relocate_new_kernel)
diff -Nur linux-2.4.19-lb/fs/Makefile linux-2.4.19-lb.kexec/fs/Makefile
--- linux-2.4.19-lb/fs/Makefile	2002-09-04 16:51:31.000000000 +0800
+++ linux-2.4.19-lb.kexec/fs/Makefile	2002-09-06 16:38:47.000000000 +0800
@@ -70,6 +70,7 @@
 subdir-$(CONFIG_XFS_FS)		+= xfs
 subdir-$(CONFIG_JFS_FS)		+= jfs
 
+obj-$(CONFIG_KEXEC)		+= kexec.o
 obj-$(CONFIG_BINFMT_AOUT)	+= binfmt_aout.o
 obj-$(CONFIG_BINFMT_EM86)	+= binfmt_em86.o
 obj-$(CONFIG_BINFMT_MISC)	+= binfmt_misc.o
diff -Nur linux-2.4.19-lb/fs/binfmt_elf.c linux-2.4.19-lb.kexec/fs/binfmt_elf.c
--- linux-2.4.19-lb/fs/binfmt_elf.c	2002-09-04 16:51:31.000000000 +0800
+++ linux-2.4.19-lb.kexec/fs/binfmt_elf.c	2002-09-06 16:38:47.000000000 +0800
@@ -41,6 +41,7 @@
 #define DLINFO_ITEMS 13
 
 #include <linux/elf.h>
+#include <linux/kexec.h>
 
 static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs);
 static int load_elf_library(struct file*);
@@ -876,6 +877,81 @@
 	return error;
 }
 
+#ifdef CONFIG_KEXEC
+int load_elf_kernel(struct kimage *image, struct file *file)
+{
+        struct elf_phdr * elf_ppnt, *elf_phdata;
+        int retval, size, i;
+        struct elfhdr elf_ex;
+        unsigned long elf_entry;
+
+        /* Initialize some variables */
+        elf_phdata = 0;
+
+        /* Get the exec-header */
+        elf_ex = *((struct elfhdr *) image->buf);
+
+        retval = -ENOEXEC;
+        /* First of all, some simple consistency checks */
+        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
+                goto out;
+
+        if (elf_ex.e_type != ET_EXEC)
+                goto out;
+        if (!elf_check_arch(&elf_ex))
+                goto out;
+
+        /* Now read in all of the header information */
+
+        retval = -ENOMEM;
+        size = elf_ex.e_phentsize * elf_ex.e_phnum;
+        if (size > 65536)
+                goto out;
+
+        elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL);
+        if (!elf_phdata)
+                goto out;
+
+        retval = kernel_read(file, elf_ex.e_phoff, (char *) elf_phdata, size);
+        if (retval < 0)
+                goto out;
+
+        elf_ppnt = elf_phdata;
+        elf_entry = (unsigned long) elf_ex.e_entry;
+
+        for(i = 0, elf_ppnt = elf_phdata; i < elf_ex.e_phnum; i++, elf_ppnt++) {
+                unsigned long start, end, mem_start, mem_end;
+                if (elf_ppnt->p_type != PT_LOAD) {
+                        continue;
+                }
+                start = elf_ppnt->p_offset
+                        - ELF_PAGEOFFSET(elf_ppnt->p_paddr);
+                end = start + elf_ppnt->p_filesz
+                        + ELF_PAGEOFFSET(elf_ppnt->p_paddr);
+                mem_start = elf_ppnt->p_paddr
+                        - ELF_PAGEOFFSET(elf_ppnt->p_paddr);
+                mem_end = mem_start + elf_ppnt->p_memsz
+                        + ELF_PAGEOFFSET(elf_ppnt->p_paddr);
+
+                retval = kimage_load_segment(image, file, start, end,
+                        mem_start, mem_end);
+                if (retval < 0) {
+                        goto out;
+                }
+        }
+
+        image->start = elf_entry;
+
+        retval = 0;
+ out:
+        if (elf_phdata)
+                kfree(elf_phdata);
+        return retval;
+}
+#endif /* CONFIG_KEXEC */
+
 /*
  * Note that some platforms still use traditional core dumps and not
  * the ELF core dump.  Each platform can select it as appropriate.
diff -Nur linux-2.4.19-lb/fs/kexec.c linux-2.4.19-lb.kexec/fs/kexec.c
--- linux-2.4.19-lb/fs/kexec.c	1970-01-01 08:00:00.000000000 +0800
+++ linux-2.4.19-lb.kexec/fs/kexec.c	2002-09-06 16:38:47.000000000 +0800
@@ -0,0 +1,749 @@
+#include <linux/mm.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/version.h>
+#include <linux/compile.h>
+#include <linux/kexec.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <net/checksum.h>
+
+#define BOOTLOADER "Linux"
+#define BOOTLOADER_VERSION UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") " UTS_VERSION
+
+#define RND_NOTE(X) (((X) + 3) & ~3)
+#define UPSZ(X) (RND_NOTE(sizeof(X)))
+static struct {
+        Elf_Bhdr hdr;
+        Elf_Nhdr bl_hdr;
+        unsigned char bl_desc[UPSZ(BOOTLOADER)];
+        Elf_Nhdr blv_hdr;
+        unsigned char blv_desc[UPSZ(BOOTLOADER_VERSION)];
+        Elf_Nhdr cmd_hdr;
+} elf_boot_notes = {
+        .hdr = {
+                .b_signature = 0x0E1FB007,
+                .b_size = sizeof(elf_boot_notes),
+                .b_checksum = 0,
+                .b_records = 3,
+        },
+        .bl_hdr = {
+                .n_namesz = 0,
+                .n_descsz = sizeof(BOOTLOADER),
+                .n_type = EBN_BOOTLOADER_NAME,
+        },
+        .bl_desc = BOOTLOADER,
+        .blv_hdr = {
+                .n_namesz = 0,
+                .n_descsz = sizeof(BOOTLOADER_VERSION),
+                .n_type = EBN_BOOTLOADER_VERSION,
+        },
+        .blv_desc = BOOTLOADER_VERSION,
+        .cmd_hdr = {
+                .n_namesz = 0,
+                .n_descsz = 0,
+                .n_type = EBN_COMMAND_LINE,
+        },
+};
+
+#define KEXEC_ENV_ALIGN 8 /* Must be a power of two! */
+#define KEXEC_HEADER_SIZE (sizeof(struct uniform_boot_header))
+
+#define KEXEC_ENV_ALIGNED(value) (((value) + KEXEC_ENV_ALIGN - 1) & ~(KEXEC_ENV_ALIGN - 1))
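
do_kexec() later fills in b_size and b_records, then computes b_checksum by running csum_partial() over the arch notes, the command line, and this header block (with b_checksum itself zeroed), folding the result to 16 bits. For illustration only — the kernel's csum_partial()/csum_fold() are architecture-specific assembly — a portable stand-in looks like:

        /* Portable stand-in for csum_partial()+csum_fold(); a sketch,
         * not part of the patch.  Computes the 16-bit ones'-complement
         * checksum that ends up in Elf_Bhdr.b_checksum (computed with
         * b_checksum set to 0), assuming little-endian byte order.
         */
        static unsigned short note_checksum(const unsigned char *buf, unsigned long len)
        {
                unsigned long sum = 0;
                unsigned long i;
                for (i = 0; i + 1 < len; i += 2)
                        sum += buf[i] | (buf[i + 1] << 8);      /* 16-bit words */
                if (len & 1)
                        sum += buf[len - 1];                    /* trailing odd byte */
                while (sum >> 16)
                        sum = (sum & 0xffff) + (sum >> 16);     /* fold the carries */
                return ~sum & 0xffff;
        }
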
+
+/* As designed kexec can only use memory that you don't need
+ * kmap to access: memory that you can use virt_to_phys() on,
+ * and that get_free_page() can allocate.
+ *
+ * In the best case you need one page for the transition from
+ * virtual to physical memory.  And this page must be identity
+ * mapped, which pretty much leaves you with pages < PAGE_OFFSET,
+ * as you can only mess with user pages.
+ *
+ * As the only subset of memory that is easy to restrict allocation
+ * to is the physical memory mapped into the kernel, I do that
+ * with get_free_page and hope it is enough.
+ *
+ * I don't know of a good way to calculate which pages
+ * get_free_page will return independent of architecture, so I
+ * depend on <asm/kexec.h> to properly set
+ * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DESTINATION_MEMORY_LIMIT.
+ */
+
+void kimage_init(struct kimage *image)
+{
+        memset(image, 0, sizeof(*image));
+        image->head = 0;
+        image->entry = &image->head;
+        image->last_entry = &image->head;
+}
+
+static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
+{
+        if (image->offset != 0) {
+                image->entry++;
+        }
+        if (image->entry == image->last_entry) {
+                kimage_entry_t *ind_page;
+                ind_page = (void *)get_free_page(GFP_KERNEL);
+                if (!ind_page) {
+                        return -ENOMEM;
+                }
+                *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
+                image->entry = ind_page;
+                image->last_entry =
+                        ind_page + ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
+        }
+        *image->entry = entry;
+        image->entry++;
+        image->offset = 0;
+        return 0;
+}
+
+static int kimage_verify_destination(unsigned long destination)
+{
+        int result;
+
+        /* Assume the page is bad unless we pass the checks */
+        result = -EADDRNOTAVAIL;
+
+        if (destination >= KEXEC_DESTINATION_MEMORY_LIMIT) {
+                goto out;
+        }
+
+        /* FIXME:
+         * add checking to ensure the new image doesn't go into
+         * invalid or reserved areas of RAM.
+         */
+        result = 0;
+out:
+        return result;
+}
+
+static int kimage_set_destination(
+        struct kimage *image, unsigned long destination)
+{
+        int result;
+        destination &= PAGE_MASK;
+        result = kimage_verify_destination(destination);
+        if (result) {
+                return result;
+        }
+        result = kimage_add_entry(image, destination | IND_DESTINATION);
+        if (result == 0) {
+                image->destination = destination;
+        }
+        return result;
+}
+
+
+static int kimage_add_page(struct kimage *image, unsigned long page)
+{
+        int result;
+        page &= PAGE_MASK;
+        result = kimage_verify_destination(image->destination);
+        if (result) {
+                return result;
+        }
+        result = kimage_add_entry(image, page | IND_SOURCE);
+        if (result == 0) {
+                image->destination += PAGE_SIZE;
+        }
+        return result;
+}
+
+
+static int kimage_terminate(struct kimage *image)
+{
+        int result;
+        result = kimage_add_entry(image, IND_DONE);
+        if (result == 0) {
+                /* Point at the terminating element */
+                image->entry--;
+        }
+        return result;
+}
+
+#define for_each_kimage_entry(image, ptr, entry) \
+	for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
+		ptr = (entry & IND_INDIRECTION)? \
+			phys_to_virt((entry & PAGE_MASK)): ptr +1)
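
The macro above hides the indirection chasing: ptr steps through a page of entries and silently hops to the next indirection page when it meets one. A hypothetical debugging helper (not part of the patch) shows the usage pattern:

        /* Hypothetical debugging aid, illustration only: dump the
         * tagged entry list.  ptr follows indirection pages
         * transparently; entry is the raw tagged value.
         */
        static void kimage_dump(struct kimage *image)
        {
                kimage_entry_t *ptr, entry;
                for_each_kimage_entry(image, ptr, entry) {
                        if (entry & IND_DESTINATION)
                                printk(KERN_DEBUG "dest     %08lx\n", entry & PAGE_MASK);
                        else if (entry & IND_INDIRECTION)
                                printk(KERN_DEBUG "indirect %08lx\n", entry & PAGE_MASK);
                        else if (entry & IND_SOURCE)
                                printk(KERN_DEBUG "source   %08lx\n", entry & PAGE_MASK);
                }
        }
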
+
+void kimage_free(struct kimage *image)
+{
+        kimage_entry_t *ptr, entry;
+        kimage_entry_t ind = 0;
+        for_each_kimage_entry(image, ptr, entry) {
+                if (entry & IND_INDIRECTION) {
+                        /* Free the previous indirection page */
+                        if (ind & IND_INDIRECTION) {
+                                free_page((unsigned long)phys_to_virt(ind & PAGE_MASK));
+                        }
+                        /* Save this indirection page until we are
+                         * done with it.
+                         */
+                        ind = entry;
+                }
+                else if (entry & IND_SOURCE) {
+                        free_page((unsigned long)phys_to_virt(entry & PAGE_MASK));
+                }
+        }
+        /* Free the final indirection page */
+        if (ind & IND_INDIRECTION) {
+                free_page((unsigned long)phys_to_virt(ind & PAGE_MASK));
+        }
+}
+
+static int kimage_is_destination_page(
+        struct kimage *image, unsigned long page)
+{
+        kimage_entry_t *ptr, entry;
+        unsigned long destination;
+        destination = 0;
+        page &= PAGE_MASK;
+        for_each_kimage_entry(image, ptr, entry) {
+                if (entry & IND_DESTINATION) {
+                        destination = entry & PAGE_MASK;
+                }
+                else if (entry & IND_SOURCE) {
+                        if (page == destination) {
+                                return 1;
+                        }
+                        destination += PAGE_SIZE;
+                }
+        }
+        return 0;
+}
+
+static int kimage_get_unused_area(
+        struct kimage *image, unsigned long size, unsigned long align,
+        unsigned long *area)
+{
+        /* Walk through mem_map and find the first chunk of
+         * unused memory that is at least size bytes long.
+         */
+        /* Since the kernel plays with PG_reserved, mem_map is less
+         * than ideal for this purpose, but it will give us a correct
+         * conservative estimate of what we need to do.
+         */
+        /* For now we take advantage of the fact that all kernel pages
+         * are marked with PG_reserved to allocate a large
+         * contiguous area for the reboot code buffer.
+         */
+        unsigned long addr;
+        unsigned long start, end;
+        unsigned long mask;
+        mask = ((1 << align) -1);
+        start = end = PAGE_SIZE;
+        for(addr = PAGE_SIZE; addr < KEXEC_SOURCE_MEMORY_LIMIT; addr += PAGE_SIZE) {
+                struct page *page;
+                unsigned long aligned_start;
+                page = virt_to_page(phys_to_virt(addr));
+                if (PageReserved(page) || PageSkip(page) ||
+                        kimage_is_destination_page(image, addr)) {
+                        /* The current page is reserved so the start &
+                         * end of the next area must be at least at the
+                         * next page.
+                         */
+                        start = end = addr + PAGE_SIZE;
+                }
+                else {
+                        /* O.k. The current page isn't reserved
+                         * so push up the end of the area.
+                         */
+                        end = addr;
+                }
+                aligned_start = (start + mask) & ~mask;
+                if (aligned_start > start) {
+                        continue;
+                }
+                if (aligned_start > end) {
+                        continue;
+                }
+                if (end - aligned_start >= size) {
+                        *area = aligned_start;
+                        return 0;
+                }
+        }
+        *area = 0;
+        return -ENOSPC;
+}
+
+static kimage_entry_t *kimage_dst_conflict(
+        struct kimage *image, unsigned long page, kimage_entry_t *limit)
+{
+        kimage_entry_t *ptr, entry;
+        unsigned long destination = 0;
+        for_each_kimage_entry(image, ptr, entry) {
+                if (ptr == limit) {
+                        return 0;
+                }
+                else if (entry & IND_DESTINATION) {
+                        destination = entry & PAGE_MASK;
+                }
+                else if (entry & IND_SOURCE) {
+                        if (page == destination) {
+                                return ptr;
+                        }
+                        destination += PAGE_SIZE;
+                }
+        }
+        return 0;
+}
+
+static kimage_entry_t *kimage_src_conflict(
+        struct kimage *image, unsigned long destination, kimage_entry_t *limit)
+{
+        kimage_entry_t *ptr, entry;
+        for_each_kimage_entry(image, ptr, entry) {
+                unsigned long page;
+                if (ptr == limit) {
+                        return 0;
+                }
+                else if (entry & IND_DESTINATION) {
+                        /* nop */
+                }
+                else if (entry & IND_DONE) {
+                        /* nop */
+                }
+                else {
+                        /* SOURCE & INDIRECTION */
+                        page = entry & PAGE_MASK;
+                        if (page == destination) {
+                                return ptr;
+                        }
+                }
+        }
+        return 0;
+}
+
+static int kimage_get_off_destination_pages(struct kimage *image)
+{
+        kimage_entry_t *ptr, *cptr, entry;
+        unsigned long buffer, page;
+        unsigned long destination = 0;
+
+        /* Here we implement safeguards to ensure that a source
+         * page is not copied to its destination page before the
+         * data on the destination page is no longer useful.
+         *
+         * To make it work we actually wind up with a
+         * stronger condition.
  For every page considered
+         * it is either its own destination page or it is
+         * not a destination page of any page considered.
+         *
+         * Invariants
+         * 1. buffer is not a destination of a previous page.
+         * 2. page is not a destination of a previous page.
+         * 3. destination is not a previous source page.
+         *
+         * Result: Either a source page and a destination page
+         * are the same or the page is not a destination page.
+         *
+         * These checks could be done when we allocate the pages,
+         * but doing it as a final pass allows us more freedom
+         * on how we allocate pages.
+         *
+         * Also while the checks are necessary, in practice nothing
+         * happens.  The destination kernel wants to sit in the
+         * same physical addresses as the current kernel so we never
+         * actually allocate a destination page.
+         *
+         * BUGS: This is an O(N^2) algorithm.
+         */
+
+
+        buffer = __get_free_page(GFP_KERNEL);
+        if (!buffer) {
+                return -ENOMEM;
+        }
+        buffer = virt_to_phys((void *)buffer);
+        for_each_kimage_entry(image, ptr, entry) {
+                kimage_entry_t *limit;
+                if (entry & IND_DESTINATION) {
+                        destination = entry & PAGE_MASK;
+                }
+                /* Only source and indirection entries name an
+                 * allocated page.
+                 */
+                if (!((entry & IND_SOURCE) | (entry & IND_INDIRECTION))) {
+                        continue;
+                }
+                page = entry & PAGE_MASK;
+                limit = ptr;
+                if (entry & IND_INDIRECTION) {
+                        /* Indirection pages must include all of their
+                         * contents in limit checking.
+                         */
+                        limit = phys_to_virt(page + PAGE_SIZE - sizeof(*limit));
+                }
+
+                /* See if a previous page has the current page as its
+                 * destination.
+                 * i.e. invariant 2
+                 */
+                cptr = kimage_dst_conflict(image, page, limit);
+                if (cptr) {
+                        unsigned long cpage;
+                        kimage_entry_t centry;
+                        centry = *cptr;
+                        cpage = centry & PAGE_MASK;
+                        memcpy(phys_to_virt(buffer), phys_to_virt(page), PAGE_SIZE);
+                        memcpy(phys_to_virt(page), phys_to_virt(cpage), PAGE_SIZE);
+                        *cptr = page | (centry & ~PAGE_MASK);
+                        *ptr = buffer | (entry & ~PAGE_MASK);
+                        buffer = cpage;
+                }
+                if (!(entry & IND_SOURCE)) {
+                        continue;
+                }
+
+                /* See if a previous page is our destination page.
+                 * If so claim it now.
+                 * i.e. invariant 3
+                 */
+                cptr = kimage_src_conflict(image, destination, limit);
+                if (cptr) {
+                        unsigned long cpage;
+                        kimage_entry_t centry;
+                        centry = *cptr;
+                        cpage = centry & PAGE_MASK;
+                        memcpy(phys_to_virt(buffer), phys_to_virt(cpage), PAGE_SIZE);
+                        memcpy(phys_to_virt(cpage), phys_to_virt(page), PAGE_SIZE);
+                        *cptr = buffer | (centry & ~PAGE_MASK);
+                        *ptr = cpage | (entry & ~PAGE_MASK);
+                        buffer = page;
+                }
+                /* If the buffer is my destination page do the copy now
+                 * i.e. invariants 3 & 1
+                 */
+                if (buffer == destination) {
+                        memcpy(phys_to_virt(buffer), phys_to_virt(page), PAGE_SIZE);
+                        *ptr = buffer | (entry & ~PAGE_MASK);
+                        buffer = page;
+                }
+        }
+        free_page((unsigned long)phys_to_virt(buffer));
+        return 0;
+}
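
Each conflict above is resolved by rotating page contents through the spare buffer page, so two memcpy()s suffice and no data is ever lost. The rotation in miniature (illustration only, hypothetical helper):

        /* The page rotation used above, distilled; a sketch only.
         * "a" and "b" are physical page addresses whose data must
         * trade places in the list; *spare holds no live data.
         * The caller re-tags the two list entries to follow the
         * data, and the old "b" becomes the new spare.
         */
        static void rotate_pages(unsigned long a, unsigned long b, unsigned long *spare)
        {
                memcpy(phys_to_virt(*spare), phys_to_virt(a), PAGE_SIZE); /* save a's data   */
                memcpy(phys_to_virt(a), phys_to_virt(b), PAGE_SIZE);      /* b's data into a */
                *spare = b;
        }
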
+
+static int kimage_add_empty_pages(struct kimage *image,
+        unsigned long len)
+{
+        unsigned long pos;
+        int result;
+        for(pos = 0; pos < len; pos += PAGE_SIZE) {
+                char *page;
+                result = -ENOMEM;
+                page = (void *)get_free_page(GFP_KERNEL);
+                if (!page) {
+                        goto out;
+                }
+                result = kimage_add_page(image, virt_to_phys(page));
+                if (result) {
+                        goto out;
+                }
+        }
+        result = 0;
+ out:
+        return result;
+}
+
+static int kimage_copy_array(
+        struct kimage *image, void *addr, unsigned long data_len)
+{
+        int result;
+        unsigned long pos;
+        unsigned long length;
+        char *ptr;
+        ptr = addr;
+        /* Pad to a 4 byte boundary... */
+        length = RND_NOTE(data_len);
+
+        for(pos = 0; pos < length; ) {
+                unsigned long len, cplen, offset;
+                char *page;
+                if ((*image->entry) & IND_SOURCE) {
+                        page = phys_to_virt((*image->entry) & PAGE_MASK);
+                }
+                else {
+                        result = -ENOMEM;
+                        page = (void *)__get_free_page(GFP_KERNEL);
+                        if (!page) {
+                                goto out;
+                        }
+                        result = kimage_add_page(image, virt_to_phys(page));
+                        if (result) {
+                                goto out;
+                        }
+                        /* Don't advance the entry until the page is full */
+                        image->entry--;
+                }
+                offset = image->offset;
+                len = length - pos;
+                if (len > (PAGE_SIZE - offset)) {
+                        len = (PAGE_SIZE - offset);
+                }
+                cplen = (pos < data_len)? data_len - pos : 0;
+                if (cplen > len) {
+                        cplen = len;
+                }
+                /* Copy the data */
+                memcpy(page + offset, ptr + pos, cplen);
+                /* Zero the pad bytes or I mess up the checksum... */
+                memset(page + offset + cplen, 0, len - cplen);
+                offset += len;
+                if (offset == PAGE_SIZE) {
+                        offset = 0;
+                        image->entry++;
+                }
+                /* write back the offset and advance past the copied bytes */
+                image->offset = offset;
+                pos += len;
+        }
+        result = 0;
+ out:
+        return result;
+}
+
+int kimage_load_segment(struct kimage *image, struct file *file,
+        unsigned long fstart, unsigned long fend,
+        unsigned long mstart, unsigned long mend)
+{
+        int result;
+        unsigned long offset;
+        unsigned long offset_end;
+
+        result = 0;
+
+        offset_end = mend - mstart + fstart;
+
+        result = kimage_set_destination(image, mstart);
+        if (result < 0) {
+                goto out;
+        }
+        for(offset = fstart; offset < offset_end; offset += PAGE_SIZE) {
+                void *page;
+                size_t size;
+                page = (void *)__get_free_page(GFP_KERNEL);
+                if (page == 0) {
+                        result = -ENOMEM;
+                        goto out;
+                }
+                result = kimage_add_page(image, virt_to_phys(page));
+                if (result < 0) {
+                        goto out;
+                }
+                if (fend <= offset) {
+                        /* We are past the end; zero the whole page */
+                        memset((void *)page, 0, PAGE_SIZE);
+                        continue;
+                }
+                size = PAGE_SIZE;
+                if (size > (fend - offset)) {
+                        size = fend - offset;
+                }
+
+                /* FIXME maybe I should steal the page cache page here? */
+                result = kernel_read(file, offset, page, size);
+                if (result != size) {
+                        result = (result < 0)? result : -EIO;
+                        goto out;
+                }
+                if (size < PAGE_SIZE) {
+                        /* zero the trailing part of the page */
+                        memset(((char *)page) + size, 0, PAGE_SIZE - size);
+                }
+        }
+        result = 0;
+ out:
+        return result;
+}
+
+
+static inline void put_args(char *command_line)
+{
+        free_page((unsigned long)command_line);
+}
+
+static inline char *get_args(const char *args)
+{
+        char *result;
+        char *dest;
+        int retval;
+        int len;
+        unsigned long page;
+
+        result = ERR_PTR(-ENOMEM);
+        page = __get_free_page(GFP_KERNEL);
+        if (!page)
+                goto out;
+
+        len = PAGE_SIZE;
+        dest = (char *)page;
+        retval = strncpy_from_user(dest, args, len);
+        if (retval < 0) {
+                put_args(dest);
+                result = ERR_PTR(retval);
+                goto out;
+        }
+
+        /* See if the arg list is too long */
+        if (strnlen(dest, len) >= len) {
+                put_args(dest);
+                result = ERR_PTR(-E2BIG);
+                goto out;
+        }
+
+        result = dest;
+ out:
+        return result;
+}
+
+
+/* do_kexec executes a new kernel
+ */
+int do_kexec(const char *arg_filename, const char *arg_arg, struct kimage *image)
+{
+        char *filename, *arg;
+        struct nameidata nd;
+        struct file *file;
+        int result;
+        void *arch_notes;
+        unsigned long arg_len, extra_len;
+        unsigned long arch_len, arch_note_count;
+        unsigned long reboot_code_buffer;
+        unsigned long csum;
+        kimage_entry_t *end;
+
+        /* Initialize variables */
+        file = 0;
+        filename = 0;
+        arg = 0;
+
+        /* We only trust the superuser with rebooting the system. */
+        if (!capable(CAP_SYS_BOOT)) {
+                result = -EPERM;
+                goto out;
+        }
+
+        filename = getname(arg_filename);
+        if (IS_ERR(filename)) {
+                result = PTR_ERR(filename);
+                filename = 0;
+                goto out;
+        }
+
+        arg = get_args(arg_arg);
+        if (IS_ERR(arg)) {
+                result = PTR_ERR(arg);
+                arg = 0;
+                goto out;
+        }
+
+        result = -ENOENT;
+        if (path_init(filename, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd))
+                result = path_walk(filename, &nd);
+        if (result)
+                goto out;
+
+        file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
+        if (IS_ERR(file)) {
+                result = PTR_ERR(file);
+                file = 0;
+                goto out;
+        }
+
+        memset(image->buf, 0, sizeof(image->buf));
+        result = kernel_read(file, 0, image->buf, sizeof(image->buf));
+        if (result < 0)
+                goto out;
+        if (result != sizeof(image->buf)) {
+                result = -ENOEXEC;
+                goto out;
+        }
+        result = load_elf_kernel(image, file);
+        if (result)
+                goto out;
+
+        /* Get any architecture specific boot notes we want to pass */
+        arch_len = get_elf_boot_notes(&arch_notes, &arch_note_count);
+
+        /* Find out how long our string is */
+        arg_len = strlen(arg) +1; /* include the terminating null */
+
+        extra_len = sizeof(elf_boot_notes) + RND_NOTE(arg_len) + arch_len;
+
+        /* Find a non-overlapping location where I can put the notes... */
+        result = kimage_get_unused_area(image, extra_len, 0, &image->header);
+        if (result)
+                goto out;
+
+        /* Compute all of the pieces that change dynamically */
+        elf_boot_notes.hdr.b_size = extra_len;
+        elf_boot_notes.hdr.b_records += arch_note_count;
+        elf_boot_notes.hdr.b_checksum = 0;
+        elf_boot_notes.cmd_hdr.n_descsz = arg_len;
+        csum = 0;
+        csum = csum_partial(arch_notes, arch_len, csum);
+        csum = csum_partial(arg, arg_len, csum);
+        csum = csum_partial((void *)&elf_boot_notes, sizeof(elf_boot_notes), csum);
+        elf_boot_notes.hdr.b_checksum = csum_fold(csum);
+
+        result = kimage_set_destination(image, image->header);
+        if (result)
+                goto out;
+
+        result = kimage_copy_array(image, &elf_boot_notes, sizeof(elf_boot_notes));
+        if (result)
+                goto out;
+
+        result = kimage_copy_array(image, arg, arg_len);
+        if (result)
+                goto out;
+
+        result = kimage_copy_array(image, arch_notes, arch_len);
+        if (result)
+                goto out;
+
+        /* Terminate early so I can get a place holder. */
+        result = kimage_terminate(image);
+        if (result)
+                goto out;
+        end = image->entry;
+
+        result = kimage_get_unused_area(
+                image, KEXEC_REBOOT_CODE_SIZE, KEXEC_REBOOT_CODE_ALIGN,
+                &reboot_code_buffer);
+        if (result)
+                goto out;
+
+        /* Allocating pages we should never need is silly but the
+         * code won't work correctly unless we have dummy pages to
+         * work with.
+         */
+        result = kimage_set_destination(image, reboot_code_buffer);
+        if (result)
+                goto out;
+        result = kimage_add_empty_pages(image, KEXEC_REBOOT_CODE_SIZE);
+        if (result)
+                goto out;
+        image->reboot_code_buffer = phys_to_virt(reboot_code_buffer);
+
+        result = kimage_terminate(image);
+        if (result)
+                goto out;
+
+        result = kimage_get_off_destination_pages(image);
+        if (result)
+                goto out;
+
+        /* Now hide the extra source pages for the reboot code buffer */
+        image->entry = end;
+        result = kimage_terminate(image);
+        if (result)
+                goto out;
+
+        result = 0;
+ out:
+        /* cleanup and exit */
+        if (file) fput(file);
+        if (arg) put_args(arg);
+        if (filename) putname(filename);
+        return result;
+}
+
diff -Nur linux-2.4.19-lb/include/asm-alpha/kexec.h linux-2.4.19-lb.kexec/include/asm-alpha/kexec.h
--- linux-2.4.19-lb/include/asm-alpha/kexec.h	1970-01-01 08:00:00.000000000 +0800
+++ linux-2.4.19-lb.kexec/include/asm-alpha/kexec.h	2002-09-06 16:38:47.000000000 +0800
@@ -0,0 +1,31 @@
+#ifndef _ALPHA_KEXEC_H
+#define _ALPHA_KEXEC_H
+
+/* Right now the alpha has 40 bits of physical address space.
+ * There's got to be a better way, but for now I'm just
+ * hardcoding everything to stay within 40 bits.
+ */
+
+ /* Maximum physical address we can use pages from: 40 bits */
+#define KEXEC_SOURCE_MEMORY_LIMIT 0xFFFFFFFFFF
+ /* Maximum address we can reach in physical address mode: 40 bits */
+#define KEXEC_DESTINATION_MEMORY_LIMIT 0xFFFFFFFFFF
+
+/* To reboot on alpha, if we copy the palcode we need a
+ * buffer with 32k alignment.
+ */
+extern unsigned char __start__text_kexec;
+extern unsigned char __stop__text_kexec;
+
+#if 0
+#define KEXEC_REBOOT_CODE_SIZE (&__stop__text_kexec - &__start__text_kexec)
+#define KEXEC_REBOOT_CODE_ALIGN 15
+#else
+#define KEXEC_PGTABLE_PAGES (((max_mapnr + ((1 << 10) -1)) >> 10) + ((max_mapnr + ((1 << 20) -1)) >> 20) + 1)
+
+#define KEXEC_REBOOT_CODE_SIZE ((&__stop__text_kexec - &__start__text_kexec) + (KEXEC_PGTABLE_PAGES << PAGE_SHIFT))
+#define KEXEC_REBOOT_CODE_ALIGN 0
+#endif
+
+
+#endif /* _ALPHA_KEXEC_H */
diff -Nur linux-2.4.19-lb/include/asm-i386/kexec.h linux-2.4.19-lb.kexec/include/asm-i386/kexec.h
--- linux-2.4.19-lb/include/asm-i386/kexec.h	1970-01-01 08:00:00.000000000 +0800
+++ linux-2.4.19-lb.kexec/include/asm-i386/kexec.h	2002-09-06 16:38:47.000000000 +0800
@@ -0,0 +1,33 @@
+#ifndef _I386_KEXEC_H
+#define _I386_KEXEC_H
+
+#include <asm/fixmap.h>
+
+/*
+ * KEXEC_SOURCE_MEMORY_LIMIT is the maximum page get_free_page can return,
+ * i.e. the maximum page that is mapped directly into kernel memory,
+ * so that kmap is not required.
+ *
+ * Someone correct me if FIXADDR_START - PAGE_OFFSET is not the correct
+ * calculation for the amount of memory directly mappable into the
+ * kernel memory space.
+ */
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT (FIXADDR_START - PAGE_OFFSET)
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+
+#define KEXEC_REBOOT_CODE_SIZE 4096
+#define KEXEC_REBOOT_CODE_ALIGN 0
+
+static inline unsigned long get_elf_boot_notes(
+        void **note_addr_p, unsigned long *note_count)
+{
+        *note_addr_p = 0;
+        *note_count = 0;
+        return 0;
+}
+
+
+#endif /* _I386_KEXEC_H */
diff -Nur linux-2.4.19-lb/include/linux/kexec.h linux-2.4.19-lb.kexec/include/linux/kexec.h
--- linux-2.4.19-lb/include/linux/kexec.h	1970-01-01 08:00:00.000000000 +0800
+++ linux-2.4.19-lb.kexec/include/linux/kexec.h	2002-09-06 16:38:47.000000000 +0800
@@ -0,0 +1,103 @@
+#ifndef LINUX_KEXEC_H
+#define LINUX_KEXEC_H
+
+#include <linux/types.h>
+#include <linux/binfmts.h>
+
+/* This defines the structure of a table of parameters useful for ELF
+ * bootable images.  These parameters are all passed to and generated
+ * by the bootloader for the booted image.  For simplicity and
+ * consistency the ELF note format is reused.
+ *
+ * All of the information must be Position Independent Data.
+ * That is, it must be safe to relocate the whole ELF boot parameter
+ * block without changing the meaning or correctness of the data.
+ * Additionally it must be safe to permute the order of the ELF notes
+ * into any possible permutation without changing the meaning or
+ * correctness of the data.
+ *
+ */
+
+typedef uint16_t Elf_Half;
+typedef uint32_t Elf_Word;
+typedef uint64_t Elf_Xword;
+
+typedef struct
+{
+        Elf_Word b_signature; /* "0x0E1FB007" */
+        Elf_Word b_size;
+        Elf_Half b_checksum;
+        Elf_Half b_records;
+} Elf_Bhdr;
+
+typedef struct
+{
+        Elf_Word n_namesz;	/* Length of the note's name. */
+        Elf_Word n_descsz;	/* Length of the note's descriptor. */
+        Elf_Word n_type;	/* Type of the note. */
+} Elf_Nhdr;
+
+
+/* For standard notes n_namesz must be zero */
+/* All of the following standard note types provide a single null
+ * terminated string in the descriptor.
+ */
+#define EBN_FIRMWARE_TYPE	0x00000001
+/* On platforms that support multiple classes of firmware this field
+ * specifies the class of firmware you are loaded under.
+ */
+#define EBN_BOOTLOADER_NAME	0x00000002
+/* This specifies just the name of the bootloader for easy comparison */
+#define EBN_BOOTLOADER_VERSION	0x00000003
+/* This specifies the version of the bootloader */
+#define EBN_COMMAND_LINE	0x00000004
+/* This specifies a command line that can be set by user interaction,
+ * and is provided as a free form string to the loaded image.
+ */
+
+
+/* For Linux specific notes n_namesz must be 6, and n_name must be "Linux" */
+#define LBN_HWRPB	0x00000001
+
+
+/*
+ * This structure is used to hold the arguments that are used when loading
+ * kernel binaries.
+ */
+
+typedef unsigned long kimage_entry_t;
+#define IND_DESTINATION	0x1
+#define IND_INDIRECTION	0x2
+#define IND_DONE	0x4
+#define IND_SOURCE	0x8
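
Each list entry is a page-aligned physical address with the entry type tagged into the low bits (pages are at least 4K aligned, so the bottom bits are always free). A quick illustration with hypothetical helpers, not part of the patch:

        /* Illustration only: how tagged entries compose and decompose. */
        static inline kimage_entry_t make_entry(unsigned long page, unsigned long tag)
        {
                return (page & PAGE_MASK) | tag;  /* e.g. make_entry(addr, IND_SOURCE) */
        }
        static inline unsigned long entry_page(kimage_entry_t entry)
        {
                return entry & PAGE_MASK;         /* strip the IND_* tag */
        }
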
+
+struct kimage {
+        char buf[BINPRM_BUF_SIZE];
+
+        kimage_entry_t head;
+        kimage_entry_t *entry;
+        kimage_entry_t *last_entry;
+
+        unsigned long destination;
+        unsigned long offset;
+
+        unsigned long header;
+        unsigned long start;
+        void *reboot_code_buffer;
+};
+
+/* kexec helper functions */
+void kimage_init(struct kimage *image);
+void kimage_free(struct kimage *image);
+int kimage_load_segment(struct kimage *image, struct file *file,
+        unsigned long fstart, unsigned long fend,
+        unsigned long mstart, unsigned long mend);
+
+/* kexec interface functions */
+extern unsigned long get_elf_boot_notes(void **note_addr_p, unsigned long *note_count);
+extern void machine_kexec(struct kimage *image);
+extern int do_kexec(const char *filename, const char *arg, struct kimage *image);
+extern int load_elf_kernel(struct kimage *image, struct file *file);
+
+#endif /* LINUX_KEXEC_H */
+
diff -Nur linux-2.4.19-lb/include/linux/reboot.h linux-2.4.19-lb.kexec/include/linux/reboot.h
--- linux-2.4.19-lb/include/linux/reboot.h	2002-09-04 16:51:32.000000000 +0800
+++ linux-2.4.19-lb.kexec/include/linux/reboot.h	2002-09-06 16:38:47.000000000 +0800
@@ -28,7 +28,12 @@
 #define LINUX_REBOOT_CMD_CAD_OFF	0x00000000
 #define LINUX_REBOOT_CMD_POWER_OFF	0x4321FEDC
 #define LINUX_REBOOT_CMD_RESTART2	0xA1B2C3D4
+#define LINUX_REBOOT_CMD_KEXEC		0x81726354
 
+struct kexec_args {
+	const char *filename;
+	const char *arg;
+};
 
 #ifdef __KERNEL__
 
diff -Nur linux-2.4.19-lb/kernel/sys.c linux-2.4.19-lb.kexec/kernel/sys.c
--- linux-2.4.19-lb/kernel/sys.c	2002-09-04 16:51:32.000000000 +0800
+++ linux-2.4.19-lb.kexec/kernel/sys.c	2002-09-06 16:38:47.000000000 +0800
@@ -14,6 +14,7 @@
 #include <linux/prctl.h>
 #include <linux/init.h>
 #include <linux/highuid.h>
+#include <linux/kexec.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -323,6 +324,38 @@
 		machine_restart(buffer);
 		break;
 
+#ifdef CONFIG_KEXEC
+	case LINUX_REBOOT_CMD_KEXEC:
+	{
+		/* Am I using too much stack space here? */
+		struct kexec_args args;
+		struct kimage image;
+		int result;
+
+		result = copy_from_user(&args, arg, sizeof(args));
+		if (result) {
+			unlock_kernel();
+			return -EFAULT;
+		}
+		kimage_init(&image);
+		result = do_kexec(args.filename, args.arg, &image);
+		if (result) {
+			kimage_free(&image);
+			unlock_kernel();
+			return result;
+		}
+
+		/* The point of no return is here... */
+		notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
+
+		machine_kexec(&image);
+		/* We never get here but... */
+		kimage_free(&image);
+		unlock_kernel();
+		break;
+	}
+#endif /* CONFIG_KEXEC */
+
 	default:
 		unlock_kernel();
 		return -EINVAL;
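
For completeness, loading a new kernel from user space is just the reboot(2) call with the new command and argument block. A minimal sketch (error handling elided; the kernel path is hypothetical, and the raw syscall is used because glibc's reboot() wrapper does not pass an argument pointer):

        /* User-space sketch: kexec /boot/vmlinux with a command line.
         * Assumes the patched <linux/reboot.h>; must run as root
         * (CAP_SYS_BOOT), and a successful call does not return.
         */
        #include <unistd.h>
        #include <sys/syscall.h>
        #include <linux/reboot.h>

        int main(void)
        {
                struct kexec_args args = {
                        .filename = "/boot/vmlinux",  /* hypothetical path */
                        .arg      = "root=/dev/sda1 console=ttyS0",
                };
                /* magic1, magic2, cmd, arg -- as sys_reboot() expects */
                return syscall(SYS_reboot, LINUX_REBOOT_MAGIC1,
                               LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_KEXEC,
                               &args);
        }
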