kernel-2.6.18-128.1.10.el5.src.rpm

diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index daa75ce..a0707d7 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -761,11 +761,24 @@ config CRASH_DUMP
 	help
 	  Generate crash dump after being started by kexec.
 
+config RELOCATABLE
+	bool "Build a relocatable kernel"
+	help
+	  This builds a kernel image that retains relocation information
+          so it can be loaded someplace besides the default 1MB.
+	  The relocations tend to make the kernel binary about 10% larger,
+          but are discarded at runtime.
+
+	  One use is for the kexec on panic case where the recovery kernel
+          must live at a different physical address than the primary
+          kernel.
+
 config PHYSICAL_START
 	hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
 
 	default "0x1000000" if CRASH_DUMP
 	default "0x100000"
+	range 0x100000 0x37c00000
 	help
 	  This gives the physical address where the kernel is loaded. Normally
 	  for regular kernels this value is 0x100000 (1MB). But in the case
diff --git a/arch/i386/Makefile b/arch/i386/Makefile
index 3e4adb1..e9d6eac 100644
--- a/arch/i386/Makefile
+++ b/arch/i386/Makefile
@@ -26,7 +26,7 @@ endif
 
 LDFLAGS		:= -m elf_i386
 OBJCOPYFLAGS	:= -O binary -R .note -R .comment -S
-LDFLAGS_vmlinux :=
+LDFLAGS_vmlinux := --emit-relocs
 CHECKFLAGS	+= -D__i386__
 
 CFLAGS += -pipe -msoft-float
diff --git a/arch/i386/boot/Makefile b/arch/i386/boot/Makefile
index e979466..44ef35c 100644
--- a/arch/i386/boot/Makefile
+++ b/arch/i386/boot/Makefile
@@ -43,7 +43,7 @@ # --------------------------------------
 
 quiet_cmd_image = BUILD   $@
 cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/bootsect $(obj)/setup \
-	    $(obj)/vmlinux.bin $(ROOT_DEV) > $@
+	    $(obj)/vmlinux.bin $(ROOT_DEV) vmlinux > $@
 
 $(obj)/zImage $(obj)/bzImage: $(obj)/bootsect $(obj)/setup \
 			      $(obj)/vmlinux.bin $(obj)/tools/build FORCE
diff --git a/arch/i386/boot/bootsect.S b/arch/i386/boot/bootsect.S
index 011b7a4..847dc8f 100644
--- a/arch/i386/boot/bootsect.S
+++ b/arch/i386/boot/bootsect.S
@@ -13,6 +13,12 @@
  *
  */
 
+#include <linux/version.h>
+#include <linux/utsrelease.h>
+#include <linux/compile.h>
+#include <linux/elf.h>
+#include <linux/elf_boot.h>
+#include <asm/page.h>
 #include <asm/boot.h>
 
 SETUPSECTS	= 4			/* default nr of setup-sectors */
@@ -42,10 +49,92 @@ #endif
 
 .global _start
 _start:
-
+ehdr:
+	# e_ident is carefully crafted so that if this is treated
+	# as an x86 bootsector you will execute through
+	# e_ident and then print the bugger-off message.
+	# The stray stores to (%bx,%di) are unfortunate, but they are
+	# unlikely to affect the ability to print
+	# a message, and you aren't supposed to be booting a
+	# bzImage directly from a floppy anyway.
+
+	# e_ident
+	.byte ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3
+	.byte ELFCLASS32, ELFDATA2LSB, EV_CURRENT, ELFOSABI_STANDALONE
+	.byte 0xeb, 0x3d, 0, 0, 0, 0, 0, 0
+#ifndef CONFIG_RELOCATABLE
+	.word ET_EXEC				# e_type
+#else
+	.word ET_DYN				# e_type
+#endif
+	.word EM_386				# e_machine
+	.int  1					# e_version
+	.int  CONFIG_PHYSICAL_START		# e_entry
+	.int  phdr - _start			# e_phoff
+	.int  0					# e_shoff
+	.int  0					# e_flags
+	.word e_ehdr - ehdr			# e_ehsize
+	.word e_phdr1 - phdr			# e_phentsize
+	.word (e_phdr - phdr)/(e_phdr1 - phdr)	# e_phnum
+	.word 40				# e_shentsize
+	.word 0					# e_shnum
+	.word 0					# e_shstrndx
+e_ehdr:
+
+.org 71
+normalize:
 	# Normalize the start address
 	jmpl	$BOOTSEG, $start2
 
+.org 80
+phdr:
+	.int PT_LOAD					# p_type
+	.int (SETUPSECTS+1)*512				# p_offset
+	.int __PAGE_OFFSET + CONFIG_PHYSICAL_START	# p_vaddr
+	.int CONFIG_PHYSICAL_START			# p_paddr
+	.int SYSSIZE*16					# p_filesz
+	.int 0						# p_memsz
+	.int PF_R | PF_W | PF_X				# p_flags
+	.int 4*1024*1024				# p_align
+e_phdr1:
+
+	.int PT_NOTE					# p_type
+	.int b_note - _start				# p_offset
+	.int 0						# p_vaddr
+	.int 0						# p_paddr
+	.int e_note - b_note				# p_filesz
+	.int 0						# p_memsz
+	.int 0						# p_flags
+	.int 0						# p_align
+e_phdr:
+
+.macro note name, type
+	.balign 4
+	.int	2f - 1f			# n_namesz
+	.int	4f - 3f			# n_descsz
+	.int	\type			# n_type
+	.balign 4
+1:	.asciz "\name"
+2:	.balign 4
+3:
+.endm
+.macro enote
+4:	.balign 4
+.endm
+
+	.balign 4
+b_note:
+	note ELF_NOTE_BOOT, EIN_PROGRAM_NAME
+		.asciz	"Linux"
+	enote
+	note ELF_NOTE_BOOT, EIN_PROGRAM_VERSION
+		.asciz	UTS_RELEASE
+	enote
+	note ELF_NOTE_BOOT, EIN_ARGUMENT_STYLE
+		.asciz	"Linux"
+	enote
+e_note:
+
 start2:
 	movw	%cs, %ax
 	movw	%ax, %ds
@@ -78,11 +167,11 @@ die:
 
 
 bugger_off_msg:
-	.ascii	"Direct booting from floppy is no longer supported.\r\n"
-	.ascii	"Please use a boot loader program instead.\r\n"
+	.ascii	"Booting linux without a boot loader is no longer supported.\r\n"
 	.ascii	"\n"
-	.ascii	"Remove disk and press any key to reboot . . .\r\n"
+	.ascii	"Press any key to reboot . . .\r\n"
 	.byte	0
+ebugger_off_msg:
 
 
 	# Kernel attributes; used by setup
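
A detail worth making explicit about the header above: read as 16-bit x86
code, the e_ident bytes are harmless, and the "0xeb, 0x3d" pair at offset 8
is a short jump whose target is 8 + 2 + 0x3d = 71, exactly the ".org 71"
where normalize: sits, so a BIOS that executes the sector anyway falls
through to the bugger-off message path. A throwaway host-side check of that
arithmetic (an illustration, not part of the patch):

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned char e_ident[16] = {
		0x7f, 'E', 'L', 'F',	/* ELFMAG */
		1, 1, 1, 255,		/* CLASS32, 2LSB, EV_CURRENT, STANDALONE */
		0xeb, 0x3d,		/* jmp rel8: the escape hatch */
		0, 0, 0, 0, 0, 0,	/* padding */
	};
	/* rel8 jumps are relative to the end of the 2-byte instruction */
	unsigned target = 8 + 2 + e_ident[9];

	assert(target == 71);	/* matches ".org 71" / normalize: above */
	printf("jmp at offset 8 lands at offset %u\n", target);
	return 0;
}
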
diff --git a/arch/i386/boot/compressed/Makefile b/arch/i386/boot/compressed/Makefile
index 258ea95..1c486d1 100644
--- a/arch/i386/boot/compressed/Makefile
+++ b/arch/i386/boot/compressed/Makefile
@@ -7,19 +7,33 @@ #
 targets		:= vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o
 EXTRA_AFLAGS	:= -traditional
 
-LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -e startup_32
+LDFLAGS_vmlinux := -T
+CFLAGS_misc.o += -fPIC
+hostprogs-y	:= relocs
 
-$(obj)/vmlinux: $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE
+$(obj)/vmlinux: $(src)/vmlinux.lds $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE
 	$(call if_changed,ld)
 	@:
 
 $(obj)/vmlinux.bin: vmlinux FORCE
 	$(call if_changed,objcopy)
 
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
+quiet_cmd_relocs = RELOCS  $@
+      cmd_relocs = $(obj)/relocs $< > $@
+$(obj)/vmlinux.relocs: vmlinux $(obj)/relocs FORCE
+	$(call if_changed,relocs)
+
+vmlinux.bin.all-y := $(obj)/vmlinux.bin
+vmlinux.bin.all-$(CONFIG_RELOCATABLE) += $(obj)/vmlinux.relocs
+quiet_cmd_relocbin = BUILD   $@
+      cmd_relocbin = cat $(filter-out FORCE,$^) > $@
+$(obj)/vmlinux.bin.all: $(vmlinux.bin.all-y) FORCE
+	$(call if_changed,relocbin)
+
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE
 	$(call if_changed,gzip)
 
 LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T
 
-$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE
+$(obj)/piggy.o: $(src)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE
 	$(call if_changed,ld)
diff --git a/arch/i386/boot/compressed/head.S b/arch/i386/boot/compressed/head.S
index b5893e4..418e425 100644
--- a/arch/i386/boot/compressed/head.S
+++ b/arch/i386/boot/compressed/head.S
@@ -27,8 +27,9 @@
 #include <asm/segment.h>
 #include <asm/page.h>
 
+.section ".text.head"
 	.globl startup_32
-	
+
 startup_32:
 	cld
 	cli
@@ -37,93 +39,141 @@ startup_32:
 	movl %eax,%es
 	movl %eax,%fs
 	movl %eax,%gs
+	movl %eax,%ss
 
-	lss stack_start,%esp
-	xorl %eax,%eax
-1:	incl %eax		# check that A20 really IS enabled
-	movl %eax,0x000000	# loop forever if it isn't
-	cmpl %eax,0x100000
-	je 1b
+/* Calculate the delta between where we were compiled to run
+ * at and where we were actually loaded at.  This can only be done
+ * with a short local call on x86.  Nothing else will tell us what
+ * address we are running at.  The reserved chunk of the real-mode
+ * data at 0x34-0x3f is used as the stack for this calculation.
+ * Only 4 bytes are needed.
+ */
+	leal 0x40(%esi), %esp
+	call 1f
+1:	popl %ebp
+	subl $1b, %ebp
+
+/* Compute the delta between where we were compiled to run at
+ * and where the code will actually run at.
+ */
+	/* Start with the delta to where the kernel will run.  If we are
+	 * a relocatable kernel this is the delta to our load address;
+	 * otherwise it is the delta to CONFIG_PHYSICAL_START.
+	 */
+#ifdef CONFIG_RELOCATABLE
+	movl %ebp, %ebx
+#else
+	movl $(CONFIG_PHYSICAL_START - startup_32), %ebx
+#endif
+
+	/* Replace the compressed data size with the uncompressed size */
+	subl input_len(%ebp), %ebx
+	movl output_len(%ebp), %eax
+	addl %eax, %ebx
+	/* Add 8 bytes for every 32K input block */
+	shrl $12, %eax
+	addl %eax, %ebx
+	/* Add 32K + 18 bytes of extra slack */
+	addl $(32768 + 18), %ebx
+	/* Align on a 4K boundary */
+	addl $4095, %ebx
+	andl $~4095, %ebx
+
+/* Copy the compressed kernel to the end of our buffer
+ * where decompression in place becomes safe.
+ */
+	pushl %esi
+	leal _end(%ebp), %esi
+	leal _end(%ebx), %edi
+	movl $(_end - startup_32), %ecx
+	std
+	rep
+	movsb
+	cld
+	popl %esi
+
+/* Compute the kernel start address.
+ */
+#ifdef CONFIG_RELOCATABLE
+	leal	startup_32(%ebp), %ebp
+#else
+	movl	$CONFIG_PHYSICAL_START, %ebp
+#endif
 
 /*
- * Initialize eflags.  Some BIOS's leave bits like NT set.  This would
- * confuse the debugger if this code is traced.
- * XXX - best to initialize before switching to protected mode.
+ * Jump to the relocated address.
  */
-	pushl $0
-	popfl
+	leal relocated(%ebx), %eax
+	jmp *%eax
+.section ".text"
+relocated:
+
 /*
  * Clear BSS
  */
 	xorl %eax,%eax
-	movl $_edata,%edi
-	movl $_end,%ecx
+	leal _edata(%ebx),%edi
+	leal _end(%ebx), %ecx
 	subl %edi,%ecx
 	cld
 	rep
 	stosb
+
+/*
+ * Setup the stack for the decompressor
+ */
+	leal stack_end(%ebx), %esp
+
 /*
  * Do the decompression, and jump to the new kernel..
  */
-	subl $16,%esp	# place for structure on the stack
-	movl %esp,%eax
+	movl output_len(%ebx), %eax
+	pushl %eax
+	pushl %ebp	# output address
+	movl input_len(%ebx), %eax
+	pushl %eax	# input_len
+	leal input_data(%ebx), %eax
+	pushl %eax	# input_data
+	leal _end(%ebx), %eax
+	pushl %eax	# end of the image as third argument
 	pushl %esi	# real mode pointer as second arg
-	pushl %eax	# address of structure as first arg
 	call decompress_kernel
-	orl  %eax,%eax 
-	jnz  3f
-	popl %esi	# discard address
-	popl %esi	# real mode pointer
-	xorl %ebx,%ebx
-	ljmp $(__BOOT_CS), $__PHYSICAL_START
+	addl $20, %esp
+	popl %ecx
+
+#ifdef CONFIG_RELOCATABLE
+/* Find the address of the relocations.
+ */
+	movl %ebp, %edi
+	addl %ecx, %edi
+
+/* Calculate the delta between where vmlinux was compiled to run
+ * and where it was actually loaded.
+ */
+	movl %ebp, %ebx
+	subl $CONFIG_PHYSICAL_START, %ebx
 
 /*
- * We come here, if we were loaded high.
- * We need to move the move-in-place routine down to 0x1000
- * and then start it with the buffer addresses in registers,
- * which we got from the stack.
+ * Process relocations.
  */
-3:
-	movl $move_routine_start,%esi
-	movl $0x1000,%edi
-	movl $move_routine_end,%ecx
-	subl %esi,%ecx
-	addl $3,%ecx
-	shrl $2,%ecx
-	cld
-	rep
-	movsl
-
-	popl %esi	# discard the address
-	popl %ebx	# real mode pointer
-	popl %esi	# low_buffer_start
-	popl %ecx	# lcount
-	popl %edx	# high_buffer_start
-	popl %eax	# hcount
-	movl $__PHYSICAL_START,%edi
-	cli		# make sure we don't get interrupted
-	ljmp $(__BOOT_CS), $0x1000 # and jump to the move routine
+
+1:	subl $4, %edi
+	movl 0(%edi), %ecx
+	testl %ecx, %ecx
+	jz 2f
+	addl %ebx, -__PAGE_OFFSET(%ebx, %ecx)
+	jmp 1b
+2:
+#endif
 
 /*
- * Routine (template) for moving the decompressed kernel in place,
- * if we were high loaded. This _must_ PIC-code !
+ * Jump to the decompressed kernel.
  */
-move_routine_start:
-	movl %ecx,%ebp
-	shrl $2,%ecx
-	rep
-	movsl
-	movl %ebp,%ecx
-	andl $3,%ecx
-	rep
-	movsb
-	movl %edx,%esi
-	movl %eax,%ecx	# NOTE: rep movsb won't move if %ecx == 0
-	addl $3,%ecx
-	shrl $2,%ecx
-	rep
-	movsl
-	movl %ebx,%esi	# Restore setup pointer
 	xorl %ebx,%ebx
-	ljmp $(__BOOT_CS), $__PHYSICAL_START
-move_routine_end:
+	jmp *%ebp
+
+.bss
+.balign 4
+stack:
+	.fill 4096, 1, 0
+stack_end:
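
The buffer sizing above (the shrl $12, addl $(32768 + 18), and 4K-align
steps) implements the formula derived at length in misc.c further down:
extra_bytes = (uncompressed_size >> 12) + 32768 + 18. A minimal host-side
sketch of the same arithmetic (illustration only; the function name is mine):

#include <stdio.h>

/* Room needed beyond the load point for decompress-in-place: the output
 * itself, plus 1 byte per 4K of output (a generous bound on gzip's
 * 5-bytes-per-32K block overhead), plus one worst-case uncompressed block
 * (32K) and the 18-byte gzip wrapper, rounded to the 4K boundary head.S
 * aligns to.
 */
static unsigned long decompress_buffer_size(unsigned long output_len)
{
	unsigned long sz = output_len;

	sz += output_len >> 12;		/* 1 byte per 4K of output */
	sz += 32768 + 18;		/* worst-case block + gzip wrapper */
	sz = (sz + 4095) & ~4095UL;	/* 4K alignment */
	return sz;
}

int main(void)
{
	unsigned long out = 2UL << 20;	/* say, a 2 MiB decompressed image */

	printf("%lu-byte image needs a %lu-byte buffer\n",
	       out, decompress_buffer_size(out));
	return 0;
}
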
diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c
index b2ccd54..809eb93 100644
--- a/arch/i386/boot/compressed/misc.c
+++ b/arch/i386/boot/compressed/misc.c
@@ -9,12 +9,96 @@
  * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
  */
 
+#define __init
 #include <linux/linkage.h>
 #include <linux/vmalloc.h>
+#include <linux/serial_reg.h>
 #include <linux/screen_info.h>
 #include <asm/io.h>
+#include <asm/setup.h>
 #include <asm/page.h>
 
+/* WARNING!!
+ * This code is compiled with -fPIC and it is relocated dynamically
+ * at run time, but no relocation processing is performed.
+ * This means that it is not safe to place pointers in static structures.
+ */
+
+/*
+ * Getting to provably safe in-place decompression is hard.
+ * Worst case behaviours need to be analyzed.
+ * Background information:
+ *
+ * The file layout is:
+ *    magic[2]
+ *    method[1]
+ *    flags[1]
+ *    timestamp[4]
+ *    extraflags[1]
+ *    os[1]
+ *    compressed data blocks[N]
+ *    crc[4] orig_len[4]
+ *
+ * resulting in 18 bytes of non-compressed data overhead.
+ *
+ * Files are divided into blocks:
+ * 1 bit (last block flag)
+ * 2 bits (block type)
+ *
+ * A block occurs every 32K-1 bytes, or when 50% compression has been achieved.
+ * The smallest block type encoding is always used.
+ *
+ * stored:
+ *    32 bits length in bytes.
+ *
+ * fixed:
+ *    magic fixed tree.
+ *    symbols.
+ *
+ * dynamic:
+ *    dynamic tree encoding.
+ *    symbols.
+ *
+ *
+ * The buffer for decompression in place is the length of the
+ * uncompressed data, plus a small amount extra to keep the algorithm safe.
+ * The compressed data is placed at the end of the buffer.  The output
+ * pointer is placed at the start of the buffer and the input pointer
+ * is placed where the compressed data starts.  Problems will occur
+ * when the output pointer overruns the input pointer.
+ *
+ * The output pointer can only overrun the input pointer if the input
+ * pointer is moving faster than the output pointer.  A condition only
+ * triggered by data whose compressed form is larger than the uncompressed
+ * form.
+ *
+ * The worst case at the block level is a growth of the compressed data
+ * of 5 bytes per 32767 bytes.
+ *
+ * The worst case internal to a compressed block is very hard to figure.
+ * The worst case can at least be bounded by having one bit that represents
+ * 32764 bytes and then all of the rest of the bytes representing the very
+ * last byte.
+ *
+ * All of which is enough to compute an amount of extra data that is required
+ * to be safe.  To avoid problems at the block level allocating 5 extra bytes
+ * per 32767 bytes of data is sufficient.  To avoid problems internal to a
+ * block, adding an extra 32767 bytes (the worst case uncompressed block size)
+ * is sufficient, to ensure that in the worst case the decompressed data for
+ * a block will stop the byte before the compressed data for a block begins.
+ * To avoid problems with the compressed data's meta information an extra 18
+ * bytes are needed.  Leading to the formula:
+ *
+ * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size.
+ *
+ * Adding 8 bytes per 32K is a bit excessive but much easier to calculate.
+ * Adding 32768 instead of 32767 just makes for round numbers.
+ * Adding the decompressor_size is necessary as it must live after all
+ * of the data as well.  Last I measured the decompressor is about 14K:
+ * 10K of actual data and 4K of bss.
+ *
+ */
+
 /*
  * gzip declarations
  */
@@ -24,21 +109,28 @@ #define STATIC static
 
 #undef memset
 #undef memcpy
+#undef memcmp
 #define memzero(s, n)     memset ((s), 0, (n))
+char *strstr(const char *haystack, const char *needle);
 
 typedef unsigned char  uch;
 typedef unsigned short ush;
 typedef unsigned long  ulg;
 
-#define WSIZE 0x8000		/* Window size must be at least 32k, */
-				/* and a power of two */
+#define WSIZE 0x80000000	/* Window size must be at least 32k,
+				 * and a power of two
+				 * We don't actually have a window just
+				 * a huge output buffer so I report
+				 * a 2G window size, as that should
+				 * always be larger than our output buffer.
+				 */
 
-static uch *inbuf;	     /* input buffer */
-static uch window[WSIZE];    /* Sliding window buffer */
+static uch *inbuf;	/* input buffer */
+static uch *window;	/* Sliding window buffer, (and final output buffer) */
 
-static unsigned insize = 0;  /* valid bytes in inbuf */
-static unsigned inptr = 0;   /* index of next byte to be processed in inbuf */
-static unsigned outcnt = 0;  /* bytes in output buffer */
+static unsigned insize;  /* valid bytes in inbuf */
+static unsigned inptr;   /* index of next byte to be processed in inbuf */
+static unsigned outcnt;  /* bytes in output buffer */
 
 /* gzip flag byte */
 #define ASCII_FLAG   0x01 /* bit 0 set: file probably ASCII text */
@@ -78,48 +170,57 @@ static void gzip_release(void **);
  * This is set up by the setup-routine at boot-time
  */
 static unsigned char *real_mode; /* Pointer to real-mode data */
+static char saved_command_line[COMMAND_LINE_SIZE];
 
 #define RM_EXT_MEM_K   (*(unsigned short *)(real_mode + 0x2))
 #ifndef STANDARD_MEMORY_BIOS_CALL
 #define RM_ALT_MEM_K   (*(unsigned long *)(real_mode + 0x1e0))
 #endif
 #define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0))
+#define RM_NEW_CL_POINTER ((char *)(unsigned long)(*(unsigned *)(real_mode+0x228)))
+#define RM_OLD_CL_MAGIC (*(unsigned short *)(real_mode + 0x20))
+#define RM_OLD_CL_OFFSET (*(unsigned short *)(real_mode + 0x22))
+#define OLD_CL_MAGIC 0xA33F
 
 extern unsigned char input_data[];
 extern int input_len;
 
 static long bytes_out = 0;
-static uch *output_data;
-static unsigned long output_ptr = 0;
 
 static void *malloc(int size);
 static void free(void *where);
 
 static void *memset(void *s, int c, unsigned n);
 static void *memcpy(void *dest, const void *src, unsigned n);
+static int memcmp(const void *s1, const void *s2, unsigned n);
 
 static void putstr(const char *);
+static unsigned simple_strtou(const char *cp,char **endp,unsigned base);
 
-extern int end;
-static long free_mem_ptr = (long)&end;
-static long free_mem_end_ptr;
+static unsigned long free_mem_ptr;
+static unsigned long free_mem_end_ptr;
 
-#define INPLACE_MOVE_ROUTINE  0x1000
-#define LOW_BUFFER_START      0x2000
-#define LOW_BUFFER_MAX       0x90000
 #define HEAP_SIZE             0x3000
-static unsigned int low_buffer_end, low_buffer_size;
-static int high_loaded =0;
-static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/;
 
-static char *vidmem = (char *)0xb8000;
+static char *vidmem;
 static int vidport;
 static int lines, cols;
 
 #ifdef CONFIG_X86_NUMAQ
-static void * xquad_portio = NULL;
+static void * xquad_portio;
 #endif
 
+/* The early serial console */
+
+#define DEFAULT_BAUD 9600
+#define DEFAULT_BASE 0x3f8 /* ttyS0 */
+static unsigned serial_base = DEFAULT_BASE;
+
+#define CONSOLE_NOOP   0
+#define CONSOLE_VID    1
+#define CONSOLE_SERIAL 2
+static int console = CONSOLE_NOOP;
+
 #include "../../../../lib/inflate.c"
 
 static void *malloc(int size)
@@ -151,10 +252,11 @@ static void gzip_mark(void **ptr)
 
 static void gzip_release(void **ptr)
 {
-	free_mem_ptr = (long) *ptr;
+	free_mem_ptr = (unsigned long) *ptr;
 }
- 
-static void scroll(void)
+
+/* The early video console */
+static void vid_scroll(void)
 {
 	int i;
 
@@ -163,7 +265,7 @@ static void scroll(void)
 		vidmem[i] = ' ';
 }
 
-static void putstr(const char *s)
+static void vid_putstr(const char *s)
 {
 	int x,y,pos;
 	char c;
@@ -175,15 +277,15 @@ static void putstr(const char *s)
 		if ( c == '\n' ) {
 			x = 0;
 			if ( ++y >= lines ) {
-				scroll();
+				vid_scroll();
 				y--;
 			}
 		} else {
-			vidmem [ ( x + cols * y ) * 2 ] = c; 
+			vidmem [ ( x + cols * y ) * 2 ] = c;
 			if ( ++x >= cols ) {
 				x = 0;
 				if ( ++y >= lines ) {
-					scroll();
+					vid_scroll();
 					y--;
 				}
 			}
@@ -200,6 +302,178 @@ static void putstr(const char *s)
 	outb_p(0xff & (pos >> 1), vidport+1);
 }
 
+static void vid_console_init(void)
+{
+	if (RM_SCREEN_INFO.orig_video_mode == 7) {
+		vidmem = (char *) 0xb0000;
+		vidport = 0x3b4;
+	} else {
+		vidmem = (char *) 0xb8000;
+		vidport = 0x3d4;
+	}
+
+	lines = RM_SCREEN_INFO.orig_video_lines;
+	cols = RM_SCREEN_INFO.orig_video_cols;
+}
+
+/* The early serial console */
+static void serial_putc(int ch)
+{
+	if (ch == '\n') {
+		serial_putc('\r');
+	}
+	/* Wait until I can send a byte */
+	while ((inb(serial_base + UART_LSR) & UART_LSR_THRE) == 0)
+		;
+
+	/* Send the byte */
+	outb(ch, serial_base + UART_TX);
+
+	/* Wait until the byte is transmitted */
+	while (!(inb(serial_base + UART_LSR) & UART_LSR_TEMT))
+		;
+}
+
+static void serial_putstr(const char *str)
+{
+	int ch;
+	while((ch = *str++) != '\0') {
+		/* no '\n' fixup needed here: serial_putc()
+		 * already expands '\n' to "\r\n"
+		 */
+		serial_putc(ch);
+	}
+}
+
+static void serial_console_init(char *s)
+{
+	unsigned base = DEFAULT_BASE;
+	unsigned baud = DEFAULT_BAUD;
+	unsigned divisor;
+	char *e;
+
+	if (*s == ',')
+		++s;
+	if (*s && (*s != ' ')) {
+		if (memcmp(s, "0x", 2) == 0) {
+			base = simple_strtou(s, &e, 16);
+		} else {
+			static const unsigned bases[] = { 0x3f8, 0x2f8 };
+			unsigned port;
+
+			if (memcmp(s, "ttyS", 4) == 0)
+				s += 4;
+			port = simple_strtou(s, &e, 10);
+			if ((port > 1) || (s == e))
+				port = 0;
+			base = bases[port];
+		}
+		s = e;
+		if (*s == ',')
+			++s;
+	}
+	if (*s && (*s != ' ')) {
+		baud = simple_strtou(s, &e, 0);
+		if ((baud == 0) || (s == e))
+			baud = DEFAULT_BAUD;
+	}
+	divisor = 115200 / baud;
+	serial_base = base;
+
+	outb(0x00, serial_base + UART_IER); /* no interrupt */
+	outb(0x00, serial_base + UART_FCR); /* no fifo */
+	outb(0x03, serial_base + UART_MCR); /* DTR + RTS */
+
+	/* Set Baud Rate divisor  */
+	outb(0x83, serial_base + UART_LCR);
+	outb(divisor & 0xff, serial_base + UART_DLL);
+	outb(divisor >> 8, serial_base + UART_DLM);
+	outb(0x03, serial_base + UART_LCR); /* 8n1 */
+
+}
+
+static void putstr(const char *str)
+{
+	if (console == CONSOLE_VID) {
+		vid_putstr(str);
+	} else if (console == CONSOLE_SERIAL) {
+		serial_putstr(str);
+	}
+}
+
+static void console_init(char *cmdline)
+{
+	cmdline = strstr(cmdline, "earlyprintk=");
+	if (!cmdline)
+		return;
+	cmdline += 12;
+	if (memcmp(cmdline, "vga", 3) == 0) {
+		vid_console_init();
+		console = CONSOLE_VID;
+	} else if (memcmp(cmdline, "serial", 6) == 0) {
+		serial_console_init(cmdline + 6);
+		console = CONSOLE_SERIAL;
+	} else if (memcmp(cmdline, "ttyS", 4) == 0) {
+		serial_console_init(cmdline);
+		console = CONSOLE_SERIAL;
+	}
+}
+
+static inline int tolower(int ch)
+{
+	return ch | 0x20;
+}
+
+static inline int isdigit(int ch)
+{
+	return (ch >= '0') && (ch <= '9');
+}
+
+static inline int isxdigit(int ch)
+{
+	ch = tolower(ch);
+	return isdigit(ch) || ((ch >= 'a') && (ch <= 'f'));
+}
+
+
+static inline int digval(int ch)
+{
+	return isdigit(ch)? (ch - '0') : tolower(ch) - 'a' + 10;
+}
+
+/**
+ * simple_strtou - convert a string to an unsigned
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+static unsigned simple_strtou(const char *cp, char **endp, unsigned base)
+{
+	unsigned result = 0,value;
+
+	if (!base) {
+		base = 10;
+		if (*cp == '0') {
+			base = 8;
+			cp++;
+			if ((tolower(*cp) == 'x') && isxdigit(cp[1])) {
+				cp++;
+				base = 16;
+			}
+		}
+	} else if (base == 16) {
+		if (cp[0] == '0' && tolower(cp[1]) == 'x')
+			cp += 2;
+	}
+	while (isxdigit(*cp) && ((value = digval(*cp)) < base)) {
+		result = result*base + value;
+		cp++;
+	}
+	if (endp)
+		*endp = (char *)cp;
+	return result;
+}
+
 static void* memset(void* s, int c, unsigned n)
 {
 	int i;
@@ -218,64 +492,60 @@ static void* memcpy(void* dest, const vo
 	return dest;
 }
 
+static int memcmp(const void *s1, const void *s2, unsigned n)
+{
+	const unsigned char *str1 = s1, *str2 = s2;
+	size_t i;
+	int result = 0;
+	for(i = 0; (result == 0) && (i < n); i++) {
+		result = *str1++ - *str2++;
+	}
+	return result;
+}
+
+char *strstr(const char *haystack, const char *needle)
+{
+	size_t len;
+	len = strlen(needle);
+	while(*haystack) {
+		if (memcmp(haystack, needle, len) == 0)
+			return (char *)haystack;
+		haystack++;
+	}
+	return NULL;
+}
+
 /* ===========================================================================
  * Fill the input buffer. This is called only when the buffer is empty
  * and at least one byte is really needed.
  */
 static int fill_inbuf(void)
 {
-	if (insize != 0) {
-		error("ran out of input data");
-	}
-
-	inbuf = input_data;
-	insize = input_len;
-	inptr = 1;
-	return inbuf[0];
+	error("ran out of input data");
+	return 0;
 }
 
 /* ===========================================================================
  * Write the output window window[0..outcnt-1] and update crc and bytes_out.
  * (Used for the decompressed data only.)
  */
-static void flush_window_low(void)
-{
-    ulg c = crc;         /* temporary variable */
-    unsigned n;
-    uch *in, *out, ch;
-    
-    in = window;
-    out = &output_data[output_ptr]; 
-    for (n = 0; n < outcnt; n++) {
-	    ch = *out++ = *in++;
-	    c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
-    }
-    crc = c;
-    bytes_out += (ulg)outcnt;
-    output_ptr += (ulg)outcnt;
-    outcnt = 0;
-}
-
-static void flush_window_high(void)
-{
-    ulg c = crc;         /* temporary variable */
-    unsigned n;
-    uch *in,  ch;
-    in = window;
-    for (n = 0; n < outcnt; n++) {
-	ch = *output_data++ = *in++;
-	if ((ulg)output_data == low_buffer_end) output_data=high_buffer_start;
-	c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
-    }
-    crc = c;
-    bytes_out += (ulg)outcnt;
-    outcnt = 0;
-}
-
 static void flush_window(void)
 {
-	if (high_loaded) flush_window_high();
-	else flush_window_low();
+	/* With my window equal to my output buffer
+	 * I only need to compute the crc here.
+	 */
+	ulg c = crc;         /* temporary variable */
+	unsigned n;
+	uch *in, ch;
+
+	in = window;
+	for (n = 0; n < outcnt; n++) {
+		ch = *in++;
+		c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
+	}
+	crc = c;
+	bytes_out += (ulg)outcnt;
+	outcnt = 0;
 }
 
 static void error(char *x)
@@ -287,87 +557,46 @@ static void error(char *x)
 	while(1);	/* Halt */
 }
 
-#define STACK_SIZE (4096)
-
-long user_stack [STACK_SIZE];
-
-struct {
-	long * a;
-	short b;
-	} stack_start = { & user_stack [STACK_SIZE] , __BOOT_DS };
-
-static void setup_normal_output_buffer(void)
+static void save_command_line(void)
 {
-#ifdef STANDARD_MEMORY_BIOS_CALL
-	if (RM_EXT_MEM_K < 1024) error("Less than 2MB of memory");
-#else
-	if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
-#endif
-	output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */
-	free_mem_end_ptr = (long)real_mode;
-}
-
-struct moveparams {
-	uch *low_buffer_start;  int lcount;
-	uch *high_buffer_start; int hcount;
-};
-
-static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
-{
-	high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE);
-#ifdef STANDARD_MEMORY_BIOS_CALL
-	if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
-#else
-	if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
-#endif	
-	mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START;
-	low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
-	  ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff;
-	low_buffer_size = low_buffer_end - LOW_BUFFER_START;
-	high_loaded = 1;
-	free_mem_end_ptr = (long)high_buffer_start;
-	if ( (__PHYSICAL_START + low_buffer_size) > ((ulg)high_buffer_start)) {
-		high_buffer_start = (uch *)(__PHYSICAL_START + low_buffer_size);
-		mv->hcount = 0; /* say: we need not to move high_buffer */
+	/* Find the command line */
+	char *cmdline;
+	cmdline = saved_command_line;
+	if (RM_NEW_CL_POINTER) {
+		cmdline = RM_NEW_CL_POINTER;
+	} else if (OLD_CL_MAGIC == RM_OLD_CL_MAGIC) {
+		cmdline = real_mode + RM_OLD_CL_OFFSET;
 	}
-	else mv->hcount = -1;
-	mv->high_buffer_start = high_buffer_start;
+	memcpy(saved_command_line, cmdline, COMMAND_LINE_SIZE);
+	saved_command_line[COMMAND_LINE_SIZE - 1] = '\0';
 }
 
-static void close_output_buffer_if_we_run_high(struct moveparams *mv)
-{
-	if (bytes_out > low_buffer_size) {
-		mv->lcount = low_buffer_size;
-		if (mv->hcount)
-			mv->hcount = bytes_out - low_buffer_size;
-	} else {
-		mv->lcount = bytes_out;
-		mv->hcount = 0;
-	}
-}
-
-asmlinkage int decompress_kernel(struct moveparams *mv, void *rmode)
+asmlinkage void decompress_kernel(void *rmode, unsigned long end,
+	uch *input_data, unsigned long input_len, uch *output)
 {
 	real_mode = rmode;
+	save_command_line();
+	console_init(saved_command_line);
 
-	if (RM_SCREEN_INFO.orig_video_mode == 7) {
-		vidmem = (char *) 0xb0000;
-		vidport = 0x3b4;
-	} else {
-		vidmem = (char *) 0xb8000;
-		vidport = 0x3d4;
-	}
-
-	lines = RM_SCREEN_INFO.orig_video_lines;
-	cols = RM_SCREEN_INFO.orig_video_cols;
-
-	if (free_mem_ptr < 0x100000) setup_normal_output_buffer();
-	else setup_output_buffer_if_we_run_high(mv);
+	window = output;  	/* Output buffer (Normally at 1M) */
+	free_mem_ptr     = end;	/* Heap  */
+	free_mem_end_ptr = end + HEAP_SIZE;
+	inbuf  = input_data;	/* Input buffer */
+	insize = input_len;
+	inptr  = 0;
+
+	if (((u32)output - CONFIG_PHYSICAL_START) & 0x3fffff)
+		error("Destination address not 4M aligned");
+	if (end > ((-__PAGE_OFFSET-(512 <<20)-1) & 0x7fffffff))
+		error("Destination address too large");
+#ifndef CONFIG_RELOCATABLE
+	if ((u32)output != CONFIG_PHYSICAL_START)
+		error("Wrong destination address");
+#endif
 
 	makecrc();
 	putstr("Uncompressing Linux... ");
 	gunzip();
 	putstr("Ok, booting the kernel.\n");
-	if (high_loaded) close_output_buffer_if_we_run_high(mv);
-	return high_loaded;
+	return;
 }
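
The parser above gives the decompressor an early console before the kernel
proper is even in place. The grammar console_init() accepts is
earlyprintk=vga, earlyprintk=serial[,ttyS<n>|,<hex base>][,<baud>], or
earlyprintk=ttyS<n>[,<baud>]. A host-side sketch of what a few settings
resolve to, traced by hand through serial_console_init() (illustration
only, not patch code):

#include <stdio.h>

struct example {
	const char *cmdline;
	unsigned base;		/* UART I/O base serial_console_init() picks */
	unsigned divisor;	/* 115200 / baud, written to DLL/DLM */
};

int main(void)
{
	/* bases[] maps ttyS0 to 0x3f8 and ttyS1 to 0x2f8; any other port
	 * number falls back to ttyS0; a leading "0x" names the base directly.
	 */
	static const struct example ex[] = {
		{ "earlyprintk=serial,ttyS0,9600",   0x3f8, 12 },
		{ "earlyprintk=serial,ttyS1,115200", 0x2f8, 1  },
		{ "earlyprintk=serial,0x2f8,38400",  0x2f8, 3  },
	};
	unsigned i;

	for (i = 0; i < sizeof(ex) / sizeof(ex[0]); i++)
		printf("%-36s base=%#x divisor=%u\n",
		       ex[i].cmdline, ex[i].base, ex[i].divisor);
	return 0;
}
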
diff --git a/arch/i386/boot/compressed/relocs.c b/arch/i386/boot/compressed/relocs.c
new file mode 100644
index 0000000..0551ceb
--- /dev/null
+++ b/arch/i386/boot/compressed/relocs.c
@@ -0,0 +1,563 @@
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <elf.h>
+#include <byteswap.h>
+#define USE_BSD
+#include <endian.h>
+
+#define MAX_SHDRS 100
+static Elf32_Ehdr ehdr;
+static Elf32_Shdr shdr[MAX_SHDRS];
+static Elf32_Sym  *symtab[MAX_SHDRS];
+static Elf32_Rel  *reltab[MAX_SHDRS];
+static char *strtab[MAX_SHDRS];
+static unsigned long reloc_count, reloc_idx;
+static unsigned long *relocs;
+
+static void die(char *fmt, ...)
+{
+	va_list ap;
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	exit(1);
+}
+
+static const char *sym_type(unsigned type)
+{
+	static const char *type_name[] = {
+#define SYM_TYPE(X) [X] = #X
+		SYM_TYPE(STT_NOTYPE),
+		SYM_TYPE(STT_OBJECT),
+		SYM_TYPE(STT_FUNC),
+		SYM_TYPE(STT_SECTION),
+		SYM_TYPE(STT_FILE),
+		SYM_TYPE(STT_COMMON),
+		SYM_TYPE(STT_TLS),
+#undef SYM_TYPE
+	};
+	const char *name = "unknown sym type name";
+	if (type < sizeof(type_name)/sizeof(type_name[0])) {
+		name = type_name[type];
+	}
+	return name;
+}
+
+static const char *sym_bind(unsigned bind)
+{
+	static const char *bind_name[] = {
+#define SYM_BIND(X) [X] = #X
+		SYM_BIND(STB_LOCAL),
+		SYM_BIND(STB_GLOBAL),
+		SYM_BIND(STB_WEAK),
+#undef SYM_BIND
+	};
+	const char *name = "unknown sym bind name";
+	if (bind < sizeof(bind_name)/sizeof(bind_name[0])) {
+		name = bind_name[bind];
+	}
+	return name;
+}
+
+static const char *sym_visibility(unsigned visibility)
+{
+	static const char *visibility_name[] = {
+#define SYM_VISIBILITY(X) [X] = #X
+		SYM_VISIBILITY(STV_DEFAULT),
+		SYM_VISIBILITY(STV_INTERNAL),
+		SYM_VISIBILITY(STV_HIDDEN),
+		SYM_VISIBILITY(STV_PROTECTED),
+#undef SYM_VISIBILITY
+	};
+	const char *name = "unknown sym visibility name";
+	if (visibility < sizeof(visibility_name)/sizeof(visibility_name[0])) {
+		name = visibility_name[visibility];
+	}
+	return name;
+}
+
+static const char *rel_type(unsigned type)
+{
+	static const char *type_name[] = {
+#define REL_TYPE(X) [X] = #X
+		REL_TYPE(R_386_NONE),
+		REL_TYPE(R_386_32),
+		REL_TYPE(R_386_PC32),
+		REL_TYPE(R_386_GOT32),
+		REL_TYPE(R_386_PLT32),
+		REL_TYPE(R_386_COPY),
+		REL_TYPE(R_386_GLOB_DAT),
+		REL_TYPE(R_386_JMP_SLOT),
+		REL_TYPE(R_386_RELATIVE),
+		REL_TYPE(R_386_GOTOFF),
+		REL_TYPE(R_386_GOTPC),
+#undef REL_TYPE
+	};
+	const char *name = "unknown rel type name";
+	if (type < sizeof(type_name)/sizeof(type_name[0])) {
+		name = type_name[type];
+	}
+	return name;
+}
+
+static const char *sec_name(unsigned shndx)
+{
+	const char *sec_strtab;
+	const char *name;
+	sec_strtab = strtab[ehdr.e_shstrndx];
+	name = "<noname>";
+	if (shndx < ehdr.e_shnum) {
+		name = sec_strtab + shdr[shndx].sh_name;
+	}
+	else if (shndx == SHN_ABS) {
+		name = "ABSOLUTE";
+	}
+	else if (shndx == SHN_COMMON) {
+		name = "COMMON";
+	}
+	return name;
+}
+
+static const char *sym_name(const char *sym_strtab, Elf32_Sym *sym)
+{
+	const char *name;
+	name = "<noname>";
+	if (sym->st_name) {
+		name = sym_strtab + sym->st_name;
+	}
+	else {
+		name = sec_name(sym->st_shndx);
+	}
+	return name;
+}
+
+
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define le16_to_cpu(val) (val)
+#define le32_to_cpu(val) (val)
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+#define le16_to_cpu(val) bswap_16(val)
+#define le32_to_cpu(val) bswap_32(val)
+#endif
+
+static uint16_t elf16_to_cpu(uint16_t val)
+{
+	return le16_to_cpu(val);
+}
+
+static uint32_t elf32_to_cpu(uint32_t val)
+{
+	return le32_to_cpu(val);
+}
+
+static void read_ehdr(FILE *fp)
+{
+	if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) {
+		die("Cannot read ELF header: %s\n",
+			strerror(errno));
+	}
+	if (memcmp(ehdr.e_ident, ELFMAG, 4) != 0) {
+		die("No ELF magic\n");
+	}
+	if (ehdr.e_ident[EI_CLASS] != ELFCLASS32) {
+		die("Not a 32 bit executable\n");
+	}
+	if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) {
+		die("Not a LSB ELF executable\n");
+	}
+	if (ehdr.e_ident[EI_VERSION] != EV_CURRENT) {
+		die("Unknown ELF version\n");
+	}
+	/* Convert the fields to native endian */
+	ehdr.e_type      = elf16_to_cpu(ehdr.e_type);
+	ehdr.e_machine   = elf16_to_cpu(ehdr.e_machine);
+	ehdr.e_version   = elf32_to_cpu(ehdr.e_version);
+	ehdr.e_entry     = elf32_to_cpu(ehdr.e_entry);
+	ehdr.e_phoff     = elf32_to_cpu(ehdr.e_phoff);
+	ehdr.e_shoff     = elf32_to_cpu(ehdr.e_shoff);
+	ehdr.e_flags     = elf32_to_cpu(ehdr.e_flags);
+	ehdr.e_ehsize    = elf16_to_cpu(ehdr.e_ehsize);
+	ehdr.e_phentsize = elf16_to_cpu(ehdr.e_phentsize);
+	ehdr.e_phnum     = elf16_to_cpu(ehdr.e_phnum);
+	ehdr.e_shentsize = elf16_to_cpu(ehdr.e_shentsize);
+	ehdr.e_shnum     = elf16_to_cpu(ehdr.e_shnum);
+	ehdr.e_shstrndx  = elf16_to_cpu(ehdr.e_shstrndx);
+
+	if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN)) {
+		die("Unsupported ELF header type\n");
+	}
+	if (ehdr.e_machine != EM_386) {
+		die("Not for x86\n");
+	}
+	if (ehdr.e_version != EV_CURRENT) {
+		die("Unknown ELF version\n");
+	}
+	if (ehdr.e_ehsize != sizeof(Elf32_Ehdr)) {
+		die("Bad Elf header size\n");
+	}
+	if (ehdr.e_phentsize != sizeof(Elf32_Phdr)) {
+		die("Bad program header entry\n");
+	}
+	if (ehdr.e_shentsize != sizeof(Elf32_Shdr)) {
+		die("Bad section header entry\n");
+	}
+	if (ehdr.e_shstrndx >= ehdr.e_shnum) {
+		die("String table index out of bounds\n");
+	}
+}
+
+static void read_shdrs(FILE *fp)
+{
+	int i;
+	if (ehdr.e_shnum > MAX_SHDRS) {
+		die("%d section headers found, only %d supported\n",
+			ehdr.e_shnum, MAX_SHDRS);
+	}
+	if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) {
+		die("Seek to %d failed: %s\n",
+			ehdr.e_shoff, strerror(errno));
+	}
+	if (fread(&shdr, sizeof(shdr[0]), ehdr.e_shnum, fp) != ehdr.e_shnum) {
+		die("Cannot read ELF section headers: %s\n",
+			strerror(errno));
+	}
+	for(i = 0; i < ehdr.e_shnum; i++) {
+		shdr[i].sh_name      = elf32_to_cpu(shdr[i].sh_name);
+		shdr[i].sh_type      = elf32_to_cpu(shdr[i].sh_type);
+		shdr[i].sh_flags     = elf32_to_cpu(shdr[i].sh_flags);
+		shdr[i].sh_addr      = elf32_to_cpu(shdr[i].sh_addr);
+		shdr[i].sh_offset    = elf32_to_cpu(shdr[i].sh_offset);
+		shdr[i].sh_size      = elf32_to_cpu(shdr[i].sh_size);
+		shdr[i].sh_link      = elf32_to_cpu(shdr[i].sh_link);
+		shdr[i].sh_info      = elf32_to_cpu(shdr[i].sh_info);
+		shdr[i].sh_addralign = elf32_to_cpu(shdr[i].sh_addralign);
+		shdr[i].sh_entsize   = elf32_to_cpu(shdr[i].sh_entsize);
+	}
+
+}
+
+static void read_strtabs(FILE *fp)
+{
+	int i;
+	for(i = 0; i < ehdr.e_shnum; i++) {
+		if (shdr[i].sh_type != SHT_STRTAB) {
+			continue;
+		}
+		strtab[i] = malloc(shdr[i].sh_size);
+		if (!strtab[i]) {
+			die("malloc of %d bytes for strtab failed\n",
+				shdr[i].sh_size);
+		}
+		if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) {
+			die("Seek to %d failed: %s\n",
+				shdr[i].sh_offset, strerror(errno));
+		}
+		if (fread(strtab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) {
+			die("Cannot read string table: %s\n",
+				strerror(errno));
+		}
+	}
+}
+
+static void read_symtabs(FILE *fp)
+{
+	int i,j;
+	for(i = 0; i < ehdr.e_shnum; i++) {
+		if (shdr[i].sh_type != SHT_SYMTAB) {
+			continue;
+		}
+		symtab[i] = malloc(shdr[i].sh_size);
+		if (!symtab[i]) {
+			die("malloc of %d bytes for symtab failed\n",
+				shdr[i].sh_size);
+		}
+		if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) {
+			die("Seek to %d failed: %s\n",
+				shdr[i].sh_offset, strerror(errno));
+		}
+		if (fread(symtab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) {
+			die("Cannot read symbol table: %s\n",
+				strerror(errno));
+		}
+		for(j = 0; j < shdr[i].sh_size/sizeof(symtab[i][0]); j++) {
+			symtab[i][j].st_name  = elf32_to_cpu(symtab[i][j].st_name);
+			symtab[i][j].st_value = elf32_to_cpu(symtab[i][j].st_value);
+			symtab[i][j].st_size  = elf32_to_cpu(symtab[i][j].st_size);
+			symtab[i][j].st_shndx = elf16_to_cpu(symtab[i][j].st_shndx);
+		}
+	}
+}
+
+
+static void read_relocs(FILE *fp)
+{
+	int i,j;
+	for(i = 0; i < ehdr.e_shnum; i++) {
+		if (shdr[i].sh_type != SHT_REL) {
+			continue;
+		}
+		reltab[i] = malloc(shdr[i].sh_size);
+		if (!reltab[i]) {
+			die("malloc of %d bytes for relocs failed\n",
+				shdr[i].sh_size);
+		}
+		if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) {
+			die("Seek to %d failed: %s\n",
+				shdr[i].sh_offset, strerror(errno));
+		}
+		if (fread(reltab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) {
+			die("Cannot read relocation table: %s\n",
+				strerror(errno));
+		}
+		for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) {
+			reltab[i][j].r_offset = elf32_to_cpu(reltab[i][j].r_offset);
+			reltab[i][j].r_info   = elf32_to_cpu(reltab[i][j].r_info);
+		}
+	}
+}
+
+
+static void print_absolute_symbols(void)
+{
+	int i;
+	printf("Absolute symbols\n");
+	printf(" Num:    Value Size  Type       Bind        Visibility  Name\n");
+	for(i = 0; i < ehdr.e_shnum; i++) {
+		char *sym_strtab;
+		Elf32_Sym *sh_symtab;
+		int j;
+		if (shdr[i].sh_type != SHT_SYMTAB) {
+			continue;
+		}
+		sh_symtab = symtab[i];
+		sym_strtab = strtab[shdr[i].sh_link];
+		for(j = 0; j < shdr[i].sh_size/sizeof(symtab[0][0]); j++) {
+			Elf32_Sym *sym;
+			const char *name;
+			sym = &symtab[i][j];
+			name = sym_name(sym_strtab, sym);
+			if (sym->st_shndx != SHN_ABS) {
+				continue;
+			}
+			printf("%5d %08x %5d %10s %10s %12s %s\n",
+				j, sym->st_value, sym->st_size,
+				sym_type(ELF32_ST_TYPE(sym->st_info)),
+				sym_bind(ELF32_ST_BIND(sym->st_info)),
+				sym_visibility(ELF32_ST_VISIBILITY(sym->st_other)),
+				name);
+		}
+	}
+	printf("\n");
+}
+
+static void print_absolute_relocs(void)
+{
+	int i;
+	printf("Absolute relocations\n");
+	printf("Offset     Info     Type     Sym.Value Sym.Name\n");
+	for(i = 0; i < ehdr.e_shnum; i++) {
+		char *sym_strtab;
+		Elf32_Sym *sh_symtab;
+		unsigned sec_applies, sec_symtab;
+		int j;
+		if (shdr[i].sh_type != SHT_REL) {
+			continue;
+		}
+		sec_symtab  = shdr[i].sh_link;
+		sec_applies = shdr[i].sh_info;
+		if (!(shdr[sec_applies].sh_flags & SHF_ALLOC)) {
+			continue;
+		}
+		sh_symtab = symtab[sec_symtab];
+		sym_strtab = strtab[shdr[sec_symtab].sh_link];
+		for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) {
+			Elf32_Rel *rel;
+			Elf32_Sym *sym;
+			const char *name;
+			rel = &reltab[i][j];
+			sym = &sh_symtab[ELF32_R_SYM(rel->r_info)];
+			name = sym_name(sym_strtab, sym);
+			if (sym->st_shndx != SHN_ABS) {
+				continue;
+			}
+			printf("%08x %08x %10s %08x  %s\n",
+				rel->r_offset,
+				rel->r_info,
+				rel_type(ELF32_R_TYPE(rel->r_info)),
+				sym->st_value,
+				name);
+		}
+	}
+	printf("\n");
+}
+
+static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym))
+{
+	int i;
+	/* Walk through the relocations */
+	for(i = 0; i < ehdr.e_shnum; i++) {
+		char *sym_strtab;
+		Elf32_Sym *sh_symtab;
+		unsigned sec_applies, sec_symtab;
+		int j;
+		if (shdr[i].sh_type != SHT_REL) {
+			continue;
+		}
+		sec_symtab  = shdr[i].sh_link;
+		sec_applies = shdr[i].sh_info;
+		if (!(shdr[sec_applies].sh_flags & SHF_ALLOC)) {
+			continue;
+		}
+		sh_symtab = symtab[sec_symtab];
+		sym_strtab = strtab[shdr[sec_symtab].sh_link];
+		for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) {
+			Elf32_Rel *rel;
+			Elf32_Sym *sym;
+			unsigned r_type;
+			rel = &reltab[i][j];
+			sym = &sh_symtab[ELF32_R_SYM(rel->r_info)];
+			r_type = ELF32_R_TYPE(rel->r_info);
+			/* Don't visit relocations to absolute symbols */
+			if (sym->st_shndx == SHN_ABS) {
+				continue;
+			}
+			if (r_type == R_386_PC32) {
+				/* PC relative relocations don't need to be adjusted */
+			}
+			else if (r_type == R_386_32) {
+				/* Visit relocations that need to be adjusted */
+				visit(rel, sym);
+			}
+			else {
+				die("Unsupported relocation type: %d\n", r_type);
+			}
+		}
+	}
+}
+
+static void count_reloc(Elf32_Rel *rel, Elf32_Sym *sym)
+{
+	reloc_count += 1;
+}
+
+static void collect_reloc(Elf32_Rel *rel, Elf32_Sym *sym)
+{
+	/* Remember the address that needs to be adjusted. */
+	relocs[reloc_idx++] = rel->r_offset;
+}
+
+static int cmp_relocs(const void *va, const void *vb)
+{
+	const unsigned long *a, *b;
+	a = va; b = vb;
+	return (*a == *b)? 0 : (*a > *b)? 1 : -1;
+}
+
+static void emit_relocs(int as_text)
+{
+	int i;
+	/* Count how many relocations I have and allocate space for them. */
+	reloc_count = 0;
+	walk_relocs(count_reloc);
+	relocs = malloc(reloc_count * sizeof(relocs[0]));
+	if (!relocs) {
+		die("malloc of %d entries for relocs failed\n",
+			reloc_count);
+	}
+	/* Collect up the relocations */
+	reloc_idx = 0;
+	walk_relocs(collect_reloc);
+
+	/* Order the relocations for more efficient processing */
+	qsort(relocs, reloc_count, sizeof(relocs[0]), cmp_relocs);
+
+	/* Print the relocations */
+	if (as_text) {
+		/* Print the relocations in a form that
+		 * gas will like.
+		 */
+		printf(".section \".data.reloc\",\"a\"\n");
+		printf(".balign 4\n");
+		for(i = 0; i < reloc_count; i++) {
+			printf("\t .long 0x%08lx\n", relocs[i]);
+		}
+		printf("\n");
+	}
+	else {
+		unsigned char buf[4];
+		buf[0] = buf[1] = buf[2] = buf[3] = 0;
+		/* Print a stop */
+		printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]);
+		/* Now print each relocation */
+		for(i = 0; i < reloc_count; i++) {
+			buf[0] = (relocs[i] >>  0) & 0xff;
+			buf[1] = (relocs[i] >>  8) & 0xff;
+			buf[2] = (relocs[i] >> 16) & 0xff;
+			buf[3] = (relocs[i] >> 24) & 0xff;
+			printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]);
+		}
+	}
+}
+
+static void usage(void)
+{
+	die("i386_reloc [--abs | --text] vmlinux\n");
+}
+
+int main(int argc, char **argv)
+{
+	int show_absolute;
+	int as_text;
+	const char *fname;
+	FILE *fp;
+	int i;
+
+	show_absolute = 0;
+	as_text = 0;
+	fname = NULL;
+	for(i = 1; i < argc; i++) {
+		char *arg = argv[i];
+		if (*arg == '-') {
+			if (strcmp(arg, "--abs") == 0) {
+				show_absolute = 1;
+				continue;
+			}
+			else if (strcmp(arg, "--text") == 0) {
+				as_text = 1;
+				continue;
+			}
+		}
+		else if (!fname) {
+			fname = arg;
+			continue;
+		}
+		usage();
+	}
+	if (!fname) {
+		usage();
+	}
+	fp = fopen(fname, "r");
+	if (!fp) {
+		die("Cannot open %s: %s\n",
+			fname, strerror(errno));
+	}
+	read_ehdr(fp);
+	read_shdrs(fp);
+	read_strtabs(fp);
+	read_symtabs(fp);
+	read_relocs(fp);
+	if (show_absolute) {
+		print_absolute_symbols();
+		print_absolute_relocs();
+		return 0;
+	}
+	emit_relocs(as_text);
+	return 0;
+}
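
In binary mode the tool emits a 4-byte zero stop word followed by the sorted
32-bit relocation addresses; arch/i386/boot/compressed/head.S walks that
blob backwards from the end of the decompressed image. A C rendering of the
consumer loop (an illustration of the format, not patch code):

#include <stdint.h>

/* table_end points just past the last entry of the appended table; mem
 * models physical memory from address 0; delta is the slide (actual load
 * address minus CONFIG_PHYSICAL_START); page_offset is __PAGE_OFFSET
 * (0xc0000000 on the default 3G/1G split).
 */
static void process_relocs(uint32_t *table_end, uint8_t *mem,
			   uint32_t delta, uint32_t page_offset)
{
	uint32_t *p = table_end;
	uint32_t vaddr;

	/* Entries are sorted ascending, so walking backwards touches high
	 * addresses first; the zero stop word ends the walk.
	 */
	while ((vaddr = *--p) != 0) {
		uint32_t phys = vaddr - page_offset + delta;

		/* slide the 32-bit pointer this relocation covers */
		*(uint32_t *)(mem + phys) += delta;
	}
}
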
diff --git a/arch/i386/boot/compressed/vmlinux.lds b/arch/i386/boot/compressed/vmlinux.lds
new file mode 100644
index 0000000..973a23e
--- /dev/null
+++ b/arch/i386/boot/compressed/vmlinux.lds
@@ -0,0 +1,40 @@
+OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(startup_32)
+SECTIONS
+{
+	. =  0 	;
+	.text.head : {
+		_head = . ;
+		*(.text.head)
+		_ehead = . ;
+	}
+	.data.compressed : {
+		*(.data.compressed)
+	}
+	.text :	{
+		_text = .; 	/* Text */
+		*(.text)
+		*(.text.*)
+		_etext = . ;
+	}
+	.rodata : {
+		_rodata = . ;
+		*(.rodata)	 /* read-only data */
+		*(.rodata.*)
+		_erodata = . ;
+	}
+	.data :	{
+		_data = . ;
+		*(.data)
+		*(.data.*)
+		_edata = . ;
+	}
+	.bss : {
+		_bss = . ;
+		*(.bss)
+		*(.bss.*)
+		*(COMMON)
+		_end = . ;
+	}
+}
diff --git a/arch/i386/boot/compressed/vmlinux.scr b/arch/i386/boot/compressed/vmlinux.scr
index 1ed9d79..707a88f 100644
--- a/arch/i386/boot/compressed/vmlinux.scr
+++ b/arch/i386/boot/compressed/vmlinux.scr
@@ -1,9 +1,10 @@
 SECTIONS
 {
-  .data : { 
+  .data.compressed : {
 	input_len = .;
 	LONG(input_data_end - input_data) input_data = .; 
 	*(.data) 
+	output_len = . - 4;
 	input_data_end = .; 
 	}
 }
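
The "output_len = . - 4" line deserves a note: a gzip stream ends with a
4-byte CRC32 followed by a 4-byte ISIZE field, the uncompressed length
modulo 2^32 in little-endian order, so a symbol planted 4 bytes before
input_data_end lands exactly on ISIZE and head.S can read the decompressed
size straight out of the compressed payload. A host-side sketch reading the
same field (not part of the patch):

#include <stdio.h>
#include <stdint.h>

int main(int argc, char **argv)
{
	unsigned char t[4];
	uint32_t isize;
	FILE *fp = fopen(argc > 1 ? argv[1] : "vmlinux.bin.gz", "rb");

	if (!fp || fseek(fp, -4L, SEEK_END) != 0 || fread(t, 1, 4, fp) != 4) {
		perror("gzip trailer");
		return 1;
	}
	isize = (uint32_t)t[0] | (uint32_t)t[1] << 8 |
		(uint32_t)t[2] << 16 | (uint32_t)t[3] << 24;
	printf("uncompressed size: %u bytes\n", (unsigned)isize);
	fclose(fp);
	return 0;
}
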
diff --git a/arch/i386/boot/setup.S b/arch/i386/boot/setup.S
index d2b684c..04b6ea8 100644
--- a/arch/i386/boot/setup.S
+++ b/arch/i386/boot/setup.S
@@ -588,11 +588,6 @@ rmodeswtch_normal:
 	call	default_switch
 
 rmodeswtch_end:
-# we get the code32 start address and modify the below 'jmpi'
-# (loader may have changed it)
-	movl	%cs:code32_start, %eax
-	movl	%eax, %cs:code32
-
 # Now we move the system to its rightful place ... but we check if we have a
 # big-kernel. In that case we *must* not move it ...
 	testb	$LOADED_HIGH, %cs:loadflags
@@ -788,11 +783,12 @@ a20_err_msg:
 a20_done:
 
 #endif /* CONFIG_X86_VOYAGER */
-# set up gdt and idt
+# set up gdt and idt and 32bit start address
 	lidt	idt_48				# load idt with 0,0
 	xorl	%eax, %eax			# Compute gdt_base
 	movw	%ds, %ax			# (Convert %ds:gdt to a linear ptr)
 	shll	$4, %eax
+	addl	%eax, code32
 	addl	$gdt, %eax
 	movl	%eax, (gdt_48+2)
 	lgdt	gdt_48				# load gdt with whatever is
@@ -851,9 +847,26 @@ #	take our 48 bit far pointer. (INTeL 80
 #	Manual, Mixing 16-bit and 32-bit code, page 16-6)
 
 	.byte 0x66, 0xea			# prefix + jmpi-opcode
-code32:	.long	0x1000				# will be set to 0x100000
-						# for big kernels
+code32:	.long	startup_32			# will be set to %cs+startup_32
 	.word	__BOOT_CS
+.code32
+startup_32:
+	movl $(__BOOT_DS), %eax
+	movl %eax, %ds
+	movl %eax, %es
+	movl %eax, %fs
+	movl %eax, %gs
+	movl %eax, %ss
+
+	xorl %eax, %eax
+1:	incl %eax				# check that A20 really IS enabled
+	movl %eax, 0x00000000			# loop forever if it isn't
+	cmpl %eax, 0x00100000
+	je 1b
+
+	# Jump to the 32bit entry point
+	jmpl *(code32_start - start + (DELTA_INITSEG << 4))(%esi)
+.code16
 
 # Here's a bunch of information about your current kernel..
 kernel_version:	.ascii	UTS_RELEASE
diff --git a/arch/i386/boot/tools/build.c b/arch/i386/boot/tools/build.c
index 0579841..2daca93 100644
--- a/arch/i386/boot/tools/build.c
+++ b/arch/i386/boot/tools/build.c
@@ -27,6 +27,11 @@ #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
 #include <stdarg.h>
+#include <elf.h>
+#include <byteswap.h>
+#define USE_BSD
+#include <endian.h>
+#include <errno.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/sysmacros.h>
@@ -48,6 +53,10 @@ byte buf[1024];
 int fd;
 int is_big_kernel;
 
+#define MAX_PHDRS 100
+static Elf32_Ehdr ehdr;
+static Elf32_Phdr phdr[MAX_PHDRS];
+
 void die(const char * str, ...)
 {
 	va_list args;
@@ -57,20 +66,151 @@ void die(const char * str, ...)
 	exit(1);
 }
 
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define le16_to_cpu(val) (val)
+#define le32_to_cpu(val) (val)
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+#define le16_to_cpu(val) bswap_16(val)
+#define le32_to_cpu(val) bswap_32(val)
+#endif
+
+static uint16_t elf16_to_cpu(uint16_t val)
+{
+	return le16_to_cpu(val);
+}
+
+static uint32_t elf32_to_cpu(uint32_t val)
+{
+	return le32_to_cpu(val);
+}
+
 void file_open(const char *name)
 {
 	if ((fd = open(name, O_RDONLY, 0)) < 0)
 		die("Unable to open `%s': %m", name);
 }
 
+static void read_ehdr(void)
+{
+	if (read(fd, &ehdr, sizeof(ehdr)) != sizeof(ehdr)) {
+		die("Cannot read ELF header: %s\n",
+			strerror(errno));
+	}
+	if (memcmp(ehdr.e_ident, ELFMAG, 4) != 0) {
+		die("No ELF magic\n");
+	}
+	if (ehdr.e_ident[EI_CLASS] != ELFCLASS32) {
+		die("Not a 32 bit executable\n");
+	}
+	if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) {
+		die("Not a LSB ELF executable\n");
+	}
+	if (ehdr.e_ident[EI_VERSION] != EV_CURRENT) {
+		die("Unknown ELF version\n");
+	}
+	/* Convert the fields to native endian */
+	ehdr.e_type      = elf16_to_cpu(ehdr.e_type);
+	ehdr.e_machine   = elf16_to_cpu(ehdr.e_machine);
+	ehdr.e_version   = elf32_to_cpu(ehdr.e_version);
+	ehdr.e_entry     = elf32_to_cpu(ehdr.e_entry);
+	ehdr.e_phoff     = elf32_to_cpu(ehdr.e_phoff);
+	ehdr.e_shoff     = elf32_to_cpu(ehdr.e_shoff);
+	ehdr.e_flags     = elf32_to_cpu(ehdr.e_flags);
+	ehdr.e_ehsize    = elf16_to_cpu(ehdr.e_ehsize);
+	ehdr.e_phentsize = elf16_to_cpu(ehdr.e_phentsize);
+	ehdr.e_phnum     = elf16_to_cpu(ehdr.e_phnum);
+	ehdr.e_shentsize = elf16_to_cpu(ehdr.e_shentsize);
+	ehdr.e_shnum     = elf16_to_cpu(ehdr.e_shnum);
+	ehdr.e_shstrndx  = elf16_to_cpu(ehdr.e_shstrndx);
+
+	if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN)) {
+		die("Unsupported ELF header type\n");
+	}
+	if (ehdr.e_machine != EM_386) {
+		die("Not for x86\n");
+	}
+	if (ehdr.e_version != EV_CURRENT) {
+		die("Unknown ELF version\n");
+	}
+	if (ehdr.e_ehsize != sizeof(Elf32_Ehdr)) {
+		die("Bad Elf header size\n");
+	}
+	if (ehdr.e_phentsize != sizeof(Elf32_Phdr)) {
+		die("Bad program header entry\n");
+	}
+	if (ehdr.e_shentsize != sizeof(Elf32_Shdr)) {
+		die("Bad section header entry\n");
+	}
+	if (ehdr.e_shstrndx >= ehdr.e_shnum) {
+		die("String table index out of bounds\n");
+	}
+}
+
+static void read_phds(void)
+{
+	int i;
+	size_t size;
+	if (ehdr.e_phnum > MAX_PHDRS) {
+		die("%d program headers found, only %d supported\n",
+			ehdr.e_phnum, MAX_PHDRS);
+	}
+	if (lseek(fd, ehdr.e_phoff, SEEK_SET) < 0) {
+		die("Seek to %d failed: %s\n",
+			ehdr.e_phoff, strerror(errno));
+	}
+	size = sizeof(phdr[0])*ehdr.e_phnum;
+	if (read(fd, &phdr, size) != size) {
+		die("Cannot read ELF program headers: %s\n",
+			strerror(errno));
+	}
+	for(i = 0; i < ehdr.e_phnum; i++) {
+		phdr[i].p_type      = elf32_to_cpu(phdr[i].p_type);
+		phdr[i].p_offset    = elf32_to_cpu(phdr[i].p_offset);
+		phdr[i].p_vaddr     = elf32_to_cpu(phdr[i].p_vaddr);
+		phdr[i].p_paddr     = elf32_to_cpu(phdr[i].p_paddr);
+		phdr[i].p_filesz    = elf32_to_cpu(phdr[i].p_filesz);
+		phdr[i].p_memsz     = elf32_to_cpu(phdr[i].p_memsz);
+		phdr[i].p_flags     = elf32_to_cpu(phdr[i].p_flags);
+		phdr[i].p_align     = elf32_to_cpu(phdr[i].p_align);
+	}
+}
+
+unsigned long vmlinux_memsz(void)
+{
+	unsigned long min, max, size;
+	int i;
+	min = 0xffffffff;
+	max = 0;
+	for(i = 0; i < ehdr.e_phnum; i++) {
+		unsigned long start, end;
+		if (phdr[i].p_type != PT_LOAD)
+			continue;
+		start = phdr[i].p_paddr;
+		end   = phdr[i].p_paddr + phdr[i].p_memsz;
+		if (start < min)
+			min = start;
+		if (end > max)
+			max = end;
+	}
+	/* Get the size reported by vmlinux */
+	size = max - min;
+	/* Add 128K for the bootmem bitmap */
+	size += 128*1024;
+	/* Add in space for the initial page tables */
+	size = ((size + (((size + 4095) >> 12)*4)) + 4095) & ~4095;
+	return size;
+}
+
 void usage(void)
 {
-	die("Usage: build [-b] bootsect setup system [rootdev] [> image]");
+	die("Usage: build [-b] bootsect setup system rootdev vmlinux [> image]");
 }
 
 int main(int argc, char ** argv)
 {
 	unsigned int i, sz, setup_sectors;
+	unsigned kernel_offset, kernel_filesz, kernel_memsz;
 	int c;
 	u32 sys_size;
 	byte major_root, minor_root;
@@ -81,30 +221,25 @@ int main(int argc, char ** argv)
 	    is_big_kernel = 1;
 	    argc--, argv++;
 	  }
-	if ((argc < 4) || (argc > 5))
+	if (argc != 6)
 		usage();
-	if (argc > 4) {
-		if (!strcmp(argv[4], "CURRENT")) {
-			if (stat("/", &sb)) {
-				perror("/");
-				die("Couldn't stat /");
-			}
-			major_root = major(sb.st_dev);
-			minor_root = minor(sb.st_dev);
-		} else if (strcmp(argv[4], "FLOPPY")) {
-			if (stat(argv[4], &sb)) {
-				perror(argv[4]);
-				die("Couldn't stat root device.");
-			}
-			major_root = major(sb.st_rdev);
-			minor_root = minor(sb.st_rdev);
-		} else {
-			major_root = 0;
-			minor_root = 0;
+	if (!strcmp(argv[4], "CURRENT")) {
+		if (stat("/", &sb)) {
+			perror("/");
+			die("Couldn't stat /");
+		}
+		major_root = major(sb.st_dev);
+		minor_root = minor(sb.st_dev);
+	} else if (strcmp(argv[4], "FLOPPY")) {
+		if (stat(argv[4], &sb)) {
+			perror(argv[4]);
+			die("Couldn't stat root device.");
 		}
+		major_root = major(sb.st_rdev);
+		minor_root = minor(sb.st_rdev);
 	} else {
-		major_root = DEFAULT_MAJOR_ROOT;
-		minor_root = DEFAULT_MINOR_ROOT;
+		major_root = 0;
+		minor_root = 0;
 	}
 	fprintf(stderr, "Root device is (%d, %d)\n", major_root, minor_root);
 
@@ -144,10 +279,11 @@ int main(int argc, char ** argv)
 		i += c;
 	}
 
+	kernel_offset = (setup_sectors + 1)*512;
 	file_open(argv[3]);
 	if (fstat (fd, &sb))
 		die("Unable to stat `%s': %m", argv[3]);
-	sz = sb.st_size;
+	kernel_filesz = sz = sb.st_size;
 	fprintf (stderr, "System is %d kB\n", sz/1024);
 	sys_size = (sz + 15) / 16;
 	if (!is_big_kernel && sys_size > DEF_SYSSIZE)
@@ -168,7 +304,37 @@ int main(int argc, char ** argv)
 	}
 	close(fd);
 
-	if (lseek(1, 497, SEEK_SET) != 497)		    /* Write sizes to the bootsector */
+	file_open(argv[5]);
+	read_ehdr();
+	read_phds();
+	close(fd);
+	kernel_memsz = vmlinux_memsz();
+
+	if (lseek(1,  84, SEEK_SET) != 84)		    /* Write sizes to the bootsector */
+		die("Output: seek failed");
+	buf[0] = (kernel_offset >>  0) & 0xff;
+	buf[1] = (kernel_offset >>  8) & 0xff;
+	buf[2] = (kernel_offset >> 16) & 0xff;
+	buf[3] = (kernel_offset >> 24) & 0xff;
+	if (write(1, buf, 4) != 4)
+		die("Write of kernel file offset failed");
+	if (lseek(1, 96, SEEK_SET) != 96)
+		die("Output: seek failed");
+	buf[0] = (kernel_filesz >>  0) & 0xff;
+	buf[1] = (kernel_filesz >>  8) & 0xff;
+	buf[2] = (kernel_filesz >> 16) & 0xff;
+	buf[3] = (kernel_filesz >> 24) & 0xff;
+	if (write(1, buf, 4) != 4)
+		die("Write of kernel file size failed");
+	if (lseek(1, 100, SEEK_SET) != 100)
+		die("Output: seek failed");
+	buf[0] = (kernel_memsz >>  0) & 0xff;
+	buf[1] = (kernel_memsz >>  8) & 0xff;
+	buf[2] = (kernel_memsz >> 16) & 0xff;
+	buf[3] = (kernel_memsz >> 24) & 0xff;
+	if (write(1, buf, 4) != 4)
+		die("Write of kernel memory size failed");
+	if (lseek(1, 497, SEEK_SET) != 497)
 		die("Output: seek failed");
 	buf[0] = setup_sectors;
 	if (write(1, buf, 1) != 1)
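
vmlinux_memsz() is why build now takes vmlinux as a fifth argument: the
three 32-bit fields written at offsets 84, 96 and 100 of the bootsector land
on p_offset, p_filesz and p_memsz of the PT_LOAD program header that
bootsect.S pins at .org 80, telling a boot loader where the protected-mode
kernel starts in the image, how big it is there, and how much memory it
needs once loaded, so a relocatable kernel can be placed without guessing.
A standalone restatement of the size math (illustration only; the function
name is mine):

#include <stdio.h>

/* span is the extent of the PT_LOAD segments (max end minus min start) */
static unsigned long kernel_memsz(unsigned long span)
{
	unsigned long size = span;

	size += 128 * 1024;	/* bootmem bitmap, as in vmlinux_memsz() */
	/* 4 bytes of initial page-table entry per 4K page, then round the
	 * whole thing up to a 4K boundary
	 */
	size = ((size + (((size + 4095) >> 12) * 4)) + 4095) & ~4095UL;
	return size;
}

int main(void)
{
	printf("8 MiB of PT_LOAD segments need %lu bytes\n",
	       kernel_memsz(8UL << 20));
	return 0;
}
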
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c
index 28ab806..e573263 100644
--- a/arch/i386/kernel/alternative.c
+++ b/arch/i386/kernel/alternative.c
@@ -347,8 +347,8 @@ void __init alternative_instructions(voi
 	if (no_replacement) {
 		printk(KERN_INFO "(SMP-)alternatives turned off\n");
 		free_init_pages("SMP alternatives",
-				(unsigned long)__smp_alt_begin,
-				(unsigned long)__smp_alt_end);
+				__pa_symbol(&__smp_alt_begin),
+				__pa_symbol(&__smp_alt_end));
 		return;
 	}
 	apply_alternatives(__alt_instructions, __alt_instructions_end);
@@ -375,8 +375,8 @@ #ifdef CONFIG_SMP
 						_text, _etext);
 		}
 		free_init_pages("SMP alternatives",
-				(unsigned long)__smp_alt_begin,
-				(unsigned long)__smp_alt_end);
+				__pa_symbol(&__smp_alt_begin),
+				__pa_symbol(&__smp_alt_end));
 	} else {
 		alternatives_smp_save(__smp_alt_instructions,
 				      __smp_alt_instructions_end);
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index f168220..f3a451a 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -1219,8 +1219,8 @@ void __init setup_bootmem_allocator(void
 	 * the (very unlikely) case of us accidentally initializing the
 	 * bootmem allocator with an invalid RAM area.
 	 */
-	reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
-			 bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
+	reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) +
+			 bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text));
 
 	/*
 	 * reserve physical page 0 - it's a special BIOS page on many boxes,
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index 2d4f138..8bcf0e1 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -15,46 +16,49 @@ ENTRY(phys_startup_32)
 }
 SECTIONS
 {
-  . = __KERNEL_START;
+  . = LOAD_OFFSET + CONFIG_PHYSICAL_START;
   phys_startup_32 = startup_32 - LOAD_OFFSET;
   /* read-only */
-  _text = .;			/* Text and read-only data */
   .text : AT(ADDR(.text) - LOAD_OFFSET) {
+	_text = .;		/* Text and read-only data */
 	*(.text)
 	SCHED_TEXT
 	LOCK_TEXT
 	KPROBES_TEXT
 	*(.fixup)
 	*(.gnu.warning)
-	} :text = 0x9090
-
-  _etext = .;			/* End of text section */
+	_etext = .;		/* End of text section */
+  } :text = 0x9090
 
   . = ALIGN(16);		/* Exception table */
-  __start___ex_table = .;
-  __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) }
-  __stop___ex_table = .;
+  __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
+	__start___ex_table = .;
+	*(__ex_table)
+	__stop___ex_table = .;
+  }
 
   RODATA
 
   . = ALIGN(4);
-  __tracedata_start = .;
   .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) {
+	__tracedata_start = .;
 	*(.tracedata)
+	__tracedata_end = .;
   }
-  __tracedata_end = .;
 
   /* writeable */
   .data : AT(ADDR(.data) - LOAD_OFFSET) {	/* Data */
 	*(.data)
 	CONSTRUCTORS
-	} :data
+  } :data
 
   . = ALIGN(4096);
-  __nosave_begin = .;
-  .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) }
-  . = ALIGN(4096);
-  __nosave_end = .;
+  .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
+	__nosave_begin = .;
+	*(.data.nosave)
+	. = ALIGN(4096);
+	__nosave_end = .;
+  }
 
   . = ALIGN(4096);
   .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
@@ -68,8 +72,10 @@ SECTIONS
 
   /* rarely changed data like cpu maps */
   . = ALIGN(32);
-  .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) }
-  _edata = .;			/* End of data section */
+  .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
+	*(.data.read_mostly)
+	_edata = .;		/* End of data section */
+  }
 
 #ifdef CONFIG_STACK_UNWIND
   . = ALIGN(4);
@@ -87,39 +93,41 @@ #endif
 
   /* might get freed after init */
   . = ALIGN(4096);
-  __smp_alt_begin = .;
-  __smp_alt_instructions = .;
   .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) {
+	__smp_alt_begin = .;
+	__smp_alt_instructions = .;
 	*(.smp_altinstructions)
+	__smp_alt_instructions_end = .;
   }
-  __smp_alt_instructions_end = .;
   . = ALIGN(4);
-  __smp_locks = .;
   .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
+	__smp_locks = .;
 	*(.smp_locks)
+	__smp_locks_end = .;
   }
-  __smp_locks_end = .;
   .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) {
 	*(.smp_altinstr_replacement)
+	. = ALIGN(4096);
+	__smp_alt_end = .;
   }
-  . = ALIGN(4096);
-  __smp_alt_end = .;
 
   /* will be freed after init */
   . = ALIGN(4096);		/* Init code and data */
-  __init_begin = .;
   .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
+  	__init_begin = .;
 	_sinittext = .;
 	*(.init.text)
 	_einittext = .;
   }
   .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) }
   . = ALIGN(16);
-  __setup_start = .;
-  .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) }
-  __setup_end = .;
-  __initcall_start = .;
+  .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
+	__setup_start = .;
+	*(.init.setup)
+	__setup_end = .;
+  }
   .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
+	__initcall_start = .;
 	*(.initcall1.init) 
 	*(.initcall2.init) 
 	*(.initcall3.init) 
@@ -127,20 +135,20 @@ #endif
 	*(.initcall5.init) 
 	*(.initcall6.init) 
 	*(.initcall7.init)
+	__initcall_end = .;
   }
-  __initcall_end = .;
-  __con_initcall_start = .;
   .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
+	__con_initcall_start = .;
 	*(.con_initcall.init)
+	__con_initcall_end = .;
   }
-  __con_initcall_end = .;
   SECURITY_INIT
   . = ALIGN(4);
-  __alt_instructions = .;
   .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
+	__alt_instructions = .;
 	*(.altinstructions)
+	__alt_instructions_end = .;
   }
-  __alt_instructions_end = .; 
   .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
 	*(.altinstr_replacement)
   }
@@ -149,32 +157,32 @@ #endif
   .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) }
   .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) }
   . = ALIGN(4096);
-  __initramfs_start = .;
-  .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) }
-  __initramfs_end = .;
+  .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
+	__initramfs_start = .;
+	*(.init.ramfs)
+	__initramfs_end = .;
+  }
   . = ALIGN(L1_CACHE_BYTES);
-  __per_cpu_start = .;
-  .data.percpu  : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) }
-  __per_cpu_end = .;
+  .data.percpu  : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
+	__per_cpu_start = .;
+	*(.data.percpu)
+	__per_cpu_end = .;
+  }
   . = ALIGN(4096);
-  __init_end = .;
   /* freed after init ends here */
 	
-  __bss_start = .;		/* BSS */
-  .bss.page_aligned : AT(ADDR(.bss.page_aligned) - LOAD_OFFSET) {
-	*(.bss.page_aligned)
-  }
   .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
+	__init_end = .;
+	__bss_start = .;		/* BSS */
+	*(.bss.page_aligned)
 	*(.bss)
+	. = ALIGN(4);
+	__bss_stop = .;
+  	_end = . ;
+	/* This is where the kernel creates the early boot page tables */
+	. = ALIGN(4096);
+	pg0 = . ;
   }
-  . = ALIGN(4);
-  __bss_stop = .; 
-
-  _end = . ;
-
-  /* This is where the kernel creates the early boot page tables */
-  . = ALIGN(4096);
-  pg0 = .;
 
   /* Sections to be discarded */
   /DISCARD/ : {
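
Moving symbols such as _text, __ex_table's bounds and __init_begin inside their output sections is the heart of this linker-script rework: a symbol assigned inside a section is section-relative, so references to it carry relocations that --emit-relocs preserves, and a relocatable kernel can fix them up at load time; symbols assigned between sections are absolute and get none. The C side is unchanged; a sketch of the usual consumer pattern, with symbol names taken from the script above:

	extern char __init_begin[], __init_end[];

	static unsigned long init_area_size(void)
	{
		/* The difference of two symbols in the same image is
		 * position-independent, so this needs no fixup at all. */
		return __init_end - __init_begin;
	}
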
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index 89e8486..8dbbb09 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -750,10 +750,11 @@ void free_init_pages(char *what, unsigne
 	unsigned long addr;
 
 	for (addr = begin; addr < end; addr += PAGE_SIZE) {
-		ClearPageReserved(virt_to_page(addr));
-		init_page_count(virt_to_page(addr));
-		memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
-		free_page(addr);
+		struct page *page = pfn_to_page(addr >> PAGE_SHIFT);
+		ClearPageReserved(page);
+		init_page_count(page);
+		memset(page_address(page), POISON_FREE_INITMEM, PAGE_SIZE);
+		__free_page(page);
 		totalram_pages++;
 	}
 	printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
@@ -762,14 +763,14 @@ void free_init_pages(char *what, unsigne
 void free_initmem(void)
 {
 	free_init_pages("unused kernel memory",
-			(unsigned long)(&__init_begin),
-			(unsigned long)(&__init_end));
+			__pa_symbol(&__init_begin),
+			__pa_symbol(&__init_end));
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-	free_init_pages("initrd memory", start, end);
+	free_init_pages("initrd memory", __pa(start), __pa(end));
 }
 #endif
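
free_init_pages() now walks a physical range with pfn_to_page() instead of virt_to_page(), and callers pass physical addresses: __pa() for ordinary pointers, __pa_symbol() for linker symbols, whose link-time address can differ from the load address once the kernel is relocatable. A self-contained model of the new loop, simplified for illustration:

	/* Illustrative model only -- the real function also poisons the
	 * freed pages and accounts them in totalram_pages. */
	static void free_phys_range(unsigned long begin, unsigned long end)
	{
		unsigned long addr;

		for (addr = begin; addr < end; addr += PAGE_SIZE) {
			struct page *page = pfn_to_page(addr >> PAGE_SHIFT);

			ClearPageReserved(page);
			init_page_count(page);
			__free_page(page);
		}
	}
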
 
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 28df7d8..763b25b 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -486,25 +486,6 @@ config CRASH_DUMP
 	help
 		Generate crash dump after being started by kexec.
 
-config PHYSICAL_START
-	hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
-	default "0x1000000" if CRASH_DUMP
-	default "0x200000"
-	help
-	  This gives the physical address where the kernel is loaded. Normally
-	  for regular kernels this value is 0x200000 (2MB). But in the case
-	  of kexec on panic the fail safe kernel needs to run at a different
-	  address than the panic-ed kernel. This option is used to set the load
-	  address for kernels used to capture crash dump on being kexec'ed
-	  after panic. The default value for crash dump kernels is
-	  0x1000000 (16MB). This can also be set based on the "X" value as
-	  specified in the "crashkernel=YM@XM" command line boot parameter
-	  passed to the panic-ed kernel. Typically this parameter is set as
-	  crashkernel=64M@16M. Please take a look at
-	  Documentation/kdump/kdump.txt for more details about crash dumps.
-
-	  Don't change this unless you know what you are doing.
-
 config SECCOMP
 	bool "Enable seccomp to safely compute untrusted bytecode"
 	depends on PROC_FS
diff --git a/arch/x86_64/boot/Makefile b/arch/x86_64/boot/Makefile
index deb063e..80a7492 100644
--- a/arch/x86_64/boot/Makefile
+++ b/arch/x86_64/boot/Makefile
@@ -41,7 +41,7 @@ # --------------------------------------
 
 quiet_cmd_image = BUILD   $@
 cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/bootsect $(obj)/setup \
-	    $(obj)/vmlinux.bin $(ROOT_DEV) > $@
+	    $(obj)/vmlinux.bin $(ROOT_DEV) vmlinux > $@
 
 $(obj)/bzImage: $(obj)/bootsect $(obj)/setup \
 			      $(obj)/vmlinux.bin $(obj)/tools/build FORCE
diff --git a/arch/x86_64/boot/bootsect.S b/arch/x86_64/boot/bootsect.S
index 011b7a4..05bd1f3 100644
--- a/arch/x86_64/boot/bootsect.S
+++ b/arch/x86_64/boot/bootsect.S
@@ -13,6 +13,13 @@
  *
  */
 
+#include <linux/version.h>
+#include <linux/utsrelease.h>
+#include <linux/compile.h>
+#include <linux/elf.h>
+#include <linux/elf-em.h>
+#include <linux/elf_boot.h>
+#include <asm/page.h>
 #include <asm/boot.h>
 
 SETUPSECTS	= 4			/* default nr of setup-sectors */
@@ -42,10 +49,88 @@ #endif
 
 .global _start
 _start:
-
+ehdr:
+	# e_ident is carefully crafted so if this is treated
+	# as an x86 bootsector you will execute through
+	# e_ident and then print the bugger off message.
+	# The one store to (bx+di) is unfortunate, but it is
+	# unlikely to affect the ability to print
+	# a message, and you aren't supposed to be booting a
+	# bzImage directly from a floppy anyway.
+
+	# e_ident
+	.byte ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3
+	.byte ELFCLASS64, ELFDATA2LSB, EV_CURRENT, ELFOSABI_STANDALONE
+	.byte 0xeb, 0x3d, 0, 0, 0, 0, 0, 0
+	.word ET_DYN				# e_type
+	.word EM_X86_64				# e_machine
+	.int  1					# e_version
+	.quad 0x0000000000000100		# e_entry (startup_64)
+	.quad phdr - _start			# e_phoff
+	.quad 0					# e_shoff
+	.int  0					# e_flags
+	.word e_ehdr - ehdr			# e_ehsize
+	.word e_phdr1 - phdr			# e_phentsize
+	.word (e_phdr - phdr)/(e_phdr1 - phdr)	# e_phnum
+	.word 64				# e_shentsize
+	.word 0					# e_shnum
+	.word 0					# e_shstrndx
+e_ehdr:
+
+.org 71
+normalize:
 	# Normalize the start address
 	jmpl	$BOOTSEG, $start2
 
+.org 80
+phdr:
+	.int PT_LOAD					# p_type
+	.int PF_R | PF_W | PF_X				# p_flags
+	.quad (SETUPSECTS+1)*512			# p_offset
+	.quad __START_KERNEL_map			# p_vaddr
+	.quad 0x0000000000000000			# p_paddr
+	.quad SYSSIZE*16				# p_filesz
+	.quad 0						# p_memsz
+	.quad 2*1024*1024				# p_align
+e_phdr1:
+
+	.int PT_NOTE					# p_type
+	.int 0						# p_flags
+	.quad b_note - _start				# p_offset
+	.quad 0						# p_vaddr
+	.quad 0						# p_paddr
+	.quad e_note - b_note				# p_filesz
+	.quad 0						# p_memsz
+	.quad 0						# p_align
+e_phdr:
+
+.macro note name, type
+	.balign 4
+	.int	2f - 1f			# n_namesz
+	.int	4f - 3f			# n_descsz
+	.int	\type			# n_type
+	.balign 4
+1:	.asciz "\name"
+2:	.balign 4
+3:
+.endm
+.macro enote
+4:	.balign 4
+.endm
+
+	.balign 4
+b_note:
+	note ELF_NOTE_BOOT, EIN_PROGRAM_NAME
+		.asciz	"Linux"
+	enote
+	note ELF_NOTE_BOOT, EIN_PROGRAM_VERSION
+		.asciz	UTS_RELEASE
+	enote
+	note ELF_NOTE_BOOT, EIN_ARGUMENT_STYLE
+		.asciz	"Linux"
+	enote
+e_note:
+
 start2:
 	movw	%cs, %ax
 	movw	%ax, %ds
@@ -78,11 +163,11 @@ die:
 
 
 bugger_off_msg:
-	.ascii	"Direct booting from floppy is no longer supported.\r\n"
-	.ascii	"Please use a boot loader program instead.\r\n"
+	.ascii	"Booting linux without a boot loader is no longer supported.\r\n"
 	.ascii	"\n"
-	.ascii	"Remove disk and press any key to reboot . . .\r\n"
+	.ascii	"Press any key to reboot . . .\r\n"
 	.byte	0
+ebugger_off_msg:
 
 
 	# Kernel attributes; used by setup
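
The note/enote macros above emit standard ELF note records into the boot sector. Read back from C, each record has the classic ELF note shape; a reader's-eye sketch (the struct name is illustrative, and the 4-byte padding matches the macros' .balign directives):

	struct boot_note_hdr {
		unsigned int n_namesz;	/* strlen(name) + 1 */
		unsigned int n_descsz;	/* payload size in bytes */
		unsigned int n_type;	/* EIN_PROGRAM_NAME, EIN_PROGRAM_VERSION, ... */
		/* name bytes follow, padded to a 4-byte boundary,
		 * then n_descsz payload bytes, padded the same way */
	};
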
diff --git a/arch/x86_64/boot/compressed/Makefile b/arch/x86_64/boot/compressed/Makefile
index f89d96f..3dda50c 100644
--- a/arch/x86_64/boot/compressed/Makefile
+++ b/arch/x86_64/boot/compressed/Makefile
@@ -7,16 +7,15 @@ # Note all the files here are compiled/l
 #
 
 targets		:= vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o
-EXTRA_AFLAGS	:= -traditional -m32
+EXTRA_AFLAGS	:= -traditional
 
 # cannot use EXTRA_CFLAGS because base CFLAGS contains -mkernel which conflicts with
-# -m32
-CFLAGS := -m32 -D__KERNEL__ -Iinclude -O2  -fno-strict-aliasing
-LDFLAGS := -m elf_i386
+CFLAGS := -m64 -D__KERNEL__ -Iinclude -O2  -fno-strict-aliasing -fPIC -mcmodel=small -fno-builtin
+LDFLAGS := -m elf_x86_64
 
-LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -e startup_32 -m elf_i386
+LDFLAGS_vmlinux := -T
 
-$(obj)/vmlinux: $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE
+$(obj)/vmlinux: $(src)/vmlinux.lds $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE
 	$(call if_changed,ld)
 	@:
 
@@ -26,7 +25,7 @@ LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET
 $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
 	$(call if_changed,gzip)
 
-LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T
+LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T
 
 $(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE
 	$(call if_changed,ld)
diff --git a/arch/x86_64/boot/compressed/head.S b/arch/x86_64/boot/compressed/head.S
index 6f55565..22c8dc4 100644
--- a/arch/x86_64/boot/compressed/head.S
+++ b/arch/x86_64/boot/compressed/head.S
@@ -26,116 +26,245 @@
 
 #include <linux/linkage.h>
 #include <asm/segment.h>
+#include <asm/pgtable.h>
 #include <asm/page.h>
+#include <asm/msr.h>
 
+.section ".text.head"
 	.code32
 	.globl startup_32
 	
 startup_32:
 	cld
 	cli
-	movl $(__KERNEL_DS),%eax
-	movl %eax,%ds
-	movl %eax,%es
-	movl %eax,%fs
-	movl %eax,%gs
-
-	lss stack_start,%esp
-	xorl %eax,%eax
-1:	incl %eax		# check that A20 really IS enabled
-	movl %eax,0x000000	# loop forever if it isn't
-	cmpl %eax,0x100000
-	je 1b
+	movl	$(__KERNEL_DS), %eax
+	movl	%eax, %ds
+	movl	%eax, %es
+	movl	%eax, %ss
+
+/* Calculate the delta between where we were compiled to run
+ * at and where we were actually loaded at.  This can only be done
+ * with a short local call on x86.  Nothing else will tell us what
+ * address we are running at.  The reserved chunk of the real-mode
+ * data at 0x34-0x3f is used as the stack for this calculation.
+ * Only 4 bytes are needed.
+ */
+	leal	0x40(%esi), %esp
+	call	1f
+1:	popl	%ebp
+	subl	$1b, %ebp
+
+/* Compute the delta between where we were compiled to run at
+ * and where the code will actually run at.
+ */
+	movl	%ebp, %ebx
+	addl	$(LARGE_PAGE_SIZE -1), %ebx
+	andl	$LARGE_PAGE_MASK, %ebx
+
+	/* Replace the compressed data size with the uncompressed size */
+	subl	input_len(%ebp), %ebx
+	movl	output_len(%ebp), %eax
+	addl	%eax, %ebx
+	/* Add 8 bytes for every 32K input block */
+	shrl	$12, %eax
+	addl	%eax, %ebx
+	/* Add 32K + 18 bytes of extra slack and align on a 4K boundary */
+	addl	$(32768 + 18 + 4095), %ebx
+	andl	$~4095, %ebx
 
 /*
- * Initialize eflags.  Some BIOS's leave bits like NT set.  This would
- * confuse the debugger if this code is traced.
- * XXX - best to initialize before switching to protected mode.
+ * Prepare for entering 64 bit mode
  */
-	pushl $0
-	popfl
+
+	/* Load new GDT with the 64bit segments using 32bit descriptor */
+	leal	gdt(%ebp), %eax
+	movl	%eax, gdt+2(%ebp)
+	lgdt	gdt(%ebp)
+
+	/* Enable PAE mode */
+	xorl	%eax, %eax
+	orl	$(1 << 5), %eax
+	movl	%eax, %cr4
+
 /*
- * Clear BSS
+ * Build early 4G boot pagetable
  */
-	xorl %eax,%eax
-	movl $_edata,%edi
-	movl $_end,%ecx
-	subl %edi,%ecx
-	cld
-	rep
-	stosb
+	/* Initialize Page tables to 0*/
+	leal	pgtable(%ebx), %edi
+	xorl	%eax, %eax
+	movl	$((4096*6)/4), %ecx
+	rep	stosl
+
+	/* Build Level 4 */
+	leal	pgtable + 0(%ebx), %edi
+	leal	0x1007 (%edi), %eax
+	movl	%eax, 0(%edi)
+
+	/* Build Level 3 */
+	leal	pgtable + 0x1000(%ebx), %edi
+	leal	0x1007(%edi), %eax
+	movl	$4, %ecx
+1:	movl	%eax, 0x00(%edi)
+	addl	$0x00001000, %eax
+	addl	$8, %edi
+	decl	%ecx
+	jnz	1b
+
+	/* Build Level 2 */
+	leal	pgtable + 0x2000(%ebx), %edi
+	movl	$0x00000183, %eax
+	movl	$2048, %ecx
+1:	movl	%eax, 0(%edi)
+	addl	$0x00200000, %eax
+	addl	$8, %edi
+	decl	%ecx
+	jnz	1b
+
+	/* Enable the boot page tables */
+	leal	pgtable(%ebx), %eax
+	movl	%eax, %cr3
+
+	/* Enable Long mode in EFER (Extended Feature Enable Register) */
+	movl	$MSR_EFER, %ecx
+	rdmsr
+	btsl	$_EFER_LME, %eax
+	wrmsr
+
+	/* Setup for the jump to 64bit mode
+	 *
+	 * When the jump is performed we will be in long mode but
+	 * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
+	 * (and in turn EFER.LMA = 1).  To jump into 64bit mode we use
+	 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
+	 * We place all of the values on our mini stack so lret can
+	 * be used to perform that far jump.
+	 */
+	pushl	$__KERNEL_CS
+	leal	startup_64(%ebp), %eax
+	pushl	%eax
+
+	/* Enter paged protected Mode, activating Long Mode */
+	movl	$0x80000001, %eax /* Enable Paging and Protected mode */
+	movl	%eax, %cr0
+
+	/* Jump from 32bit compatibility mode into 64bit mode. */
+	lret
+
+	/* Be careful here: startup_64 needs to be at a predictable
+	 * address so I can export it in an ELF header.  Bootloaders
+	 * should look at the ELF header to find this address, as
+	 * it may change in the future.
+	 */
+	.code64
+	.org 0x100
+ENTRY(startup_64)
+	/* We come here either from startup_32 or directly from a
+	 * 64bit bootloader.  If we come here from a bootloader we depend on
+	 * an identity mapped page table being provied that maps our
+	 * entire text+data+bss and hopefully all of memory.
+	 */
+
+	/* Setup data segments. */
+	xorl	%eax, %eax
+	movl	%eax, %ds
+	movl	%eax, %es
+	movl	%eax, %ss
+
+	/* Compute the decompressed kernel start address.  It is where
+	 * we were loaded, aligned to a 2M boundary.
+	 */
+	leaq	startup_32(%rip) /* - $startup_32 */, %rbp
+	addq	$(LARGE_PAGE_SIZE - 1), %rbp
+	andq	$LARGE_PAGE_MASK, %rbp
+
+/* Compute the delta between where we were compiled to run at
+ * and where the code will actually run at.
+ */
+	/* Start with the delta to where the kernel will run at. */
+	movq	%rbp, %rbx
+
+	/* Replace the compressed data size with the uncompressed size */
+	movl	input_len(%rip), %eax
+	subq	%rax, %rbx
+	movl	output_len(%rip), %eax
+	addq	%rax, %rbx
+	/* Add 8 bytes for every 32K input block */
+	shrq	$12, %rax
+	addq	%rax, %rbx
+	/* Add 32K + 18 bytes of extra slack and align on a 4K boundary */
+	addq	$(32768 + 18 + 4095), %rbx
+	andq	$~4095, %rbx
+
+/* Copy the compressed kernel to the end of our buffer
+ * where decompression in place becomes safe.
+ */
+	leaq	_end(%rip), %r8
+	leaq	_end(%rbx), %r9
+	movq	$_end /* - $startup_32 */, %rcx
+1:	subq	$8, %r8
+	subq	$8, %r9
+	movq	0(%r8), %rax
+	movq	%rax, 0(%r9)
+	subq	$8, %rcx
+	jnz	1b
+
 /*
- * Do the decompression, and jump to the new kernel..
+ * Jump to the relocated address.
  */
-	subl $16,%esp	# place for structure on the stack
-	movl %esp,%eax
-	pushl %esi	# real mode pointer as second arg
-	pushl %eax	# address of structure as first arg
-	call decompress_kernel
-	orl  %eax,%eax 
-	jnz  3f
-	addl $8,%esp
-	xorl %ebx,%ebx
-	ljmp $(__KERNEL_CS), $__PHYSICAL_START
+	leaq	relocated(%rbx), %rax
+	jmp	*%rax
+
+.section ".text"
+relocated:
 
 /*
- * We come here, if we were loaded high.
- * We need to move the move-in-place routine down to 0x1000
- * and then start it with the buffer addresses in registers,
- * which we got from the stack.
+ * Clear BSS
  */
-3:
-	movl %esi,%ebx	
-	movl $move_routine_start,%esi
-	movl $0x1000,%edi
-	movl $move_routine_end,%ecx
-	subl %esi,%ecx
-	addl $3,%ecx
-	shrl $2,%ecx
+	xorq	%rax, %rax
+	leaq    _edata(%rbx), %rdi
+	leaq    _end(%rbx), %rcx
+	subq	%rdi, %rcx
 	cld
 	rep
-	movsl
-
-	popl %esi	# discard the address
-	addl $4,%esp	# real mode pointer
-	popl %esi	# low_buffer_start
-	popl %ecx	# lcount
-	popl %edx	# high_buffer_start
-	popl %eax	# hcount
-	movl $__PHYSICAL_START,%edi
-	cli		# make sure we don't get interrupted
-	ljmp $(__KERNEL_CS), $0x1000 # and jump to the move routine
+	stosb
+
+	/* Setup the stack */
+	leaq	user_stack_end(%rip), %rsp
+
+	/* zero EFLAGS after setting rsp */
+	pushq	$0
+	popfq
 
 /*
- * Routine (template) for moving the decompressed kernel in place,
- * if we were high loaded. This _must_ PIC-code !
+ * Do the decompression, and jump to the new kernel..
  */
-move_routine_start:
-	movl %ecx,%ebp
-	shrl $2,%ecx
-	rep
-	movsl
-	movl %ebp,%ecx
-	andl $3,%ecx
-	rep
-	movsb
-	movl %edx,%esi
-	movl %eax,%ecx	# NOTE: rep movsb won't move if %ecx == 0
-	addl $3,%ecx
-	shrl $2,%ecx
-	rep
-	movsl
-	movl %ebx,%esi	# Restore setup pointer
-	xorl %ebx,%ebx
-	ljmp $(__KERNEL_CS), $__PHYSICAL_START
-move_routine_end:
+	pushq	%rsi			# Save the real mode argument
+	movq	%rsi, %rdi		# real mode address
+	leaq	_heap(%rip), %rsi	# _heap
+	leaq	input_data(%rip), %rdx  # input_data
+	movl	input_len(%rip), %eax
+	movq	%rax, %rcx		# input_len
+	movq	%rbp, %r8		# output
+	call	decompress_kernel
+	popq	%rsi
 
+/*
+ * Jump to the decompressed kernel.
+ */
+	jmp	*%rbp
 
-/* Stack for uncompression */ 	
-	.align 32
+	.data
+gdt:
+	.word	gdt_end - gdt
+	.long	gdt
+	.word	0
+	.quad	0x0000000000000000	/* NULL descriptor */
+	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
+	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
+gdt_end:
+	.bss
+/* Stack for uncompression */
+	.balign 4
 user_stack:	 	
 	.fill 4096,4,0
-stack_start:	
-	.long user_stack+4096
-	.word __KERNEL_DS
-
+user_stack_end:
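
Both entry paths above perform the same register arithmetic: pick a 2M-aligned base, then push the destination far enough past it that in-place decompression can never overrun the compressed input (the formula is derived at length in misc.c below). A hedged C restatement, with illustrative names; LARGE_PAGE_SIZE/LARGE_PAGE_MASK come from asm/page.h:

	/* Sketch only -- head.S does this in registers. */
	static unsigned long relocated_base(unsigned long load_addr,
					    unsigned long input_len,
					    unsigned long output_len)
	{
		unsigned long base, end;

		base = (load_addr + LARGE_PAGE_SIZE - 1) & LARGE_PAGE_MASK;
		end  = base - input_len + output_len;	/* room for the output */
		end += output_len >> 12;		/* 8 bytes per 32K block */
		end += 32768 + 18;			/* worst-case block + gzip overhead */
		return (end + 4095) & ~4095UL;		/* 4K align */
	}
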
diff --git a/arch/x86_64/boot/compressed/misc.c b/arch/x86_64/boot/compressed/misc.c
index 3755b2e..0e6c4b7 100644
--- a/arch/x86_64/boot/compressed/misc.c
+++ b/arch/x86_64/boot/compressed/misc.c
@@ -9,9 +9,96 @@
  * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
  */
 
+#define _LINUX_STRING_H_ 1
+#define __LINUX_BITMAP_H 1
+
+#include <linux/linkage.h>
 #include <linux/screen_info.h>
+#include <linux/serial_reg.h>
 #include <asm/io.h>
 #include <asm/page.h>
+#include <asm/setup.h>
+
+/* WARNING!!
+ * This code is compiled with -fPIC and it is relocated dynamically
+ * at run time, but no relocation processing is performed.
+ * This means that it is not safe to place pointers in static structures.
+ */
+
+/*
+ * Getting to provably safe in-place decompression is hard.
+ * Worst-case behaviours need to be analyzed.
+ * Background information:
+ *
+ * The file layout is:
+ *    magic[2]
+ *    method[1]
+ *    flags[1]
+ *    timestamp[4]
+ *    extraflags[1]
+ *    os[1]
+ *    compressed data blocks[N]
+ *    crc[4] orig_len[4]
+ *
+ * resulting in 18 bytes of non-compressed data overhead.
+ *
+ * Files are divided into blocks, each prefixed by:
+ * 1 bit (last block flag)
+ * 2 bits (block type)
+ *
+ * A new block occurs every 32K - 1 bytes, or once 50% compression has been achieved.
+ * The smallest block type encoding is always used.
+ *
+ * stored:
+ *    32 bits length in bytes.
+ *
+ * fixed:
+ *    magic fixed tree.
+ *    symbols.
+ *
+ * dynamic:
+ *    dynamic tree encoding.
+ *    symbols.
+ *
+ *
+ * The buffer for decompression in place is the length of the
+ * uncompressed data, plus a small amount extra to keep the algorithm safe.
+ * The compressed data is placed at the end of the buffer.  The output
+ * pointer is placed at the start of the buffer and the input pointer
+ * is placed where the compressed data starts.  Problems will occur
+ * when the output pointer overruns the input pointer.
+ *
+ * The output pointer can only overrun the input pointer if the input
+ * pointer is moving faster than the output pointer.  A condition only
+ * triggered by data whose compressed form is larger than the uncompressed
+ * form.
+ *
+ * The worst case at the block level is a growth of the compressed data
+ * of 5 bytes per 32767 bytes.
+ *
+ * The worst case internal to a compressed block is very hard to figure.
+ * The worst case can at least be bounded by having one bit that represents
+ * 32764 bytes and then all of the rest of the bytes representing the
+ * very last byte.
+ *
+ * All of which is enough to compute an amount of extra data that is required
+ * to be safe.  To avoid problems at the block level allocating 5 extra bytes
+ * per 32767 bytes of data is sufficient.  To avoid problems internal to a block,
+ * adding an extra 32767 bytes (the worst case uncompressed block size) is
+ * sufficient, to ensure that in the worst case the decompressed data for a
+ * block will stop the byte before the compressed data for that block begins.
+ * To avoid problems with the compressed data's meta information an extra 18
+ * bytes are needed.  Leading to the formula:
+ *
+ * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size.
+ *
+ * Adding 8 bytes per 32K is a bit excessive but much easier to calculate.
+ * Adding 32768 instead of 32767 just makes for round numbers.
+ * Adding the decompressor_size is necessary as it must live after all
+ * of the data as well.  Last I measured, the decompressor is about 14K:
+ * 10K of actual data and 4K of bss.
+ *
+ */
 
 /*
  * gzip declarations
@@ -28,15 +115,20 @@ typedef unsigned char  uch;
 typedef unsigned short ush;
 typedef unsigned long  ulg;
 
-#define WSIZE 0x8000		/* Window size must be at least 32k, */
-				/* and a power of two */
+#define WSIZE 0x80000000	/* Window size must be at least 32k,
+				 * and a power of two.
+				 * We don't actually have a window, just
+				 * a huge output buffer, so I report
+				 * a 2G window size, as that should
+				 * always be larger than our output buffer.
+				 */
 
-static uch *inbuf;	     /* input buffer */
-static uch window[WSIZE];    /* Sliding window buffer */
+static uch *inbuf;	/* input buffer */
+static uch *window;	/* Sliding window buffer, (and final output buffer) */
 
-static unsigned insize = 0;  /* valid bytes in inbuf */
-static unsigned inptr = 0;   /* index of next byte to be processed in inbuf */
-static unsigned outcnt = 0;  /* bytes in output buffer */
+static unsigned insize;  /* valid bytes in inbuf */
+static unsigned inptr;   /* index of next byte to be processed in inbuf */
+static unsigned outcnt;  /* bytes in output buffer */
 
 /* gzip flag byte */
 #define ASCII_FLAG   0x01 /* bit 0 set: file probably ASCII text */
@@ -76,44 +168,55 @@ static void gzip_release(void **);
  * This is set up by the setup-routine at boot-time
  */
 static unsigned char *real_mode; /* Pointer to real-mode data */
+static char saved_command_line[COMMAND_LINE_SIZE];
 
 #define RM_EXT_MEM_K   (*(unsigned short *)(real_mode + 0x2))
 #ifndef STANDARD_MEMORY_BIOS_CALL
 #define RM_ALT_MEM_K   (*(unsigned long *)(real_mode + 0x1e0))
 #endif
 #define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0))
+#define RM_NEW_CL_POINTER ((char *)(unsigned long)(*(unsigned *)(real_mode+0x228)))
+#define RM_OLD_CL_MAGIC (*(unsigned short *)(real_mode + 0x20))
+#define RM_OLD_CL_OFFSET (*(unsigned short *)(real_mode + 0x22))
+#define OLD_CL_MAGIC 0xA33F
 
 extern unsigned char input_data[];
 extern int input_len;
 
 static long bytes_out = 0;
-static uch *output_data;
-static unsigned long output_ptr = 0;
 
 static void *malloc(int size);
 static void free(void *where);
 
 static void *memset(void *s, int c, unsigned n);
 static void *memcpy(void *dest, const void *src, unsigned n);
+static int memcmp(const void *s1, const void *s2, unsigned n);
+static size_t strlen(const char *str);
+static char *strstr(const char *haystack, const char *needle);
 
 static void putstr(const char *);
+static unsigned simple_strtou(const char *cp, char **endp, unsigned base);
 
-extern int end;
-static long free_mem_ptr = (long)&end;
+static long free_mem_ptr;
 static long free_mem_end_ptr;
 
-#define INPLACE_MOVE_ROUTINE  0x1000
-#define LOW_BUFFER_START      0x2000
-#define LOW_BUFFER_MAX       0x90000
-#define HEAP_SIZE             0x3000
-static unsigned int low_buffer_end, low_buffer_size;
-static int high_loaded =0;
-static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/;
+#define HEAP_SIZE             0x6000
 
-static char *vidmem = (char *)0xb8000;
+static char *vidmem;
 static int vidport;
 static int lines, cols;
 
+/* The early serial console */
+
+#define DEFAULT_BAUD 9600
+#define DEFAULT_BASE 0x3f8 /* ttyS0 */
+static unsigned serial_base = DEFAULT_BASE;
+
+#define CONSOLE_NOOP   0
+#define CONSOLE_VID    1
+#define CONSOLE_SERIAL 2
+static int console = CONSOLE_NOOP;
+
 #include "../../../../lib/inflate.c"
 
 static void *malloc(int size)
@@ -148,7 +251,8 @@ static void gzip_release(void **ptr)
 	free_mem_ptr = (long) *ptr;
 }
  
-static void scroll(void)
+/* The early video console */
+static void vid_scroll(void)
 {
 	int i;
 
@@ -157,7 +261,7 @@ static void scroll(void)
 		vidmem[i] = ' ';
 }
 
-static void putstr(const char *s)
+static void vid_putstr(const char *s)
 {
 	int x,y,pos;
 	char c;
@@ -169,7 +273,7 @@ static void putstr(const char *s)
 		if ( c == '\n' ) {
 			x = 0;
 			if ( ++y >= lines ) {
-				scroll();
+				vid_scroll();
 				y--;
 			}
 		} else {
@@ -177,7 +281,7 @@ static void putstr(const char *s)
 			if ( ++x >= cols ) {
 				x = 0;
 				if ( ++y >= lines ) {
-					scroll();
+					vid_scroll();
 					y--;
 				}
 			}
@@ -194,6 +298,178 @@ static void putstr(const char *s)
 	outb_p(0xff & (pos >> 1), vidport+1);
 }
 
+static void vid_console_init(void)
+{
+	if (RM_SCREEN_INFO.orig_video_mode == 7) {
+		vidmem = (char *) 0xb0000;
+		vidport = 0x3b4;
+	} else {
+		vidmem = (char *) 0xb8000;
+		vidport = 0x3d4;
+	}
+
+	lines = RM_SCREEN_INFO.orig_video_lines;
+	cols = RM_SCREEN_INFO.orig_video_cols;
+}
+
+/* The early serial console */
+static void serial_putc(int ch)
+{
+	if (ch == '\n') {
+		serial_putc('\r');
+	}
+	/* Wait until I can send a byte */
+	while ((inb(serial_base + UART_LSR) & UART_LSR_THRE) == 0)
+		;
+
+	/* Send the byte */
+	outb(ch, serial_base + UART_TX);
+
+	/* Wait until the byte is transmitted */
+	while (!(inb(serial_base + UART_LSR) & UART_LSR_TEMT))
+		;
+}
+
+static void serial_putstr(const char *str)
+{
+	int ch;
+	while((ch = *str++) != '\0') {
+		/* serial_putc() already expands '\n' into "\r\n" */
+		serial_putc(ch);
+	}
+}
+
+static void serial_console_init(char *s)
+{
+	unsigned base = DEFAULT_BASE;
+	unsigned baud = DEFAULT_BAUD;
+	unsigned divisor;
+	char *e;
+
+	if (*s == ',')
+		++s;
+	if (*s && (*s != ' ')) {
+		if (memcmp(s, "0x", 2) == 0) {
+			base = simple_strtou(s, &e, 16);
+		} else {
+			static const unsigned bases[] = { 0x3f8, 0x2f8 };
+			unsigned port;
+
+			if (memcmp(s, "ttyS", 4) == 0)
+				s += 4;
+			port = simple_strtou(s, &e, 10);
+			if ((port > 1) || (s == e))
+				port = 0;
+			base = bases[port];
+		}
+		s = e;
+		if (*s == ',')
+			++s;
+	}
+	if (*s && (*s != ' ')) {
+		baud = simple_strtou(s, &e, 0);
+		if ((baud == 0) || (s == e))
+			baud = DEFAULT_BAUD;
+	}
+	divisor = 115200 / baud;
+	serial_base = base;
+
+	outb(0x00, serial_base + UART_IER); /* no interrupt */
+	outb(0x00, serial_base + UART_FCR); /* no fifo */
+	outb(0x03, serial_base + UART_MCR); /* DTR + RTS */
+
+	/* Set Baud Rate divisor  */
+	outb(0x83, serial_base + UART_LCR);
+	outb(divisor & 0xff, serial_base + UART_DLL);
+	outb(divisor >> 8, serial_base + UART_DLM);
+	outb(0x03, serial_base + UART_LCR); /* 8n1 */
+
+}
+
+static void putstr(const char *str)
+{
+	if (console == CONSOLE_VID) {
+		vid_putstr(str);
+	} else if (console == CONSOLE_SERIAL) {
+		serial_putstr(str);
+	}
+}
+
+static void console_init(char *cmdline)
+{
+	cmdline = strstr(cmdline, "earlyprintk=");
+	if (!cmdline)
+		return;
+	cmdline += 12;
+	if (memcmp(cmdline, "vga", 3) == 0) {
+		vid_console_init();
+		console = CONSOLE_VID;
+	} else if (memcmp(cmdline, "serial", 6) == 0) {
+		serial_console_init(cmdline + 6);
+		console = CONSOLE_SERIAL;
+	} else if (memcmp(cmdline, "ttyS", 4) == 0) {
+		serial_console_init(cmdline);
+		console = CONSOLE_SERIAL;
+	}
+}
+
+static inline int tolower(int ch)
+{
+	return ch | 0x20;
+}
+
+static inline int isdigit(int ch)
+{
+	return (ch >= '0') && (ch <= '9');
+}
+
+static inline int isxdigit(int ch)
+{
+	ch = tolower(ch);
+	return isdigit(ch) || ((ch >= 'a') && (ch <= 'f'));
+}
+
+
+static inline int digval(int ch)
+{
+	return isdigit(ch)? (ch - '0') : tolower(ch) - 'a' + 10;
+}
+
+/**
+ * simple_strtou - convert a string to an unsigned
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+static unsigned simple_strtou(const char *cp, char **endp, unsigned base)
+{
+	unsigned result = 0,value;
+
+	if (!base) {
+		base = 10;
+		if (*cp == '0') {
+			base = 8;
+			cp++;
+			if ((tolower(*cp) == 'x') && isxdigit(cp[1])) {
+				cp++;
+				base = 16;
+			}
+		}
+	} else if (base == 16) {
+		if (cp[0] == '0' && tolower(cp[1]) == 'x')
+			cp += 2;
+	}
+	while (isxdigit(*cp) && ((value = digval(*cp)) < base)) {
+		result = result*base + value;
+		cp++;
+	}
+	if (endp)
+		*endp = (char *)cp;
+	return result;
+}
+
 static void* memset(void* s, int c, unsigned n)
 {
 	int i;
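
console_init() above hands simple_strtou() the tail of an earlyprintk= option; base 0 auto-detects octal/hex prefixes, and endp is left at the first unparsed character so the caller can resume at a comma. A short usage sketch with illustrative values:

	static void parse_example(void)
	{
		char *end;
		unsigned port, baud;

		/* "earlyprintk=serial,ttyS0,115200": skip "ttyS", parse on */
		port = simple_strtou("0,115200", &end, 10);	/* port = 0, *end = ',' */
		baud = simple_strtou(end + 1, &end, 0);		/* baud = 115200 */
		/* serial_console_init() then programs divisor = 115200 / baud */
	}
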
@@ -212,64 +488,68 @@ static void* memcpy(void* dest, const vo
 	return dest;
 }
 
+static int memcmp(const void *s1, const void *s2, unsigned n)
+{
+	const unsigned char *str1 = s1, *str2 = s2;
+	size_t i;
+	int result = 0;
+	for(i = 0; (result == 0) && (i < n); i++) {
+		result = *str1++ - *str2++;
+		}
+	return result;
+}
+
+static size_t strlen(const char *str)
+{
+	size_t len = 0;
+	while (*str++)
+		len++;
+	return len;
+}
+
+static char *strstr(const char *haystack, const char *needle)
+{
+	size_t len;
+	len = strlen(needle);
+	while(*haystack) {
+		if (memcmp(haystack, needle, len) == 0)
+			return (char *)haystack;
+		haystack++;
+	}
+	return NULL;
+}
+
 /* ===========================================================================
  * Fill the input buffer. This is called only when the buffer is empty
  * and at least one byte is really needed.
  */
 static int fill_inbuf(void)
 {
-	if (insize != 0) {
-		error("ran out of input data");
-	}
-
-	inbuf = input_data;
-	insize = input_len;
-	inptr = 1;
-	return inbuf[0];
+	error("ran out of input data");
+	return 0;
 }
 
 /* ===========================================================================
  * Write the output window window[0..outcnt-1] and update crc and bytes_out.
  * (Used for the decompressed data only.)
  */
-static void flush_window_low(void)
-{
-    ulg c = crc;         /* temporary variable */
-    unsigned n;
-    uch *in, *out, ch;
-    
-    in = window;
-    out = &output_data[output_ptr]; 
-    for (n = 0; n < outcnt; n++) {
-	    ch = *out++ = *in++;
-	    c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
-    }
-    crc = c;
-    bytes_out += (ulg)outcnt;
-    output_ptr += (ulg)outcnt;
-    outcnt = 0;
-}
-
-static void flush_window_high(void)
-{
-    ulg c = crc;         /* temporary variable */
-    unsigned n;
-    uch *in,  ch;
-    in = window;
-    for (n = 0; n < outcnt; n++) {
-	ch = *output_data++ = *in++;
-	if ((ulg)output_data == low_buffer_end) output_data=high_buffer_start;
-	c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
-    }
-    crc = c;
-    bytes_out += (ulg)outcnt;
-    outcnt = 0;
-}
-
 static void flush_window(void)
 {
-	if (high_loaded) flush_window_high();
-	else flush_window_low();
+	/* With my window equal to my output buffer
+	 * I only need to compute the crc here.
+	 */
+	ulg c = crc;         /* temporary variable */
+	unsigned n;
+	uch *in, ch;
+
+	in = window;
+	for (n = 0; n < outcnt; n++) {
+		ch = *in++;
+		c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
+	}
+	crc = c;
+	bytes_out += (ulg)outcnt;
+	outcnt = 0;
 }
 
 static void error(char *x)
@@ -281,78 +561,42 @@ static void error(char *x)
 	while(1);	/* Halt */
 }
 
-static void setup_normal_output_buffer(void)
-{
-#ifdef STANDARD_MEMORY_BIOS_CALL
-	if (RM_EXT_MEM_K < 1024) error("Less than 2MB of memory");
-#else
-	if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
-#endif
-	output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */
-	free_mem_end_ptr = (long)real_mode;
-}
-
-struct moveparams {
-	uch *low_buffer_start;  int lcount;
-	uch *high_buffer_start; int hcount;
-};
-
-static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
+static void save_command_line(void)
 {
-	high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE);
-#ifdef STANDARD_MEMORY_BIOS_CALL
-	if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
-#else
-	if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
-#endif	
-	mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START;
-	low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
-	  ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff;
-	low_buffer_size = low_buffer_end - LOW_BUFFER_START;
-	high_loaded = 1;
-	free_mem_end_ptr = (long)high_buffer_start;
-	if ( (__PHYSICAL_START + low_buffer_size) > ((ulg)high_buffer_start)) {
-		high_buffer_start = (uch *)(__PHYSICAL_START + low_buffer_size);
-		mv->hcount = 0; /* say: we need not to move high_buffer */
+	/* Find the command line */
+	char *cmdline;
+	cmdline = saved_command_line;
+	if (RM_NEW_CL_POINTER) {
+		cmdline = RM_NEW_CL_POINTER;
+	} else if (OLD_CL_MAGIC == RM_OLD_CL_MAGIC) {
+		cmdline = real_mode + RM_OLD_CL_OFFSET;
 	}
-	else mv->hcount = -1;
-	mv->high_buffer_start = high_buffer_start;
+	memcpy(saved_command_line, cmdline, COMMAND_LINE_SIZE);
+	saved_command_line[COMMAND_LINE_SIZE - 1] = '\0';
 }
 
-static void close_output_buffer_if_we_run_high(struct moveparams *mv)
-{
-	if (bytes_out > low_buffer_size) {
-		mv->lcount = low_buffer_size;
-		if (mv->hcount)
-			mv->hcount = bytes_out - low_buffer_size;
-	} else {
-		mv->lcount = bytes_out;
-		mv->hcount = 0;
-	}
-}
-
-int decompress_kernel(struct moveparams *mv, void *rmode)
+asmlinkage void decompress_kernel(void *rmode, unsigned long heap,
+	uch *input_data, unsigned long input_len, uch *output)
 {
 	real_mode = rmode;
+	save_command_line();
+	console_init(saved_command_line);
 
-	if (RM_SCREEN_INFO.orig_video_mode == 7) {
-		vidmem = (char *) 0xb0000;
-		vidport = 0x3b4;
-	} else {
-		vidmem = (char *) 0xb8000;
-		vidport = 0x3d4;
-	}
-
-	lines = RM_SCREEN_INFO.orig_video_lines;
-	cols = RM_SCREEN_INFO.orig_video_cols;
+	window = output;  		/* Output buffer (Normally at 1M) */
+	free_mem_ptr     = heap;	/* Heap  */
+	free_mem_end_ptr = heap + HEAP_SIZE;
+	inbuf  = input_data;		/* Input buffer */
+	insize = input_len;
+	inptr  = 0;
 
-	if (free_mem_ptr < 0x100000) setup_normal_output_buffer();
-	else setup_output_buffer_if_we_run_high(mv);
+	if ((ulg)output & 0x1fffffUL)
+		error("Destination address not 2M aligned");
+	if ((ulg)output >= 0xffffffffffUL)
+		error("Destination address too large");
 
 	makecrc();
 	putstr(".\nDecompressing Linux...");
 	gunzip();
 	putstr("done.\nBooting the kernel.\n");
-	if (high_loaded) close_output_buffer_if_we_run_high(mv);
-	return high_loaded;
+	return;
 }
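
The new asmlinkage prototype lines up one-for-one with the register setup in head.S: under the AMD64 calling convention the first five integer arguments arrive in %rdi, %rsi, %rdx, %rcx and %r8, so no marshalling code is needed. The call made from assembly, restated as C (variable names illustrative):

	/* What head.S effectively does just before `call decompress_kernel`: */
	decompress_kernel(real_mode_ptr,	/* %rdi: boot-params pointer (entry %rsi) */
			  heap_start,		/* %rsi: _heap, scratch heap after the image */
			  input_data,		/* %rdx: compressed payload */
			  input_len,		/* %rcx: payload size */
			  output_base);		/* %r8:  2M-aligned destination (%rbp) */
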
diff --git a/arch/x86_64/boot/compressed/vmlinux.lds b/arch/x86_64/boot/compressed/vmlinux.lds
new file mode 100644
index 0000000..94c13e5
--- /dev/null
+++ b/arch/x86_64/boot/compressed/vmlinux.lds
@@ -0,0 +1,44 @@
+OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(startup_64)
+SECTIONS
+{
+	/* Be careful: parts of head.S assume startup_32 is at
+	 * address 0.
+	 */
+	. = 0;
+	.text :	{
+		_head = . ;
+		*(.text.head)
+		_ehead = . ;
+		*(.text.compressed)
+		_text = .; 	/* Text */
+		*(.text)
+		*(.text.*)
+		_etext = . ;
+	}
+	.rodata : {
+		_rodata = . ;
+		*(.rodata)	 /* read-only data */
+		*(.rodata.*)
+		_erodata = . ;
+	}
+	.data :	{
+		_data = . ;
+		*(.data)
+		*(.data.*)
+		_edata = . ;
+	}
+	.bss : {
+		_bss = . ;
+		*(.bss)
+		*(.bss.*)
+		*(COMMON)
+		. = ALIGN(8);
+		_end = . ;
+		. = ALIGN(4096);
+		pgtable = . ;
+		. = . + 4096 * 6;
+		_heap = .;
+	}
+}
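
The six pages reserved between pgtable and _heap are exactly what the early identity map built in head.S needs: one PML4 page, one PDPT page (four entries used) and four PMD pages of 512 two-megabyte entries each, covering 4G. The arithmetic, as a compile-time C sketch (names illustrative):

	enum {
		PMD_ENTRIES   = (4ULL << 30) / (2 << 20),	/* 2048 x 2M = 4G */
		PMD_PAGES     = PMD_ENTRIES / 512,		/* = 4 */
		PGTABLE_PAGES = 1 /* PML4 */ + 1 /* PDPT */ + PMD_PAGES, /* = 6 */
	};
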
diff --git a/arch/x86_64/boot/compressed/vmlinux.scr b/arch/x86_64/boot/compressed/vmlinux.scr
index 1ed9d79..48117cf 100644
--- a/arch/x86_64/boot/compressed/vmlinux.scr
+++ b/arch/x86_64/boot/compressed/vmlinux.scr
@@ -1,9 +1,10 @@
 SECTIONS
 {
-  .data : { 
+  .text.compressed : {
 	input_len = .;
 	LONG(input_data_end - input_data) input_data = .; 
-	*(.data) 
+	*(.data)
+	output_len = . - 4;
 	input_data_end = .; 
 	}
 }
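
The `output_len = . - 4` trick leans on the gzip file format: a gzip stream ends with ISIZE, the uncompressed length modulo 2^32, stored as four little-endian bytes. Since input_data_end sits right after the payload, backing up four bytes labels that field, so the decompressed size can be read without parsing anything. An equivalent standalone C view (assuming a little-endian host, as on x86):

	#include <string.h>

	extern unsigned char input_data[];
	extern int input_len;

	static unsigned int gzip_isize(void)
	{
		unsigned int size;	/* ISIZE: uncompressed length mod 2^32 */

		memcpy(&size, input_data + input_len - 4, 4);
		return size;		/* little-endian host assumed */
	}
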
diff --git a/arch/x86_64/boot/tools/build.c b/arch/x86_64/boot/tools/build.c
index eae8669..fd9bf41 100644
--- a/arch/x86_64/boot/tools/build.c
+++ b/arch/x86_64/boot/tools/build.c
@@ -27,6 +27,11 @@ #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
 #include <stdarg.h>
+#include <elf.h>
+#include <byteswap.h>
+#define USE_BSD
+#include <endian.h>
+#include <errno.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/sysmacros.h>
@@ -48,6 +53,10 @@ byte buf[1024];
 int fd;
 int is_big_kernel;
 
+#define MAX_PHDRS 100
+static Elf64_Ehdr ehdr;
+static Elf64_Phdr phdr[MAX_PHDRS];
+
 void die(const char * str, ...)
 {
 	va_list args;
@@ -57,20 +66,155 @@ void die(const char * str, ...)
 	exit(1);
 }
 
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define le16_to_cpu(val) (val)
+#define le32_to_cpu(val) (val)
+#define le64_to_cpu(val) (val)
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+#define le16_to_cpu(val) bswap_16(val)
+#define le32_to_cpu(val) bswap_32(val)
+#define le64_to_cpu(val) bswap_64(val)
+#endif
+
+static uint16_t elf16_to_cpu(uint16_t val)
+{
+	return le16_to_cpu(val);
+}
+
+static uint32_t elf32_to_cpu(uint32_t val)
+{
+	return le32_to_cpu(val);
+}
+
+static uint64_t elf64_to_cpu(uint64_t val)
+{
+	return le64_to_cpu(val);
+}
+
 void file_open(const char *name)
 {
 	if ((fd = open(name, O_RDONLY, 0)) < 0)
 		die("Unable to open `%s': %m", name);
 }
 
+static void read_ehdr(void)
+{
+	if (read(fd, &ehdr, sizeof(ehdr)) != sizeof(ehdr)) {
+		die("Cannot read ELF header: %s\n",
+			strerror(errno));
+	}
+	if (memcmp(ehdr.e_ident, ELFMAG, 4) != 0) {
+		die("No ELF magic\n");
+	}
+	if (ehdr.e_ident[EI_CLASS] != ELFCLASS64) {
+		die("Not a 64 bit executable\n");
+	}
+	if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) {
+		die("Not a LSB ELF executable\n");
+	}
+	if (ehdr.e_ident[EI_VERSION] != EV_CURRENT) {
+		die("Unknown ELF version\n");
+	}
+	/* Convert the fields to native endian */
+	ehdr.e_type      = elf16_to_cpu(ehdr.e_type);
+	ehdr.e_machine   = elf16_to_cpu(ehdr.e_machine);
+	ehdr.e_version   = elf32_to_cpu(ehdr.e_version);
+	ehdr.e_entry     = elf64_to_cpu(ehdr.e_entry);
+	ehdr.e_phoff     = elf64_to_cpu(ehdr.e_phoff);
+	ehdr.e_shoff     = elf64_to_cpu(ehdr.e_shoff);
+	ehdr.e_flags     = elf32_to_cpu(ehdr.e_flags);
+	ehdr.e_ehsize    = elf16_to_cpu(ehdr.e_ehsize);
+	ehdr.e_phentsize = elf16_to_cpu(ehdr.e_phentsize);
+	ehdr.e_phnum     = elf16_to_cpu(ehdr.e_phnum);
+	ehdr.e_shentsize = elf16_to_cpu(ehdr.e_shentsize);
+	ehdr.e_shnum     = elf16_to_cpu(ehdr.e_shnum);
+	ehdr.e_shstrndx  = elf16_to_cpu(ehdr.e_shstrndx);
+
+	if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN)) {
+		die("Unsupported ELF header type\n");
+	}
+	if (ehdr.e_machine != EM_X86_64) {
+		die("Not for x86_64\n");
+	}
+	if (ehdr.e_version != EV_CURRENT) {
+		die("Unknown ELF version\n");
+	}
+	if (ehdr.e_ehsize != sizeof(Elf64_Ehdr)) {
+		die("Bad Elf header size\n");
+	}
+	if (ehdr.e_phentsize != sizeof(Elf64_Phdr)) {
+		die("Bad program header entry\n");
+	}
+	if (ehdr.e_shentsize != sizeof(Elf64_Shdr)) {
+		die("Bad section header entry\n");
+	}
+	if (ehdr.e_shstrndx >= ehdr.e_shnum) {
+		die("String table index out of bounds\n");
+	}
+}
+
+static void read_phds(void)
+{
+	int i;
+	size_t size;
+	if (ehdr.e_phnum > MAX_PHDRS) {
+		die("Too many program headers: %d (only %d supported)\n",
+			ehdr.e_phnum, MAX_PHDRS);
+	}
+	if (lseek(fd, ehdr.e_phoff, SEEK_SET) < 0) {
+		die("Seek to %lu failed: %s\n",
+			(unsigned long)ehdr.e_phoff, strerror(errno));
+	}
+	size = sizeof(phdr[0])*ehdr.e_phnum;
+	if (read(fd, &phdr, size) != size) {
+		die("Cannot read ELF section headers: %s\n",
+			strerror(errno));
+	}
+	for(i = 0; i < ehdr.e_phnum; i++) {
+		phdr[i].p_type      = elf32_to_cpu(phdr[i].p_type);
+		phdr[i].p_flags     = elf32_to_cpu(phdr[i].p_flags);
+		phdr[i].p_offset    = elf64_to_cpu(phdr[i].p_offset);
+		phdr[i].p_vaddr     = elf64_to_cpu(phdr[i].p_vaddr);
+		phdr[i].p_paddr     = elf64_to_cpu(phdr[i].p_paddr);
+		phdr[i].p_filesz    = elf64_to_cpu(phdr[i].p_filesz);
+		phdr[i].p_memsz     = elf64_to_cpu(phdr[i].p_memsz);
+		phdr[i].p_align     = elf64_to_cpu(phdr[i].p_align);
+	}
+}
+
+uint64_t vmlinux_memsz(void)
+{
+	uint64_t min, max, size;
+	int i;
+	max = 0;
+	min = ~max;
+	for(i = 0; i < ehdr.e_phnum; i++) {
+		uint64_t start, end;
+		if (phdr[i].p_type != PT_LOAD)
+			continue;
+		start = phdr[i].p_paddr;
+		end   = phdr[i].p_paddr + phdr[i].p_memsz;
+		if (start < min)
+			min = start;
+		if (end > max)
+			max = end;
+	}
+	/* The memory footprint reported by vmlinux */
+	size = max - min;
+	return size;
+}
+
 void usage(void)
 {
-	die("Usage: build [-b] bootsect setup system [rootdev] [> image]");
+	die("Usage: build [-b] bootsect setup system rootdev vmlinux [> image]");
 }
 
 int main(int argc, char ** argv)
 {
-	unsigned int i, c, sz, setup_sectors;
+	unsigned int i, sz, setup_sectors;
+	uint64_t kernel_offset, kernel_filesz, kernel_memsz;
+	int c;
 	u32 sys_size;
 	byte major_root, minor_root;
 	struct stat sb;
@@ -80,30 +224,25 @@ int main(int argc, char ** argv)
 	    is_big_kernel = 1;
 	    argc--, argv++;
 	  }
-	if ((argc < 4) || (argc > 5))
+	if (argc != 6)
 		usage();
-	if (argc > 4) {
-		if (!strcmp(argv[4], "CURRENT")) {
-			if (stat("/", &sb)) {
-				perror("/");
-				die("Couldn't stat /");
-			}
-			major_root = major(sb.st_dev);
-			minor_root = minor(sb.st_dev);
-		} else if (strcmp(argv[4], "FLOPPY")) {
-			if (stat(argv[4], &sb)) {
-				perror(argv[4]);
-				die("Couldn't stat root device.");
-			}
-			major_root = major(sb.st_rdev);
-			minor_root = minor(sb.st_rdev);
-		} else {
-			major_root = 0;
-			minor_root = 0;
+	if (!strcmp(argv[4], "CURRENT")) {
+		if (stat("/", &sb)) {
+			perror("/");
+			die("Couldn't stat /");
+		}
+		major_root = major(sb.st_dev);
+		minor_root = minor(sb.st_dev);
+	} else if (strcmp(argv[4], "FLOPPY")) {
+		if (stat(argv[4], &sb)) {
+			perror(argv[4]);
+			die("Couldn't stat root device.");
 		}
+		major_root = major(sb.st_rdev);
+		minor_root = minor(sb.st_rdev);
 	} else {
-		major_root = DEFAULT_MAJOR_ROOT;
-		minor_root = DEFAULT_MINOR_ROOT;
+		major_root = 0;
+		minor_root = 0;
 	}
 	fprintf(stderr, "Root device is (%d, %d)\n", major_root, minor_root);
 
@@ -143,10 +282,11 @@ int main(int argc, char ** argv)
 		i += c;
 	}
 
+	kernel_offset = (setup_sectors + 1)*512;
 	file_open(argv[3]);
 	if (fstat (fd, &sb))
 		die("Unable to stat `%s': %m", argv[3]);
-	sz = sb.st_size;
+	kernel_filesz = sz = sb.st_size;
 	fprintf (stderr, "System is %d kB\n", sz/1024);
 	sys_size = (sz + 15) / 16;
 	if (!is_big_kernel && sys_size > DEF_SYSSIZE)
@@ -167,7 +307,49 @@ int main(int argc, char ** argv)
 	}
 	close(fd);
 
-	if (lseek(1, 497, SEEK_SET) != 497)		    /* Write sizes to the bootsector */
+	file_open(argv[5]);
+	read_ehdr();
+	read_phds();
+	close(fd);
+	kernel_memsz = vmlinux_memsz();
+
+	if (lseek(1,  88, SEEK_SET) != 88)		    /* Write sizes to the bootsector */
+		die("Output: seek failed");
+	buf[0] = (kernel_offset >>  0) & 0xff;
+	buf[1] = (kernel_offset >>  8) & 0xff;
+	buf[2] = (kernel_offset >> 16) & 0xff;
+	buf[3] = (kernel_offset >> 24) & 0xff;
+	buf[4] = (kernel_offset >> 32) & 0xff;
+	buf[5] = (kernel_offset >> 40) & 0xff;
+	buf[6] = (kernel_offset >> 48) & 0xff;
+	buf[7] = (kernel_offset >> 56) & 0xff;
+	if (write(1, buf, 8) != 8)
+		die("Write of kernel file offset failed");
+	if (lseek(1, 112, SEEK_SET) != 112)
+		die("Output: seek failed");
+	buf[0] = (kernel_filesz >>  0) & 0xff;
+	buf[1] = (kernel_filesz >>  8) & 0xff;
+	buf[2] = (kernel_filesz >> 16) & 0xff;
+	buf[3] = (kernel_filesz >> 24) & 0xff;
+	buf[4] = (kernel_filesz >> 32) & 0xff;
+	buf[5] = (kernel_filesz >> 40) & 0xff;
+	buf[6] = (kernel_filesz >> 48) & 0xff;
+	buf[7] = (kernel_filesz >> 56) & 0xff;
+	if (write(1, buf, 8) != 8)
+		die("Write of kernel file size failed");
+	if (lseek(1, 120, SEEK_SET) != 120)
+		die("Output: seek failed");
+	buf[0] = (kernel_memsz >>  0) & 0xff;
+	buf[1] = (kernel_memsz >>  8) & 0xff;
+	buf[2] = (kernel_memsz >> 16) & 0xff;
+	buf[3] = (kernel_memsz >> 24) & 0xff;
+	buf[4] = (kernel_memsz >> 32) & 0xff;
+	buf[5] = (kernel_memsz >> 40) & 0xff;
+	buf[6] = (kernel_memsz >> 48) & 0xff;
+	buf[7] = (kernel_memsz >> 56) & 0xff;
+	if (write(1, buf, 8) != 8)
+		die("Write of kernel memory size failed");
+	if (lseek(1, 497, SEEK_SET) != 497)
 		die("Output: seek failed");
 	buf[0] = setup_sectors;
 	if (write(1, buf, 1) != 1)
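
From a boot loader's side, the three 64-bit fields written above can be read straight back out of the first sector. A hedged consumer sketch (the struct and function names are hypothetical, and the memcpy-based reads assume a little-endian host):

	#include <stdint.h>
	#include <string.h>

	struct kernel_layout {
		uint64_t file_offset;	/* bytes into the bzImage, offset 88 */
		uint64_t file_size;	/* compressed payload size, offset 112 */
		uint64_t mem_size;	/* run-time footprint, offset 120 */
	};

	static void read_layout(const uint8_t *bootsect, struct kernel_layout *l)
	{
		memcpy(&l->file_offset, bootsect + 88,  8);	/* little endian */
		memcpy(&l->file_size,   bootsect + 112, 8);
		memcpy(&l->mem_size,    bootsect + 120, 8);
	}
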
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index 840d5d9..06cf378 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -158,7 +158,6 @@ CONFIG_X86_MCE_INTEL=y
 CONFIG_X86_MCE_AMD=y
 # CONFIG_KEXEC is not set
 # CONFIG_CRASH_DUMP is not set
-CONFIG_PHYSICAL_START=0x200000
 CONFIG_SECCOMP=y
 # CONFIG_HZ_100 is not set
 CONFIG_HZ_250=y
diff --git a/arch/x86_64/kernel/acpi/sleep.c b/arch/x86_64/kernel/acpi/sleep.c
index 5ebf62c..d9b28f8 100644
--- a/arch/x86_64/kernel/acpi/sleep.c
+++ b/arch/x86_64/kernel/acpi/sleep.c
@@ -60,17 +60,6 @@ extern char wakeup_start, wakeup_end;
 
 extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
 
-static pgd_t low_ptr;
-
-static void init_low_mapping(void)
-{
-	pgd_t *slot0 = pgd_offset(current->mm, 0UL);
-	low_ptr = *slot0;
-	set_pgd(slot0, *pgd_offset(current->mm, PAGE_OFFSET));
-	WARN_ON(num_online_cpus() != 1);
-	local_flush_tlb();
-}
-
 /**
  * acpi_save_state_mem - save kernel state
  *
@@ -79,8 +68,6 @@ static void init_low_mapping(void)
  */
 int acpi_save_state_mem(void)
 {
-	init_low_mapping();
-
 	memcpy((void *)acpi_wakeup_address, &wakeup_start,
 	       &wakeup_end - &wakeup_start);
 	acpi_copy_wakeup_routine(acpi_wakeup_address);
@@ -93,8 +80,6 @@ int acpi_save_state_mem(void)
  */
 void acpi_restore_state_mem(void)
 {
-	set_pgd(pgd_offset(current->mm, 0UL), low_ptr);
-	local_flush_tlb();
 }
 
 /**
@@ -107,8 +92,8 @@ void acpi_restore_state_mem(void)
  */
 void __init acpi_reserve_bootmem(void)
 {
-	acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE);
-	if ((&wakeup_end - &wakeup_start) > PAGE_SIZE)
+	acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2);
+	if ((&wakeup_end - &wakeup_start) > (PAGE_SIZE*2))
 		printk(KERN_CRIT
 		       "ACPI: Wakeup code way too big, will crash on attempt to suspend\n");
 }
diff --git a/arch/x86_64/kernel/acpi/wakeup.S b/arch/x86_64/kernel/acpi/wakeup.S
index 185faa9..3eda0b5 100644
--- a/arch/x86_64/kernel/acpi/wakeup.S
+++ b/arch/x86_64/kernel/acpi/wakeup.S
@@ -1,6 +1,7 @@
 .text
 #include <linux/linkage.h>
 #include <asm/segment.h>
+#include <asm/pgtable.h>
 #include <asm/page.h>
 #include <asm/msr.h>
 
@@ -15,7 +16,6 @@ # If physical address of wakeup_code is 
 # cs = 0x1234, eip = 0x05
 #
 
-
 ALIGN
 	.align	16
 ENTRY(wakeup_start)
@@ -30,22 +30,25 @@ # Running in *copy* of this code, somewh
 	cld
 	# setup data segment
 	movw	%cs, %ax
-	movw	%ax, %ds					# Make ds:0 point to wakeup_start
+	movw	%ax, %ds			# Make ds:0 point to wakeup_start
 	movw	%ax, %ss
-	mov	$(wakeup_stack - wakeup_code), %sp		# Private stack is needed for ASUS board
+						# Private stack is needed for ASUS board
+	mov	$(wakeup_stack - wakeup_code), %sp
 
-	pushl	$0						# Kill any dangerous flags
+	pushl	$0				# Kill any dangerous flags
 	popfl
 
 	movl	real_magic - wakeup_code, %eax
 	cmpl	$0x12345678, %eax
 	jne	bogus_real_magic
 
+	call	verify_cpu			# Verify the cpu supports long mode
+
 	testl	$1, video_flags - wakeup_code
 	jz	1f
 	lcall   $0xc000,$3
 	movw	%cs, %ax
-	movw	%ax, %ds					# Bios might have played with that
+	movw	%ax, %ds			# Bios might have played with that
 	movw	%ax, %ss
 1:
 
@@ -60,13 +63,17 @@ # Running in *copy* of this code, somewh
 	movw	$0x0e00 + 'L', %fs:(0x10)
 
 	movb	$0xa2, %al	;  outb %al, $0x80
+
+	mov	%ds, %ax			# Find 32bit wakeup_code address
+	movzx	%ax, %esi			# (Convert %ds:gdt to a linear ptr)
+	shll	$4, %esi
+
+						# Fixup the vectors
+	addl	%esi, wakeup_32_vector - wakeup_code
+	addl	%esi, wakeup_long64_vector - wakeup_code
+	addl	%esi, gdt_48a + 2 - wakeup_code	# Fixup the gdt pointer
 	
-	lidt	%ds:idt_48a - wakeup_code
-	xorl	%eax, %eax
-	movw	%ds, %ax			# (Convert %ds:gdt to a linear ptr)
-	shll	$4, %eax
-	addl	$(gdta - wakeup_code), %eax
-	movl	%eax, gdt_48a +2 - wakeup_code
+	lidtl	%ds:idt_48a - wakeup_code
 	lgdtl	%ds:gdt_48a - wakeup_code	# load gdt with whatever is
 						# appropriate
 
@@ -75,85 +82,47 @@ # Running in *copy* of this code, somewh
 	jmp	1f
 1:
 
-	.byte 0x66, 0xea			# prefix + jmpi-opcode
-	.long	wakeup_32 - __START_KERNEL_map
-	.word	__KERNEL_CS
+	ljmpl	*(wakeup_32_vector - wakeup_code)
+
+	.balign 4
+wakeup_32_vector:
+	.long	wakeup_32 - wakeup_code
+	.word	__KERNEL32_CS, 0
 
 	.code32
 wakeup_32:
 # Running in this code, but at low address; paging is not yet turned on.
 	movb	$0xa5, %al	;  outb %al, $0x80
 
-	/* Check if extended functions are implemented */		
-	movl	$0x80000000, %eax
-	cpuid
-	cmpl	$0x80000000, %eax
-	jbe	bogus_cpu
-	wbinvd
-	mov	$0x80000001, %eax
-	cpuid
-	btl	$29, %edx
-	jnc	bogus_cpu
-	movl	%edx,%edi
-	
-	movw	$__KERNEL_DS, %ax
-	movw	%ax, %ds
-	movw	%ax, %es
-	movw	%ax, %fs
-	movw	%ax, %gs
-
-	movw	$__KERNEL_DS, %ax	
-	movw	%ax, %ss
+	/* Initialize segments */
+	movl	$__KERNEL_DS, %eax
+	movl	%eax, %ds
 
-	mov	$(wakeup_stack - __START_KERNEL_map), %esp
-	movl	saved_magic - __START_KERNEL_map, %eax
-	cmpl	$0x9abcdef0, %eax
-	jne	bogus_32_magic
+	movw	$0x0e00 + 'i', %ds:(0xb8012)
+	movb	$0xa8, %al	;  outb %al, $0x80;
 
 	/*
 	 * Prepare for entering 64bits mode
 	 */
 
-	/* Enable PAE mode and PGE */
+	/* Enable PAE */
 	xorl	%eax, %eax
 	btsl	$5, %eax
-	btsl	$7, %eax
 	movl	%eax, %cr4
 
 	/* Setup early boot stage 4 level pagetables */
-	movl	$(wakeup_level4_pgt - __START_KERNEL_map), %eax
+	leal	(wakeup_level4_pgt - wakeup_code)(%esi), %eax
 	movl	%eax, %cr3
 
-	/* Setup EFER (Extended Feature Enable Register) */
-	movl	$MSR_EFER, %ecx
-	rdmsr
-	/* Fool rdmsr and reset %eax to avoid dependences */
-	xorl	%eax, %eax
 	/* Enable Long Mode */
-	btsl	$_EFER_LME, %eax
-	/* Enable System Call */
-	btsl	$_EFER_SCE, %eax
-
-	/* No Execute supported? */	
-	btl	$20,%edi
-	jnc     1f
-	btsl	$_EFER_NX, %eax
-1:	
-				
-	/* Make changes effective */
+	movl	$MSR_EFER, %ecx
+	movl	$(1 << _EFER_LME), %eax	# Enable Long Mode
+	xorl	%edx, %edx
 	wrmsr
-	wbinvd
 
 	xorl	%eax, %eax
 	btsl	$31, %eax			/* Enable paging and in turn activate Long Mode */
 	btsl	$0, %eax			/* Enable protected mode */
-	btsl	$1, %eax			/* Enable MP */
-	btsl	$4, %eax			/* Enable ET */
-	btsl	$5, %eax			/* Enable NE */
-	btsl	$16, %eax			/* Enable WP */
-	btsl	$18, %eax			/* Enable AM */
-
-	/* Make changes effective */
 	movl	%eax, %cr0
 	/* At this point:
 		CR4.PAE must be 1
@@ -162,11 +131,6 @@ # Running in this code, but at low addre
 		Next instruction must be a branch
 		This must be on identity-mapped page
 	*/
-	jmp	reach_compatibility_mode
-reach_compatibility_mode:
-	movw	$0x0e00 + 'i', %ds:(0xb8012)
-	movb	$0xa8, %al	;  outb %al, $0x80; 	
-		
 	/*
 	 * At this point we're in long mode but in 32bit compatibility mode
 	 * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
@@ -174,20 +138,13 @@ reach_compatibility_mode:
 	 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
 	 */
 
-	movw	$0x0e00 + 'n', %ds:(0xb8014)
-	movb	$0xa9, %al	;  outb %al, $0x80
-	
-	/* Load new GDT with the 64bit segment using 32bit descriptor */
-	movl	$(pGDT32 - __START_KERNEL_map), %eax
-	lgdt	(%eax)
-
-	movl    $(wakeup_jumpvector - __START_KERNEL_map), %eax
 	/* Finally jump in 64bit mode */
-	ljmp	*(%eax)
+	ljmp	*(wakeup_long64_vector - wakeup_code)(%esi)
 
-wakeup_jumpvector:
-	.long	wakeup_long64 - __START_KERNEL_map
-	.word	__KERNEL_CS
+	.balign 4
+wakeup_long64_vector:
+	.long	wakeup_long64 - wakeup_code
+	.word	__KERNEL_CS, 0
 
 .code64
 
@@ -199,10 +156,18 @@ wakeup_long64:
 	 * addresses where we're currently running on. We have to do that here
 	 * because in 32bit we couldn't load a 64bit linear address.
 	 */
-	lgdt	cpu_gdt_descr - __START_KERNEL_map
+	lgdt	cpu_gdt_descr
+
+	movw	$0x0e00 + 'n', %ds:(0xb8014)
+	movb	$0xa9, %al	;  outb %al, $0x80
+
+	movq	saved_magic, %rax
+	movq	$0x123456789abcdef0, %rdx
+	cmpq	%rdx, %rax
+	jne	bogus_64_magic
 
 	movw	$0x0e00 + 'u', %ds:(0xb8016)
-	
+
 	nop
 	nop
 	movw	$__KERNEL_DS, %ax
@@ -211,16 +176,16 @@ wakeup_long64:
 	movw	%ax, %es
 	movw	%ax, %fs
 	movw	%ax, %gs
-	movq	saved_esp, %rsp
+	movq	saved_rsp, %rsp
 
 	movw	$0x0e00 + 'x', %ds:(0xb8018)
-	movq	saved_ebx, %rbx
-	movq	saved_edi, %rdi
-	movq	saved_esi, %rsi
-	movq	saved_ebp, %rbp
+	movq	saved_rbx, %rbx
+	movq	saved_rdi, %rdi
+	movq	saved_rsi, %rsi
+	movq	saved_rbp, %rbp
 
 	movw	$0x0e00 + '!', %ds:(0xb801a)
-	movq	saved_eip, %rax
+	movq	saved_rip, %rax
 	jmp	*%rax
 
 .code32
@@ -228,25 +193,10 @@ wakeup_long64:
 	.align	64	
 gdta:
 	.word	0, 0, 0, 0			# dummy
-
-	.word	0, 0, 0, 0			# unused
-
-	.word	0xFFFF				# 4Gb - (0x100000*0x1000 = 4Gb)
-	.word	0				# base address = 0
-	.word	0x9B00				# code read/exec. ??? Why I need 0x9B00 (as opposed to 0x9A00 in order for this to work?)
-	.word	0x00CF				# granularity = 4096, 386
-						#  (+5th nibble of limit)
-
-	.word	0xFFFF				# 4Gb - (0x100000*0x1000 = 4Gb)
-	.word	0				# base address = 0
-	.word	0x9200				# data read/write
-	.word	0x00CF				# granularity = 4096, 386
-						#  (+5th nibble of limit)
-# this is 64bit descriptor for code
-	.word	0xFFFF
-	.word	0
-	.word	0x9A00				# code read/exec
-	.word	0x00AF				# as above, but it is long mode and with D=0
+	/* ??? Why do I need the accessed bit set in order for this to work? */
+	.quad	0x00cf9b000000ffff		# __KERNEL32_CS
+	.quad	0x00af9b000000ffff		# __KERNEL_CS
+	.quad	0x00cf93000000ffff		# __KERNEL_DS
 
 idt_48a:
 	.word	0				# idt limit = 0
@@ -255,30 +205,24 @@ idt_48a:
 gdt_48a:
 	.word	0x8000				# gdt limit=2048,
 						#  256 GDT entries
-	.word	0, 0				# gdt base (filled in later)
-	
-	
+	.long	gdta - wakeup_code		# gdt base (relocated later)
+
+
 real_save_gdt:	.word 0
 		.quad 0
 real_magic:	.quad 0
 video_mode:	.quad 0
 video_flags:	.quad 0
 
+.code16
 bogus_real_magic:
 	movb	$0xba,%al	;  outb %al,$0x80		
 	jmp bogus_real_magic
 
-bogus_32_magic:
+.code64
+bogus_64_magic:
 	movb	$0xb3,%al	;  outb %al,$0x80
-	jmp bogus_32_magic
-
-bogus_31_magic:
-	movb	$0xb1,%al	;  outb %al,$0x80
-	jmp bogus_31_magic
-
-bogus_cpu:
-	movb	$0xbc,%al	;  outb %al,$0x80
-	jmp bogus_cpu
+	jmp bogus_64_magic
 
 	
 /* This code uses an extended set of video mode numbers. These include:
@@ -301,6 +245,7 @@ #define VIDEO_FIRST_VESA 0x0200
 #define VIDEO_FIRST_V7 0x0900
 
 # Setting of user mode (AX=mode ID) => CF=success
+.code16
 mode_seta:
 	movw	%ax, %bx
 #if 0
@@ -346,14 +291,59 @@ check_vesaa:
 
 _setbada: jmp setbada
 
-	.code64
-bogus_magic:
-	movw	$0x0e00 + 'B', %ds:(0xb8018)
-	jmp bogus_magic
+	.code16
+verify_cpu:
+	pushl	$0			# Kill any dangerous flags
+	popfl
+
+	/* minimum CPUID flags for x86-64 */
+	/* see http://www.x86-64.org/lists/discuss/msg02971.html */
+#define REQUIRED_MASK1 ((1<<0)|(1<<3)|(1<<4)|(1<<5)|(1<<6)|(1<<8)|\
+			   (1<<13)|(1<<15)|(1<<24)|(1<<25)|(1<<26))
+#define REQUIRED_MASK2 (1<<29)
+
+	pushfl				# check for cpuid
+	popl	%eax
+	movl	%eax, %ebx
+	xorl	$0x200000,%eax
+	pushl	%eax
+	popfl
+	pushfl
+	popl	%eax
+	pushl	%ebx
+	popfl
+	cmpl	%eax, %ebx
+	jz	no_longmode
+
+	xorl	%eax, %eax		# See if cpuid 1 is implemented
+	cpuid
+	cmpl	$0x1, %eax
+	jb	no_longmode
+
+	movl	$0x01, %eax		# Does the cpu have what it takes?
+	cpuid
+	andl	$REQUIRED_MASK1, %edx
+	xorl	$REQUIRED_MASK1, %edx
+	jnz	no_longmode
 
-bogus_magic2:
-	movw	$0x0e00 + '2', %ds:(0xb8018)
-	jmp bogus_magic2
+	movl	$0x80000000, %eax	# See if extended cpuid is implemented
+	cpuid
+	cmpl	$0x80000001, %eax
+	jb	no_longmode
+
+	movl	$0x80000001, %eax	# Does the cpu have what it takes?
+	cpuid
+	andl	$REQUIRED_MASK2, %edx
+	xorl	$REQUIRED_MASK2, %edx
+	jnz	no_longmode
+
+	ret				# The cpu supports long mode
+
+no_longmode:
+	movb	$0xbc,%al	;  outb %al,$0x80
+	jmp no_longmode
+
+	ret
 	
 
 wakeup_stack_begin:	# Stack grows down
@@ -361,7 +351,15 @@ wakeup_stack_begin:	# Stack grows down
 .org	0xff0
 wakeup_stack:		# Just below end of page
 
+.org	0x1000
+ENTRY(wakeup_level4_pgt)
+	.quad	level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+	.fill	510,8,0
+	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+	.quad	level3_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
+
 ENTRY(wakeup_end)
+	.code64
 	
 ##
 # acpi_copy_wakeup_routine
@@ -378,23 +376,6 @@ ENTRY(acpi_copy_wakeup_routine)
 	pushq	%rcx
 	pushq	%rdx
 
-	sgdt	saved_gdt
-	sidt	saved_idt
-	sldt	saved_ldt
-	str	saved_tss
-
-	movq    %cr3, %rdx
-	movq    %rdx, saved_cr3
-	movq    %cr4, %rdx
-	movq    %rdx, saved_cr4
-	movq	%cr0, %rdx
-	movq	%rdx, saved_cr0
-	sgdt    real_save_gdt - wakeup_start (,%rdi)
-	movl	$MSR_EFER, %ecx
-	rdmsr
-	movl	%eax, saved_efer
-	movl	%edx, saved_efer2
-
 	movl	saved_video_mode, %edx
 	movl	%edx, video_mode - wakeup_start (,%rdi)
 	movl	acpi_video_flags, %edx
@@ -403,18 +384,11 @@ ENTRY(acpi_copy_wakeup_routine)
 	movq	$0x123456789abcdef0, %rdx
 	movq	%rdx, saved_magic
 
-	movl	saved_magic - __START_KERNEL_map, %eax
-	cmpl	$0x9abcdef0, %eax
-	jne	bogus_32_magic
-
-	# make sure %cr4 is set correctly (features, etc)
-	movl	saved_cr4 - __START_KERNEL_map, %eax
-	movq	%rax, %cr4
+	movq	saved_magic, %rax
+	movq	$0x123456789abcdef0, %rdx
+	cmpq	%rdx, %rax
+	jne	bogus_64_magic
 
-	movl	saved_cr0 - __START_KERNEL_map, %eax
-	movq	%rax, %cr0
-	jmp	1f		# Flush pipelines
-1:
 	# restore the regs we used
 	popq	%rdx
 	popq	%rcx
@@ -450,13 +424,13 @@ do_suspend_lowlevel:
 	movq %r15, saved_context_r15(%rip)
 	pushfq ; popq saved_context_eflags(%rip)
 
-	movq	$.L97, saved_eip(%rip)
+	movq	$.L97, saved_rip(%rip)
 
-	movq %rsp,saved_esp
-	movq %rbp,saved_ebp
-	movq %rbx,saved_ebx
-	movq %rdi,saved_edi
-	movq %rsi,saved_esi
+	movq %rsp,saved_rsp
+	movq %rbp,saved_rbp
+	movq %rbx,saved_rbx
+	movq %rdi,saved_rdi
+	movq %rsi,saved_rsi
 
 	addq	$8, %rsp
 	movl	$3, %edi
@@ -503,25 +477,12 @@ do_suspend_lowlevel:
 	
 .data
 ALIGN
-ENTRY(saved_ebp)	.quad	0
-ENTRY(saved_esi)	.quad	0
-ENTRY(saved_edi)	.quad	0
-ENTRY(saved_ebx)	.quad	0
+ENTRY(saved_rbp)	.quad	0
+ENTRY(saved_rsi)	.quad	0
+ENTRY(saved_rdi)	.quad	0
+ENTRY(saved_rbx)	.quad	0
 
-ENTRY(saved_eip)	.quad	0
-ENTRY(saved_esp)	.quad	0
+ENTRY(saved_rip)	.quad	0
+ENTRY(saved_rsp)	.quad	0
 
 ENTRY(saved_magic)	.quad	0
-
-ALIGN
-# saved registers
-saved_gdt:	.quad	0,0
-saved_idt:	.quad	0,0
-saved_ldt:	.quad	0
-saved_tss:	.quad	0
-
-saved_cr0:	.quad 0
-saved_cr3:	.quad 0
-saved_cr4:	.quad 0
-saved_efer:	.quad 0
-saved_efer2:	.quad 0
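
For reference, the verify_cpu stub added above boils down to two CPUID probes:
leaf 1 must report the baseline feature bits in REQUIRED_MASK1, and extended
leaf 0x80000001 must report long mode (EDX bit 29). A minimal C sketch of the
same test, not part of the patch, assuming GCC's <cpuid.h>:

    #include <cpuid.h>

    /* Feature bits the real-mode stub insists on before enabling long mode */
    #define REQUIRED_MASK1 ((1<<0)|(1<<3)|(1<<4)|(1<<5)|(1<<6)|(1<<8)|\
                            (1<<13)|(1<<15)|(1<<24)|(1<<25)|(1<<26))
    #define REQUIRED_MASK2 (1<<29)    /* extended EDX bit 29: long mode */

    static int cpu_has_longmode(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
            return 0;                             /* no CPUID leaf 1 */
        if ((edx & REQUIRED_MASK1) != REQUIRED_MASK1)
            return 0;                             /* missing base features */
        if (!__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx))
            return 0;                             /* no extended leaves */
        return (edx & REQUIRED_MASK2) == REQUIRED_MASK2;
    }

The assembly version first toggles EFLAGS bit 21, because on a CPU without
CPUID the instruction itself would fault; GCC's __get_cpuid does the same
EFLAGS dance internally on 32bit targets.
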
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index e56c2ad..56dd525 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -205,8 +210,8 @@ unsigned long __init e820_end_of_ram(voi
 		if (start >= end)
 			continue;
 		if (ei->type == E820_RAM) { 
-		if (end > end_pfn<<PAGE_SHIFT)
-			end_pfn = end>>PAGE_SHIFT;
+			if (end > end_pfn<<PAGE_SHIFT)
+				end_pfn = end>>PAGE_SHIFT;
 		} else { 
 			if (end > end_pfn_map<<PAGE_SHIFT) 
 				end_pfn_map = end>>PAGE_SHIFT;
diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c
index 140051e..d2b4cfb 100644
--- a/arch/x86_64/kernel/early_printk.c
+++ b/arch/x86_64/kernel/early_printk.c
@@ -11,11 +11,10 @@ #include <asm/fcntl.h>
 
 #ifdef __i386__
 #include <asm/setup.h>
-#define VGABASE		(__ISA_IO_base + 0xb8000)
 #else
 #include <asm/bootsetup.h>
-#define VGABASE		((void __iomem *)0xffffffff800b8000UL)
 #endif
+#define VGABASE		(__ISA_IO_base + 0xb8000)
 
 static int max_ypos = 25, max_xpos = 80;
 static int current_ypos = 25, current_xpos = 0;
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index 6df05e6..b821d13 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -5,6 +5,7 @@
  *  Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
  *  Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
  *  Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
+ *  Copyright (C) 2005 Eric Biederman <ebiederm@xmission.com>
  *
  *  $Id: head.S,v 1.49 2002/03/19 17:39:25 ak Exp $
  */
@@ -15,98 +16,127 @@ #include <linux/threads.h>
 #include <linux/init.h>
 #include <asm/desc.h>
 #include <asm/segment.h>
+#include <asm/pgtable.h>
 #include <asm/page.h>
 #include <asm/msr.h>
 #include <asm/cache.h>
 	
 /* we are not able to switch in one step to the final KERNEL ADRESS SPACE
- * because we need identity-mapped pages on setup so define __START_KERNEL to
- * 0x100000 for this stage
+ * because we need identity-mapped pages.
  * 
  */
 
 	.text
 	.section .bootstrap.text
-	.code32
-	.globl startup_32
-/* %bx:	 1 if coming from smp trampoline on secondary cpu */ 
-startup_32:
-	
+	.code64
+	.globl startup_64
+startup_64:
+
 	/*
-	 * At this point the CPU runs in 32bit protected mode (CS.D = 1) with
-	 * paging disabled and the point of this file is to switch to 64bit
-	 * long mode with a kernel mapping for kerneland to jump into the
-	 * kernel virtual addresses.
- 	 * There is no stack until we set one up.
+	 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
+	 * and someone has loaded an identity mapped page table
+	 * for us.  These identity mapped page tables map all of the
+	 * kernel pages and possibly all of memory.
+	 *
+	 * %esi holds a physical pointer to real_mode_data.
+	 *
+	 * We come here either directly from a 64bit bootloader, or from
+	 * arch/x86_64/boot/compressed/head.S.
+	 *
+	 * We only come here initially at boot; nothing else comes here.
+	 *
+	 * Since we may be loaded at an address different from what we were
+	 * compiled to run at, we first fix up the physical addresses in our
+	 * page tables and then reload them.
 	 */
 
-	/* Initialize the %ds segment register */
-	movl $__KERNEL_DS,%eax
-	movl %eax,%ds
-
-	/* Load new GDT with the 64bit segments using 32bit descriptor */
-	lgdt	pGDT32 - __START_KERNEL_map
-
-	/* If the CPU doesn't support CPUID this will double fault.
-	 * Unfortunately it is hard to check for CPUID without a stack. 
+	/* Compute the delta between the address I am compiled to run at and the
+	 * address I am actually running at.
 	 */
-	
-	/* Check if extended functions are implemented */		
-	movl	$0x80000000, %eax
-	cpuid
-	cmpl	$0x80000000, %eax
-	jbe	no_long_mode
-	/* Check if long mode is implemented */
-	mov	$0x80000001, %eax
-	cpuid
-	btl	$29, %edx
-	jnc	no_long_mode
-
-	/*
-	 * Prepare for entering 64bits mode
+	leaq	_text(%rip), %rbp
+	subq	$_text - __START_KERNEL_map, %rbp
+
+	/* Is the address not 2M aligned? */
+	movq	%rbp, %rax
+	andl	$~LARGE_PAGE_MASK, %eax
+	testl	%eax, %eax
+	jnz	bad_address
+
+	/* Is the address too large? */
+	leaq	_text(%rip), %rdx
+	movq	$PGDIR_SIZE, %rax
+	cmpq	%rax, %rdx
+	jae	bad_address
+
+	/* Fixup the physical addresses in the page table
 	 */
+	addq	%rbp, init_level4_pgt + 0(%rip)
+	addq	%rbp, init_level4_pgt + (258*8)(%rip)
+	addq	%rbp, init_level4_pgt + (511*8)(%rip)
+
+	addq	%rbp, level3_ident_pgt + 0(%rip)
+	addq	%rbp, level3_kernel_pgt + (510*8)(%rip)
+
+	/* Add an Identity mapping if I am above 1G */
+	leaq	_text(%rip), %rdi
+	andq	$LARGE_PAGE_MASK, %rdi
+
+	movq	%rdi, %rax
+	shrq	$PUD_SHIFT, %rax
+	andq	$(PTRS_PER_PUD - 1), %rax
+	jz	ident_complete
+
+	leaq	(level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
+	leaq	level3_ident_pgt(%rip), %rbx
+	movq	%rdx, 0(%rbx, %rax, 8)
+
+	movq	%rdi, %rax
+	shrq	$PMD_SHIFT, %rax
+	andq	$(PTRS_PER_PMD - 1), %rax
+	leaq	__PAGE_KERNEL_LARGE_EXEC(%rdi), %rdx
+	leaq	level2_spare_pgt(%rip), %rbx
+	movq	%rdx, 0(%rbx, %rax, 8)
+ident_complete:
+
+	/* Fixup the kernel text+data virtual addresses
+	 */
+	leaq	level2_kernel_pgt(%rip), %rdi
+	leaq	4096(%rdi), %r8
+	/* See if it is a valid page table entry */
+1:	testq	$1, 0(%rdi)
+	jz	2f
+	addq	%rbp, 0(%rdi)
+	/* Go to the next page */
+2:	addq	$8, %rdi
+	cmp	%r8, %rdi
+	jne	1b
+
+	/* Fixup phys_base */
+	addq	%rbp, phys_base(%rip)
 
-	/* Enable PAE mode */
-	xorl	%eax, %eax
-	btsl	$5, %eax
-	movl	%eax, %cr4
-
-	/* Setup early boot stage 4 level pagetables */
-	movl	$(boot_level4_pgt - __START_KERNEL_map), %eax
-	movl	%eax, %cr3
-
-	/* Setup EFER (Extended Feature Enable Register) */
-	movl	$MSR_EFER, %ecx
-	rdmsr
-
-	/* Enable Long Mode */
-	btsl	$_EFER_LME, %eax
-				
-	/* Make changes effective */
-	wrmsr
+#ifdef CONFIG_SMP
+	addq	%rbp, trampoline_level4_pgt + 0(%rip)
+	addq	%rbp, trampoline_level4_pgt + (511*8)(%rip)
+#endif
+#ifdef CONFIG_ACPI_SLEEP
+	addq	%rbp, wakeup_level4_pgt + 0(%rip)
+	addq	%rbp, wakeup_level4_pgt + (511*8)(%rip)
+#endif
 
-	xorl	%eax, %eax
-	btsl	$31, %eax			/* Enable paging and in turn activate Long Mode */
-	btsl	$0, %eax			/* Enable protected mode */
-	/* Make changes effective */
-	movl	%eax, %cr0
+ENTRY(secondary_startup_64)
 	/*
-	 * At this point we're in long mode but in 32bit compatibility mode
-	 * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
-	 * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we use
-	 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
-	 */
-	ljmp	$__KERNEL_CS, $(startup_64 - __START_KERNEL_map)
-
-	.code64
-	.org 0x100	
-	.globl startup_64
-startup_64:
-	/* We come here either from startup_32
-	 * or directly from a 64bit bootloader.
-	 * Since we may have come directly from a bootloader we
-	 * reload the page tables here.
-	 */
+	 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
+	 * and someone has loaded a mapped page table.
+	 *
+	 * %esi holds a physical pointer to real_mode_data.
+	 *
+	 * We come here either from startup_64 (using physical addresses)
+	 * or from trampoline.S (using virtual addresses).
+	 *
+	 * Using virtual addresses from trampoline.S removes the need
+	 * to have any identity mapped pages in the kernel page table
+	 * after the boot processor executes this code.
+ 	 */
 
 	/* Enable PAE mode and PGE */
 	xorq	%rax, %rax
@@ -115,9 +145,15 @@ startup_64:
 	movq	%rax, %cr4
 
 	/* Setup early boot stage 4 level pagetables. */
-	movq	$(boot_level4_pgt - __START_KERNEL_map), %rax
+	movq	$(init_level4_pgt - __START_KERNEL_map), %rax
+	addq	phys_base(%rip), %rax
 	movq	%rax, %cr3
 
+	/* Ensure I am executing from virtual addresses */
+	movq	$1f, %rax
+	jmp	*%rax
+1:
+
 	/* Check if nx is implemented */
 	movl	$0x80000001, %eax
 	cpuid
@@ -126,17 +162,11 @@ startup_64:
 	/* Setup EFER (Extended Feature Enable Register) */
 	movl	$MSR_EFER, %ecx
 	rdmsr
-
-	/* Enable System Call */
-	btsl	$_EFER_SCE, %eax
-
-	/* No Execute supported? */
-	btl	$20,%edi
+	btsl	$_EFER_SCE, %eax	/* Enable System Call */
+	btl	$20,%edi		/* No Execute supported? */
 	jnc     1f
 	btsl	$_EFER_NX, %eax
-1:
-	/* Make changes effective */
-	wrmsr
+1:	wrmsr				/* Make changes effective */
 
 	/* Setup cr0 */
 #define CR0_PM				1		/* protected mode */
@@ -163,7 +193,7 @@ #define CR0_PAGING 			(1<<31)
 	 * addresses where we're currently running on. We have to do that here
 	 * because in 32bit we couldn't load a 64bit linear address.
 	 */
-	lgdt	cpu_gdt_descr
+	lgdt	cpu_gdt_descr(%rip)
 
 	/* 
 	 * Setup up a dummy PDA. this is just for some early bootup code
@@ -202,6 +232,9 @@ initial_code:
 init_rsp:
 	.quad  init_thread_union+THREAD_SIZE-8
 
+bad_address:
+	jmp bad_address
+
 ENTRY(early_idt_handler)
 	cmpl $2,early_recursion_flag(%rip)
 	jz  1f
@@ -230,109 +263,72 @@ early_idt_msg:
 early_idt_ripmsg:
 	.asciz "RIP %s\n"
 
-.code32
-ENTRY(no_long_mode)
-	/* This isn't an x86-64 CPU so hang */
-1:
-	jmp	1b
-
-.org 0xf00
-	.globl pGDT32
-pGDT32:
-	.word	gdt_end-cpu_gdt_table-1
-	.long	cpu_gdt_table-__START_KERNEL_map
-
-.org 0xf10	
-ljumpvector:
-	.long	startup_64-__START_KERNEL_map
-	.word	__KERNEL_CS
-
+.balign PAGE_SIZE
 ENTRY(stext)
 ENTRY(_stext)
 
-	$page = 0
 #define NEXT_PAGE(name) \
-	$page = $page + 1; \
-	.org $page * 0x1000; \
-	phys_/**/name = $page * 0x1000 + __PHYSICAL_START; \
+	.balign	PAGE_SIZE; \
 ENTRY(name)
 
+/* Automate the creation of 1 to 1 mapping pmd entries */
+#define PMDS(START, PERM, COUNT)		\
+	i = 0 ;					\
+	.rept (COUNT) ;				\
+	.quad	(START) + (i << 21) + (PERM) ;	\
+	i = i + 1 ;				\
+	.endr
+
+	/*
+	 * This default setting generates an ident mapping at address 0x100000
+	 * and a mapping for the kernel that precisely maps virtual address
+	 * 0xffffffff80000000 to physical address 0x000000. (always using
+	 * 2Mbyte large pages provided by PAE mode)
+	 */
 NEXT_PAGE(init_level4_pgt)
-	/* This gets initialized in x86_64_start_kernel */
-	.fill	512,8,0
+	.quad	level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+	.fill	257,8,0
+	.quad	level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+	.fill	252,8,0
+	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
 
 NEXT_PAGE(level3_ident_pgt)
-	.quad	phys_level2_ident_pgt | 0x007
+	.quad	level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
 	.fill	511,8,0
 
 NEXT_PAGE(level3_kernel_pgt)
 	.fill	510,8,0
 	/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
-	.quad	phys_level2_kernel_pgt | 0x007
+	.quad	level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
 	.fill	1,8,0
 
 NEXT_PAGE(level2_ident_pgt)
-	/* 40MB for bootup. 	*/
-	i = 0
-	.rept 20
-	.quad	i << 21 | 0x083
-	i = i + 1
-	.endr
-	/* Temporary mappings for the super early allocator in arch/x86_64/mm/init.c */
-	.globl temp_boot_pmds
-temp_boot_pmds:
-	.fill	492,8,0
+	/* Since I easily can, map the first 1G.
+	 * Don't set NX because code runs from these pages.
+	 */
+	PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD)
 	
 NEXT_PAGE(level2_kernel_pgt)
 	/* 40MB kernel mapping. The kernel code cannot be bigger than that.
 	   When you change this change KERNEL_TEXT_SIZE in page.h too. */
 	/* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */
-	i = 0
-	.rept 20
-	.quad	i << 21 | 0x183
-	i = i + 1
-	.endr
+	PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL,
+		KERNEL_TEXT_SIZE/PMD_SIZE)
 	/* Module mapping starts here */
-	.fill	492,8,0
+	.fill	(PTRS_PER_PMD - (KERNEL_TEXT_SIZE/PMD_SIZE)),8,0
 
-NEXT_PAGE(level3_physmem_pgt)
-	.quad	phys_level2_kernel_pgt | 0x007	/* so that __va works even before pagetable_init */
-	.fill	511,8,0
+NEXT_PAGE(level2_spare_pgt)
+	.fill	512,8,0
 
+#undef PMDS
 #undef NEXT_PAGE
 
 	.data
 
-#ifdef CONFIG_ACPI_SLEEP
-	.align PAGE_SIZE
-ENTRY(wakeup_level4_pgt)
-	.quad	phys_level3_ident_pgt | 0x007
-	.fill	255,8,0
-	.quad	phys_level3_physmem_pgt | 0x007
-	.fill	254,8,0
-	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
-	.quad	phys_level3_kernel_pgt | 0x007
-#endif
-
 #ifndef CONFIG_HOTPLUG_CPU
 	__INITDATA
 #endif
-	/*
-	 * This default setting generates an ident mapping at address 0x100000
-	 * and a mapping for the kernel that precisely maps virtual address
-	 * 0xffffffff80000000 to physical address 0x000000. (always using
-	 * 2Mbyte large pages provided by PAE mode)
-	 */
-	.align PAGE_SIZE
-ENTRY(boot_level4_pgt)
-	.quad	phys_level3_ident_pgt | 0x007
-	.fill	255,8,0
-	.quad	phys_level3_physmem_pgt | 0x007
-	.fill	254,8,0
-	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
-	.quad	phys_level3_kernel_pgt | 0x007
-
-	.data
 
 	.align 16
 	.globl cpu_gdt_descr
@@ -347,6 +343,10 @@ #ifdef CONFIG_SMP
 	.endr
 #endif
 
+ENTRY(phys_base)
+	/* This must match the first entry in level2_kernel_pgt */
+	.quad	0x0000000000000000
+
 /* We need valid kernel segments for data and code in long mode too
  * IRET will check the segment types  kkeil 2000/10/28
  * Also sysret mandates a special GDT layout 
@@ -360,16 +360,16 @@ #endif
 	
 ENTRY(cpu_gdt_table)
 	.quad	0x0000000000000000	/* NULL descriptor */
+	.quad	0x00cf9b000000ffff	/* __KERNEL32_CS */
+	.quad	0x00af9b000000ffff	/* __KERNEL_CS */
+	.quad	0x00cf93000000ffff	/* __KERNEL_DS */
+	.quad	0x00cffb000000ffff	/* __USER32_CS */
+	.quad	0x00cff3000000ffff	/* __USER_DS, __USER32_DS  */
+	.quad	0x00affb000000ffff	/* __USER_CS */
 	.quad	0x0			/* unused */
-	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
-	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
-	.quad	0x00cffa000000ffff	/* __USER32_CS */
-	.quad	0x00cff2000000ffff	/* __USER_DS, __USER32_DS  */		
-	.quad	0x00affa000000ffff	/* __USER_CS */
-	.quad	0x00cf9a000000ffff	/* __KERNEL32_CS */
 	.quad	0,0			/* TSS */
 	.quad	0,0			/* LDT */
-	.quad   0,0,0			/* three TLS descriptors */ 
+	.quad   0,0,0			/* three TLS descriptors */
 	.quad	0			/* unused */
 gdt_end:	
 	/* asm/segment.h:GDT_ENTRIES must match this */	
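
The heart of the relocation scheme is the delta fixup at the top of startup_64:
every physical address baked into the initial page tables at link time is off
by (runtime load address - compiled load address), so that delta is added to
each affected entry before %cr3 is loaded. A C rendering of those addq lines,
offered only as a sketch with hypothetical parameter names:

    #include <stdint.h>

    static void fixup_initial_pgtables(uint64_t *init_level4_pgt,
                                       uint64_t *level3_ident_pgt,
                                       uint64_t *level3_kernel_pgt,
                                       uint64_t delta)
    {
        init_level4_pgt[0]     += delta;   /* identity mapping             */
        init_level4_pgt[258]   += delta;   /* direct mapping (PAGE_OFFSET) */
        init_level4_pgt[511]   += delta;   /* kernel mapping               */
        level3_ident_pgt[0]    += delta;
        level3_kernel_pgt[510] += delta;
    }

The pgd slots match the virtual layout: slot 0 covers the identity map, slot
258 corresponds to __PAGE_OFFSET (0xffff810000000000 >> 39 masked to 9 bits),
and slot 511 to __START_KERNEL_map (0xffffffff80000000).
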
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index 36647ce..99d4463 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -18,8 +18,16 @@ #include <asm/bootsetup.h>
 #include <asm/setup.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>
+#include <asm/tlbflush.h>
 #include <asm/sections.h>
 
+static void __init zap_identity_mappings(void)
+{
+	pgd_t *pgd = pgd_offset_k(0UL);
+	pgd_clear(pgd);
+	__flush_tlb();
+}
+
 /* Don't add a printk in there. printk relies on the PDA which is not initialized 
    yet. */
 static void __init clear_bss(void)
@@ -29,29 +37,28 @@ static void __init clear_bss(void)
 }
 
 #define NEW_CL_POINTER		0x228	/* Relative to real mode data */
-#define OLD_CL_MAGIC_ADDR	0x90020
+#define OLD_CL_MAGIC_ADDR	0x20
 #define OLD_CL_MAGIC            0xA33F
-#define OLD_CL_BASE_ADDR        0x90000
-#define OLD_CL_OFFSET           0x90022
+#define OLD_CL_OFFSET           0x22
 
 extern char saved_command_line[];
 
 static void __init copy_bootdata(char *real_mode_data)
 {
-	int new_data;
+	unsigned long new_data;
 	char * command_line;
 
 	memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE);
-	new_data = *(int *) (x86_boot_params + NEW_CL_POINTER);
+	new_data = *(u32 *) (x86_boot_params + NEW_CL_POINTER);
 	if (!new_data) {
-		if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) {
+		if (OLD_CL_MAGIC != *(u16 *)(real_mode_data + OLD_CL_MAGIC_ADDR)) {
 			printk("so old bootloader that it does not support commandline?!\n");
 			return;
 		}
-		new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET;
+		new_data = __pa(real_mode_data) + *(u16 *)(real_mode_data + OLD_CL_OFFSET);
 		printk("old bootloader convention, maybe loadlin?\n");
 	}
-	command_line = (char *) ((u64)(new_data));
+	command_line = __va(new_data);
 	memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE);
 	printk("Bootdata ok (command line is %s)\n", saved_command_line);	
 }
@@ -79,6 +86,8 @@ void __init x86_64_start_kernel(char * r
 	char *s;
 	int i;
 
+	/* Make NULL pointers segfault */
+	zap_identity_mappings();
 	for (i = 0; i < 256; i++)
 		set_intr_gate(i, early_idt_handler);
 	asm volatile("lidt %0" :: "m" (idt_descr));
@@ -89,17 +98,11 @@ void __init x86_64_start_kernel(char * r
 	 */
 	lockdep_init();
 
-	/*
-	 * switch to init_level4_pgt from boot_level4_pgt
-	 */
-	memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t));
-	asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
-
  	for (i = 0; i < NR_CPUS; i++)
  		cpu_pda(i) = &boot_cpu_pda[i];
 
 	pda_init(0);
-	copy_bootdata(real_mode_data);
+	copy_bootdata(__va(real_mode_data));
 #ifdef CONFIG_SMP
 	cpu_set(0, cpu_online_map);
 #endif
@@ -116,7 +119,7 @@ #ifdef CONFIG_X86_IO_APIC
 		disable_apic = 1;
 #endif
 	/* You need early console to see that */
-	if (__pa_symbol(&_end) >= KERNEL_TEXT_SIZE)
+	if (((unsigned long)&_end) >= (__START_KERNEL_map + KERNEL_TEXT_SIZE))
 		panic("Kernel too big for kernel mapping\n");
 
 	setup_boot_cpu_data();
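
The copy_bootdata() change is what keeps the old-bootloader path working once
low memory is no longer identity mapped: the magic and offset are read relative
to the real-mode data block instead of from the hardcoded segment at 0x90000.
As a sketch (hedged: 'rmd' and 'rmd_phys' are illustrative names, not from the
patch):

    #define OLD_CL_MAGIC_ADDR 0x20    /* relative to real-mode data */
    #define OLD_CL_MAGIC      0xA33F
    #define OLD_CL_OFFSET     0x22

    /* rmd = __va() of the real-mode data, rmd_phys = its physical address */
    static unsigned long old_cmdline_phys(char *rmd, unsigned long rmd_phys)
    {
        if (*(unsigned short *)(rmd + OLD_CL_MAGIC_ADDR) != OLD_CL_MAGIC)
            return 0;    /* bootloader predates the command line protocol */
        return rmd_phys + *(unsigned short *)(rmd + OLD_CL_OFFSET);
    }
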
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 8a099ff..44a40e6 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -505,10 +505,10 @@ static void discover_ebda(void)
 	 * there is a real-mode segmented pointer pointing to the 
 	 * 4K EBDA area at 0x40E
 	 */
-	ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER;
+	ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
 	ebda_addr <<= 4;
 
-	ebda_size = *(unsigned short *)(unsigned long)ebda_addr;
+	ebda_size = *(unsigned short *)__va(ebda_addr);
 
 	/* Round EBDA up to pages */
 	if (ebda_size == 0)
@@ -543,11 +543,12 @@ #endif
 	init_mm.end_code = (unsigned long) &_etext;
 	init_mm.end_data = (unsigned long) &_edata;
 	init_mm.brk = (unsigned long) &_end;
+	init_mm.pgd = __va(__pa_symbol(&init_level4_pgt));
 
-	code_resource.start = virt_to_phys(&_text);
-	code_resource.end = virt_to_phys(&_etext)-1;
-	data_resource.start = virt_to_phys(&_etext);
-	data_resource.end = virt_to_phys(&_edata)-1;
+	code_resource.start = __pa_symbol(&_text);
+	code_resource.end = __pa_symbol(&_etext)-1;
+	data_resource.start = __pa_symbol(&_etext);
+	data_resource.end = __pa_symbol(&_edata)-1;
 
 	parse_cmdline_early(cmdline_p);
 
@@ -568,8 +569,6 @@ #endif
 
 	dmi_scan_machine();
 
-	zap_low_mappings(0);
-
 #ifdef CONFIG_ACPI
 	/*
 	 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
@@ -610,15 +610,8 @@ #endif
 		reserve_bootmem_generic(ebda_addr, ebda_size);
 
 #ifdef CONFIG_SMP
-	/*
-	 * But first pinch a few for the stack/trampoline stuff
-	 * FIXME: Don't need the extra page at 4K, but need to fix
-	 * trampoline before removing it. (see the GDT stuff)
-	 */
-	reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE);
-
 	/* Reserve SMP trampoline */
-	reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE);
+	reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, 2*PAGE_SIZE);
 #endif
 
 #ifdef CONFIG_ACPI_SLEEP
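
The discover_ebda() hunk follows the same rule as copy_bootdata(): with the
boot-time identity mappings gone, every BIOS-owned physical address has to be
read through __va(). A compressed sketch of the lookup, under that assumption:

    #define EBDA_ADDR_POINTER 0x40E   /* real-mode pointer to the EBDA */

    static unsigned long ebda_base(void)
    {
        unsigned long seg = *(unsigned short *)__va(EBDA_ADDR_POINTER);
        return seg << 4;              /* real-mode segment -> physical */
    }
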
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index 6fe58a6..a1f3aed 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -197,7 +197,6 @@ void __cpuinit cpu_init (void)
 	/* CPU 0 is initialised in head64.c */
 	if (cpu != 0) {
 		pda_init(cpu);
-		zap_low_mappings(cpu);
 	} else 
 		estacks = boot_exception_stacks; 
 
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index 5a1c0a3..5a54066 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -76,7 +76,7 @@ static inline void leave_mm(int cpu)
 	if (read_pda(mmu_state) == TLBSTATE_OK)
 		BUG();
 	cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
-	load_cr3(swapper_pg_dir);
+	load_cr3(init_mm.pgd);
 }
 
 /*
diff --git a/arch/x86_64/kernel/suspend.c b/arch/x86_64/kernel/suspend.c
index 91f7e67..fe865ea 100644
--- a/arch/x86_64/kernel/suspend.c
+++ b/arch/x86_64/kernel/suspend.c
@@ -33,7 +33,6 @@ void __save_processor_state(struct saved
 	asm volatile ("str %0"  : "=m" (ctxt->tr));
 
 	/* XMM0..XMM15 should be handled by kernel_fpu_begin(). */
-	/* EFER should be constant for kernel version, no need to handle it. */
 	/*
 	 * segment registers
 	 */
@@ -50,6 +49,7 @@ void __save_processor_state(struct saved
 	/*
 	 * control registers 
 	 */
+	rdmsrl(MSR_EFER, ctxt->efer);
 	asm volatile ("movq %%cr0, %0" : "=r" (ctxt->cr0));
 	asm volatile ("movq %%cr2, %0" : "=r" (ctxt->cr2));
 	asm volatile ("movq %%cr3, %0" : "=r" (ctxt->cr3));
@@ -75,6 +75,7 @@ void __restore_processor_state(struct sa
 	/*
 	 * control registers
 	 */
+	wrmsrl(MSR_EFER, ctxt->efer);
 	asm volatile ("movq %0, %%cr8" :: "r" (ctxt->cr8));
 	asm volatile ("movq %0, %%cr4" :: "r" (ctxt->cr4));
 	asm volatile ("movq %0, %%cr3" :: "r" (ctxt->cr3));
diff --git a/arch/x86_64/kernel/trampoline.S b/arch/x86_64/kernel/trampoline.S
index 23a03eb..13eee63 100644
--- a/arch/x86_64/kernel/trampoline.S
+++ b/arch/x86_64/kernel/trampoline.S
@@ -3,6 +3,7 @@
  *	Trampoline.S	Derived from Setup.S by Linus Torvalds
  *
  *	4 Jan 1997 Michael Chastain: changed to gnu as.
+ *	15 Sept 2005 Eric Biederman: 64bit PIC support
  *
  *	Entry: CS:IP point to the start of our code, we are 
  *	in real mode with no stack, but the rest of the 
@@ -17,15 +18,20 @@
  *	and IP is zero.  Thus, data addresses need to be absolute
  *	(no relocation) and are taken with regard to r_base.
  *
+ *	With the addition of trampoline_level4_pgt this code can
+ *	now enter a 64bit kernel that lives at arbitrary 64bit
+ *	physical addresses.
+ *
  *	If you work on this file, check the object module with objdump
  *	--full-contents --reloc to make sure there are no relocation
- *	entries. For the GDT entry we do hand relocation in smpboot.c
- *	because of 64bit linker limitations.
+ *	entries.
  */
 
 #include <linux/linkage.h>
-#include <asm/segment.h>
+#include <asm/pgtable.h>
 #include <asm/page.h>
+#include <asm/msr.h>
+#include <asm/segment.h>
 
 .data
 
@@ -33,15 +39,31 @@ #include <asm/page.h>
 
 ENTRY(trampoline_data)
 r_base = .
+	cli			# We should be safe anyway
 	wbinvd	
 	mov	%cs, %ax	# Code and data in the same place
 	mov	%ax, %ds
+	mov	%ax, %es
+	mov	%ax, %ss
 
-	cli			# We should be safe anyway
 
 	movl	$0xA5A5A5A5, trampoline_data - r_base
 				# write marker for master knows we're running
 
+					# Setup stack
+	movw	$(trampoline_stack_end - r_base), %sp
+
+	call	verify_cpu		# Verify the cpu supports long mode
+
+	mov	%cs, %ax
+	movzx	%ax, %esi		# Find the 32bit trampoline location
+	shll	$4, %esi
+
+					# Fixup the vectors
+	addl	%esi, startup_32_vector - r_base
+	addl	%esi, startup_64_vector - r_base
+	addl	%esi, tgdt + 2 - r_base	# Fixup the gdt pointer
+
 	/*
 	 * GDT tables in non default location kernel can be beyond 16MB and
 	 * lgdt will not be able to load the address as in real mode default
@@ -49,23 +71,141 @@ r_base = .
 	 * to 32 bit.
 	 */
 
-	lidtl	idt_48 - r_base	# load idt with 0, 0
-	lgdtl	gdt_48 - r_base	# load gdt with whatever is appropriate
+	lidtl	tidt - r_base	# load idt with 0, 0
+	lgdtl	tgdt - r_base	# load gdt with whatever is appropriate
 
 	xor	%ax, %ax
 	inc	%ax		# protected mode (PE) bit
 	lmsw	%ax		# into protected mode
-	# flaush prefetch and jump to startup_32 in arch/x86_64/kernel/head.S
-	ljmpl	$__KERNEL32_CS, $(startup_32-__START_KERNEL_map)
+
+	# flush prefetch and jump to startup_32
+	ljmpl	*(startup_32_vector - r_base)
+
+	.code32
+	.balign 4
+startup_32:
+	movl	$__KERNEL_DS, %eax	# Initialize the %ds segment register
+	movl	%eax, %ds
+
+	xorl	%eax, %eax
+	btsl	$5, %eax		# Enable PAE mode
+	movl	%eax, %cr4
+
+					# Setup trampoline 4 level pagetables
+	leal	(trampoline_level4_pgt - r_base)(%esi), %eax
+	movl	%eax, %cr3
+
+	movl	$MSR_EFER, %ecx
+	movl	$(1 << _EFER_LME), %eax	# Enable Long Mode
+	xorl	%edx, %edx
+	wrmsr
+
+	xorl	%eax, %eax
+	btsl	$31, %eax		# Enable paging and in turn activate Long Mode
+	btsl	$0, %eax		# Enable protected mode
+	movl	%eax, %cr0
+
+	/*
+	 * At this point we're in long mode but in 32bit compatibility mode
+	 * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
+	 * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we use
+	 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
+	 */
+	ljmp	*(startup_64_vector - r_base)(%esi)
+
+	.code64
+	.balign 4
+startup_64:
+	# Now jump into the kernel using virtual addresses
+	movq	$secondary_startup_64, %rax
+	jmp	*%rax
+
+	.code16
+verify_cpu:
+	pushl	$0			# Kill any dangerous flags
+	popfl
+
+	/* minimum CPUID flags for x86-64 */
+	/* see http://www.x86-64.org/lists/discuss/msg02971.html */
+#define REQUIRED_MASK1 ((1<<0)|(1<<3)|(1<<4)|(1<<5)|(1<<6)|(1<<8)|\
+			   (1<<13)|(1<<15)|(1<<24)|(1<<25)|(1<<26))
+#define REQUIRED_MASK2 (1<<29)
+
+	pushfl				# check for cpuid
+	popl	%eax
+	movl	%eax, %ebx
+	xorl	$0x200000,%eax
+	pushl	%eax
+	popfl
+	pushfl
+	popl	%eax
+	pushl	%ebx
+	popfl
+	cmpl	%eax, %ebx
+	jz	no_longmode
+
+	xorl	%eax, %eax		# See if cpuid 1 is implemented
+	cpuid
+	cmpl	$0x1, %eax
+	jb	no_longmode
+
+	movl	$0x01, %eax		# Does the cpu have what it takes?
+	cpuid
+	andl	$REQUIRED_MASK1, %edx
+	xorl	$REQUIRED_MASK1, %edx
+	jnz	no_longmode
+
+	movl	$0x80000000, %eax	# See if extended cpuid is implemented
+	cpuid
+	cmpl	$0x80000001, %eax
+	jb	no_longmode
+
+	movl	$0x80000001, %eax	# Does the cpu have what it takes?
+	cpuid
+	andl	$REQUIRED_MASK2, %edx
+	xorl	$REQUIRED_MASK2, %edx
+	jnz	no_longmode
+
+	ret				# The cpu supports long mode
+
+no_longmode:
+	hlt
+	jmp no_longmode
+
 
 	# Careful these need to be in the same 64K segment as the above;
-idt_48:
+tidt:
 	.word	0			# idt limit = 0
 	.word	0, 0			# idt base = 0L
 
-gdt_48:
-	.short	__KERNEL32_CS + 7	# gdt limit
-	.long	cpu_gdt_table-__START_KERNEL_map
+	# Duplicate the global descriptor table
+	# so the kernel can live anywhere
+	.balign 4
+tgdt:
+	.short	tgdt_end - tgdt		# gdt limit
+	.long	tgdt - r_base
+	.short 0
+	.quad	0x00cf9b000000ffff	# __KERNEL32_CS
+	.quad	0x00af9b000000ffff	# __KERNEL_CS
+	.quad	0x00cf93000000ffff	# __KERNEL_DS
+tgdt_end:
+
+	.balign 4
+startup_32_vector:
+	.long	startup_32 - r_base
+	.word	__KERNEL32_CS, 0
+
+	.balign 4
+startup_64_vector:
+	.long	startup_64 - r_base
+	.word	__KERNEL_CS, 0
+
+trampoline_stack:
+	.org 0x1000
+trampoline_stack_end:
+ENTRY(trampoline_level4_pgt)
+	.quad	level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+	.fill	510,8,0
+	.quad	level3_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
 
-.globl trampoline_end
-trampoline_end:	
+ENTRY(trampoline_end)
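
Because the trampoline can be copied to any page below 1MB, its two far jumps
go through in-memory vectors that are patched at runtime: the 16bit code adds
the trampoline's own physical base (%esi) to each 32bit offset before use. The
layout those addl instructions edit looks like this, as a sketch:

    #include <stdint.h>

    /* A 48bit far pointer as consumed by ljmpl *(vector): 32bit offset
     * followed by a 16bit selector. */
    struct far_ptr {
        uint32_t offset;      /* startup_32 - r_base, then += load base */
        uint16_t selector;    /* __KERNEL32_CS or __KERNEL_CS */
    } __attribute__((packed));

    static void fixup_vector(struct far_ptr *v, uint32_t load_base)
    {
        v->offset += load_base;    /* mirrors: addl %esi, vector - r_base */
    }

The same trick is applied to tgdt + 2, the 32bit base inside the GDT pointer,
which is why the trampoline now carries its own copy of the GDT rather than
pointing into the kernel image.
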
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index 7c4de31..456fe8e 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -15,7 +15,7 @@ ENTRY(phys_startup_64)
 jiffies_64 = jiffies;
 SECTIONS
 {
-  . = __START_KERNEL;
+  . = __START_KERNEL_map;
   phys_startup_64 = startup_64 - LOAD_OFFSET;
   _text = .;			/* Text and read-only data */
   .text :  AT(ADDR(.text) - LOAD_OFFSET) {
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index f603037..2e48407 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -41,6 +41,12 @@ seqlock_t __xtime_lock __section_xtime_l
 
 #include <asm/unistd.h>
 
+#define __pa_vsymbol(x)			\
+	({unsigned long v;  		\
+	extern char __vsyscall_0; 	\
+	  asm("" : "=r" (v) : "0" (x)); \
+	  ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); })
+
 static __always_inline void timeval_normalize(struct timeval * tv)
 {
 	time_t __sec;
@@ -155,10 +161,10 @@ static int vsyscall_sysctl_change(ctl_ta
 		return ret;
 	/* gcc has some trouble with __va(__pa()), so just do it this
 	   way. */
-	map1 = ioremap(__pa_symbol(&vsysc1), 2);
+	map1 = ioremap(__pa_vsymbol(&vsysc1), 2);
 	if (!map1)
 		return -ENOMEM;
-	map2 = ioremap(__pa_symbol(&vsysc2), 2);
+	map2 = ioremap(__pa_vsymbol(&vsysc2), 2);
 	if (!map2) {
 		ret = -ENOMEM;
 		goto out;
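
The new __pa_vsymbol() exists because the vsyscall page is linked at its own
fixed virtual address (VSYSCALL_FIRST_PAGE), not inside the kernel mapping, so
the reworked __pa_symbol() would now translate it incorrectly. Conceptually it
rebases the address onto __vsyscall_0, which does live in the kernel image:

    /* sketch only; VSYSCALL_FIRST_PAGE and __pa_symbol come from the kernel
     * headers touched elsewhere in this patch */
    static unsigned long pa_vsymbol(unsigned long v)
    {
        extern char __vsyscall_0;
        return (v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0);
    }
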
--- linux-2.6.17.noarch.orig/arch/x86_64/mm/init.c	2006-09-12 14:21:35.000000000 -0400
+++ linux-2.6.17.noarch/arch/x86_64/mm/init.c	2006-09-13 13:23:01.000000000 -0400
@@ -168,154 +168,101 @@
 
 unsigned long __initdata table_start, table_end; 
 
-extern pmd_t temp_boot_pmds[]; 
-
-static  struct temp_map { 
-	pmd_t *pmd;
-	void  *address; 
-	int    allocated; 
-} temp_mappings[] __initdata = { 
-	{ &temp_boot_pmds[0], (void *)(40UL * 1024 * 1024) },
-	{ &temp_boot_pmds[1], (void *)(42UL * 1024 * 1024) }, 
-	{}
-}; 
-
-static __meminit void *alloc_low_page(int *index, unsigned long *phys)
+static __init unsigned long alloc_low_page(void)
 { 
-	struct temp_map *ti;
-	int i; 
-	unsigned long pfn = table_end++, paddr; 
-	void *adr;
-
-	if (after_bootmem) {
-		adr = (void *)get_zeroed_page(GFP_ATOMIC);
-		*phys = __pa(adr);
-		return adr;
-	}
-
+	unsigned long pfn = table_end++;
 	if (pfn >= end_pfn) 
 		panic("alloc_low_page: ran out of memory"); 
-	for (i = 0; temp_mappings[i].allocated; i++) {
-		if (!temp_mappings[i].pmd) 
-			panic("alloc_low_page: ran out of temp mappings"); 
-	} 
-	ti = &temp_mappings[i];
-	paddr = (pfn << PAGE_SHIFT) & PMD_MASK; 
-	set_pmd(ti->pmd, __pmd(paddr | _KERNPG_TABLE | _PAGE_PSE)); 
-	ti->allocated = 1; 
-	__flush_tlb(); 	       
-	adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK); 
-	memset(adr, 0, PAGE_SIZE);
-	*index = i; 
-	*phys  = pfn * PAGE_SIZE;  
-	return adr; 
-} 
-
-static __meminit void unmap_low_page(int i)
-{ 
-	struct temp_map *ti;
-
-	if (after_bootmem)
-		return;
-
-	ti = &temp_mappings[i];
-	set_pmd(ti->pmd, __pmd(0));
-	ti->allocated = 0; 
-} 
+	return pfn << PAGE_SHIFT;
+}
 
 /* Must run before zap_low_mappings */
 __init void *early_ioremap(unsigned long addr, unsigned long size)
 {
-	unsigned long map = round_down(addr, LARGE_PAGE_SIZE); 
-
-	/* actually usually some more */
-	if (size >= LARGE_PAGE_SIZE) { 
-		printk("SMBIOS area too long %lu\n", size);
-		return NULL;
+	unsigned long vaddr;
+	pmd_t *pmd, *last_pmd;
+	int i, pmds;
+
+	pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
+	vaddr = __START_KERNEL_map;
+	pmd = level2_kernel_pgt;
+	last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
+	for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
+		for (i = 0; i < pmds; i++) {
+			if (pmd_present(pmd[i]))
+				goto next;
+		}
+		vaddr += addr & ~PMD_MASK;
+		addr &= PMD_MASK;
+		for (i = 0; i < pmds; i++, addr += PMD_SIZE)
+			set_pmd(pmd + i,__pmd(addr | __PAGE_KERNEL_LARGE));
+		__flush_tlb();
+		return (void *)vaddr;
+	next:
+		;
 	}
-	set_pmd(temp_mappings[0].pmd,  __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
-	map += LARGE_PAGE_SIZE;
-	set_pmd(temp_mappings[1].pmd,  __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
-	__flush_tlb();
-	return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1));
+	printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
+	return NULL;
 }
 
 /* To avoid virtual aliases later */
 __init void early_iounmap(void *addr, unsigned long size)
 {
-	if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address)
-		printk("early_iounmap: bad address %p\n", addr);
-	set_pmd(temp_mappings[0].pmd, __pmd(0));
-	set_pmd(temp_mappings[1].pmd, __pmd(0));
+	unsigned long vaddr;
+	pmd_t *pmd;
+	int i, pmds;
+
+	vaddr = (unsigned long)addr;
+	pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
+	pmd = level2_kernel_pgt + pmd_index(vaddr);
+	for (i = 0; i < pmds; i++)
+		pmd_clear(pmd + i);
 	__flush_tlb();
 }
 
-static void __meminit
-phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
+static void __init
+phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
 {
-	int i = pmd_index(address);
+	int i;
 
-	for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
+	for (i = 0; i < PTRS_PER_PMD; pmd++, i++, address += PMD_SIZE) {
 		unsigned long entry;
-		pmd_t *pmd = pmd_page + pmd_index(address);
 
 		if (address >= end) {
-			if (!after_bootmem)
-				for (; i < PTRS_PER_PMD; i++, pmd++)
-					set_pmd(pmd, __pmd(0));
+			for (; i < PTRS_PER_PMD; i++, pmd++)
+				set_pmd(pmd, __pmd(0));
 			break;
 		}
-		
-		if (pmd_val(*pmd)) {
-			printk (KERN_ERR "%s trying to trample pte entry \
-				%lx@%lx\n",__func__,pmd_val(*pmd),address);
-		}
 		entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
 		entry &= __supported_pte_mask;
 		set_pmd(pmd, __pmd(entry));
 	}
 }
 
-static void __meminit
-phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
-{
-	pmd_t *pmd = pmd_offset(pud,0);
-	spin_lock(&init_mm.page_table_lock);
-	phys_pmd_init(pmd, address, end);
-	spin_unlock(&init_mm.page_table_lock);
-	__flush_tlb_all();
-}
-
-static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
+static void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
 { 
-	int i = pud_index(addr);
+	long i = pud_index(address);
 
+	pud = pud + i;
 
-	for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
-		int map; 
-		unsigned long pmd_phys;
-		pud_t *pud = pud_page + pud_index(addr);
+	for (; i < PTRS_PER_PUD; pud++, i++) {
+		unsigned long paddr, pmd_phys;
 		pmd_t *pmd;
 
-		if (addr >= end)
+		paddr = (address & PGDIR_MASK) + i*PUD_SIZE;
+		if (paddr >= end)
 			break;
 
-		if (!after_bootmem && !e820_any_mapped(addr,addr+PUD_SIZE,0)) {
+		if (!e820_any_mapped(paddr, paddr+PUD_SIZE, 0)) {
 			set_pud(pud, __pud(0)); 
 			continue;
 		} 
 
-		if (pud_val(*pud)) {
-			phys_pmd_update(pud, addr, end);
-			continue;
-		}
-
-		pmd = alloc_low_page(&map, &pmd_phys);
-		spin_lock(&init_mm.page_table_lock);
+		pmd_phys = alloc_low_page();
+		pmd = early_ioremap(pmd_phys, PAGE_SIZE);
 		set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
-		phys_pmd_init(pmd, addr, end);
-		spin_unlock(&init_mm.page_table_lock);
-		unmap_low_page(map);
+		phys_pmd_init(pmd, paddr, end);
+		early_iounmap(pmd, PAGE_SIZE);
 	}
 	__flush_tlb();
 } 
@@ -348,7 +295,7 @@
 /* Setup the direct mapping of the physical memory at PAGE_OFFSET.
    This runs before bootmem is initialized and gets pages directly from the 
    physical memory. To access them they are temporarily mapped. */
-void __meminit init_memory_mapping(unsigned long start, unsigned long end)
+void __init init_memory_mapping(unsigned long start, unsigned long end)
 { 
 	unsigned long next; 
 
@@ -360,49 +307,28 @@
 	 * mapped.  Unfortunately this is done currently before the nodes are 
 	 * discovered.
 	 */
-	if (!after_bootmem)
-		find_early_table_space(end);
+	find_early_table_space(end);
 
 	start = (unsigned long)__va(start);
 	end = (unsigned long)__va(end);
 
 	for (; start < end; start = next) {
-		int map;
 		unsigned long pud_phys; 
 		pgd_t *pgd = pgd_offset_k(start);
 		pud_t *pud;
 
-		if (after_bootmem)
-			pud = pud_offset(pgd, start & PGDIR_MASK);
-		else
-			pud = alloc_low_page(&map, &pud_phys);
+		pud_phys = alloc_low_page();
+		pud = early_ioremap(pud_phys, PAGE_SIZE);
 
 		next = start + PGDIR_SIZE;
 		if (next > end) 
 			next = end; 
 		phys_pud_init(pud, __pa(start), __pa(next));
-		if (!after_bootmem)
-			set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
-		unmap_low_page(map);   
+		set_pgd(pgd, mk_kernel_pgd(pud_phys));
+		early_iounmap(pud, PAGE_SIZE);
 	} 
 
-	if (!after_bootmem)
-		asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
-	__flush_tlb_all();
-}
-
-void __cpuinit zap_low_mappings(int cpu)
-{
-	if (cpu == 0) {
-		pgd_t *pgd = pgd_offset_k(0UL);
-		pgd_clear(pgd);
-	} else {
-		/*
-		 * For AP's, zap the low identity mappings by changing the cr3
-		 * to init_level4_pgt and doing local flush tlb all
-		 */
-		asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
-	}
+	asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
 	__flush_tlb_all();
 }
 
@@ -541,6 +467,92 @@
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
+static void 
+late_phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
+{
+	int i = pmd_index(address);
+
+	for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
+		unsigned long entry;
+		pmd_t *pmd = pmd_page + pmd_index(address);
+
+		if (address >= end) 
+			break;
+		
+		if (pmd_val(*pmd)) {
+			printk(KERN_ERR "%s trying to trample pte entry "
+				"%lx@%lx\n", __func__, pmd_val(*pmd), address);
+		}
+		entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
+		entry &= __supported_pte_mask;
+		set_pmd(pmd, __pmd(entry));
+	}
+}
+static void 
+late_phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
+{
+	pmd_t *pmd = pmd_offset(pud,0);
+	spin_lock(&init_mm.page_table_lock);
+	late_phys_pmd_init(pmd, address, end);
+	spin_unlock(&init_mm.page_table_lock);
+	__flush_tlb_all();
+}
+
+static void  late_phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
+{ 
+	int i = pud_index(addr);
+
+
+	for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
+		int map; 
+		unsigned long pmd_phys;
+		pud_t *pud = pud_page + pud_index(addr);
+		pmd_t *pmd;
+
+		if (addr >= end)
+			break;
+
+		if (pud_val(*pud)) {
+			late_phys_pmd_update(pud, addr, end);
+			continue;
+		}
+
+		pmd = (pmd_t *)get_zeroed_page(GFP_ATOMIC);
+		pmd_phys = __pa(pmd);
+
+		spin_lock(&init_mm.page_table_lock);
+		set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
+		late_phys_pmd_init(pmd, addr, end);
+		spin_unlock(&init_mm.page_table_lock);
+	}
+} 
+
+/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
+ * This runs after bootmem is initialized and gets pages normally.
+ */
+static void late_init_memory_mapping(unsigned long start, unsigned long end)
+{
+	unsigned long next;
+
+	Dprintk("add_memory_mapping\n");
+
+	start = (unsigned long)__va(start);
+	end = (unsigned long)__va(end);
+
+	for (; start < end; start = next) {
+		unsigned long pud_phys;
+		pgd_t *pgd = pgd_offset_k(start);
+		pud_t *pud;
+
+		pud = pud_offset(pgd, start & PGDIR_MASK);
+
+		next = start + PGDIR_SIZE;
+		if (next > end)
+			next = end;
+		late_phys_pud_init(pud, __pa(start), __pa(next));
+	}
+	__flush_tlb_all();
+}
 /*
  * Memory is added always to NORMAL zone. This means you will never get
  * additional DMA/DMA32 memory.
@@ -557,7 +569,7 @@
 	if (ret)
 		goto error;
 
-	init_memory_mapping(start, (start + size -1));
+	late_init_memory_mapping(start, (start + size -1));
 
 	return ret;
 error:
@@ -680,15 +692,6 @@
 		reservedpages << (PAGE_SHIFT-10),
 		datasize >> 10,
 		initsize >> 10);
-
-#ifdef CONFIG_SMP
-	/*
-	 * Sync boot_level4_pgt mappings with the init_level4_pgt
-	 * except for the low identity mappings which are already zapped
-	 * in init_level4_pgt. This sync-up is essential for AP's bringup
-	 */
-	memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t));
-#endif
 }
 
 void free_init_pages(char *what, unsigned long begin, unsigned long end)
@@ -700,11 +703,11 @@
 
 	printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
 	for (addr = begin; addr < end; addr += PAGE_SIZE) {
-		ClearPageReserved(virt_to_page(addr));
-		init_page_count(virt_to_page(addr));
-		memset((void *)(addr & ~(PAGE_SIZE-1)),
-			POISON_FREE_INITMEM, PAGE_SIZE);
-		free_page(addr);
+		struct page *page = pfn_to_page(addr >> PAGE_SHIFT);
+		ClearPageReserved(page);
+		init_page_count(page);
+		memset(page_address(page), POISON_FREE_INITMEM, PAGE_SIZE);
+		__free_page(page);
 		totalram_pages++;
 	}
 }
@@ -714,17 +717,18 @@
 	memset(__initdata_begin, POISON_FREE_INITDATA,
 		__initdata_end - __initdata_begin);
 	free_init_pages("unused kernel memory",
-			(unsigned long)(&__init_begin),
-			(unsigned long)(&__init_end));
+			__pa_symbol(&__init_begin),
+			__pa_symbol(&__init_end));
 }
 
 #ifdef CONFIG_DEBUG_RODATA
 
 void mark_rodata_ro(void)
 {
-	unsigned long addr = (unsigned long)__start_rodata;
+	unsigned long addr = (unsigned long)__va(__pa_symbol(&__start_rodata));
+	unsigned long end  = (unsigned long)__va(__pa_symbol(&__end_rodata));
 
-	for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE)
+	for (; addr < end; addr += PAGE_SIZE)
 		change_page_attr_addr(addr, 1, PAGE_KERNEL_RO);
 
 	printk ("Write protecting the kernel read-only data: %luk\n",
@@ -743,7 +747,7 @@
 #ifdef CONFIG_BLK_DEV_INITRD
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-	free_init_pages("initrd memory", start, end);
+	free_init_pages("initrd memory", __pa(start), __pa(end));
 }
 #endif
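
The rewritten early_ioremap() searches level2_kernel_pgt for a run of unused
2MB slots instead of relying on the old fixed temp_boot_pmds. The slot count is
the only subtle arithmetic: the offset into the first 2MB page plus the size,
rounded up to whole pmds. A standalone sketch of that one line:

    #define PMD_SIZE (1UL << 21)
    #define PMD_MASK (~(PMD_SIZE - 1))

    static unsigned long pmds_needed(unsigned long addr, unsigned long size)
    {
        return ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
    }

For example, addr = 0x1fffff with size = 2 straddles a 2MB boundary, and the
formula correctly yields 2: (0x1fffff + 2 + 0x1fffff) / 0x200000 == 2.
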
 
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
index 2685b1f..9d6196d 100644
--- a/arch/x86_64/mm/pageattr.c
+++ b/arch/x86_64/mm/pageattr.c
@@ -51,7 +51,6 @@ static struct page *split_large_page(uns
 	SetPagePrivate(base);
 	page_private(base) = 0;
 
-	address = __pa(address);
 	addr = address & LARGE_PAGE_MASK; 
 	pbase = (pte_t *)page_address(base);
 	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
@@ -95,7 +94,7 @@ static inline void save_page(struct page
  * No more special protections in this 2/4MB area - revert to a
  * large page again. 
  */
-static void revert_page(unsigned long address, pgprot_t ref_prot)
+static void revert_page(unsigned long address, unsigned long pfn, pgprot_t ref_prot)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -109,7 +108,7 @@ static void revert_page(unsigned long ad
 	pmd = pmd_offset(pud, address);
 	BUG_ON(pmd_val(*pmd) & _PAGE_PSE);
 	pgprot_val(ref_prot) |= _PAGE_PSE;
-	large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot);
+	large_pte = mk_pte_phys((pfn << PAGE_SHIFT) & LARGE_PAGE_MASK, ref_prot);
 	set_pte((pte_t *)pmd, large_pte);
 }      
 
@@ -137,7 +136,7 @@ __change_page_attr(unsigned long address
 			struct page *split;
 			ref_prot2 = __pgprot(pgprot_val(pte_pgprot(*lookup_address(address))) & ~(1<<_PAGE_BIT_PSE));
 
-			split = split_large_page(address, prot, ref_prot2);
+			split = split_large_page(pfn << PAGE_SHIFT, prot, ref_prot2);
 			if (!split)
 				return -ENOMEM;
 			set_pte(kpte,mk_pte(split, ref_prot2));
@@ -156,7 +155,7 @@ __change_page_attr(unsigned long address
 
 	if (page_private(kpte_page) == 0) {
 		save_page(kpte_page);
-		revert_page(address, ref_prot);
+		revert_page(address, pfn, ref_prot);
  	}
 	return 0;
 } 
@@ -176,6 +175,7 @@ __change_page_attr(unsigned long address
  */
 int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
 {
+	unsigned long phys_base_pfn = __pa_symbol(__START_KERNEL_map) >> PAGE_SHIFT;
 	int err = 0; 
 	int i; 
 
@@ -188,14 +188,16 @@ int change_page_attr_addr(unsigned long 
 			break; 
 		/* Handle kernel mapping too which aliases part of the
 		 * lowmem */
-		if (__pa(address) < KERNEL_TEXT_SIZE) {
+		if ((pfn >= phys_base_pfn) &&
+			((pfn - phys_base_pfn) < (KERNEL_TEXT_SIZE >> PAGE_SHIFT)))
+		{
 			unsigned long addr2;
 			pgprot_t prot2 = prot;
-			addr2 = __START_KERNEL_map + __pa(address);
+			addr2 = __START_KERNEL_map + ((pfn - phys_base_pfn) << PAGE_SHIFT);
  			pgprot_val(prot2) &= ~_PAGE_NX;
 			err = __change_page_attr(addr2, pfn, prot2, PAGE_KERNEL_EXEC);
-		} 
-	} 	
+		}
+	}
 	up_write(&init_mm.mmap_sem); 
 	return err;
 }
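
The pageattr changes all follow from one fact: with the kernel relocated,
__pa(address) no longer works for addresses in the __START_KERNEL_map alias, so
the pfn is threaded through explicitly and the alias is rebuilt from phys_base.
The address the new code computes, sketched in isolation:

    /* illustrative only; __START_KERNEL_map and PAGE_SHIFT as in page.h */
    static unsigned long text_alias(unsigned long pfn,
                                    unsigned long phys_base_pfn)
    {
        return __START_KERNEL_map + ((pfn - phys_base_pfn) << PAGE_SHIFT);
    }
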
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index ac8ea66..26d315b 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -650,9 +650,9 @@ void vmalloc_sync_all(void)
 			start = address + PGDIR_SIZE;
 	}
 	/* Check that there is no need to do the same for the modules area. */
-	BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
+	BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL_map));
 	BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == 
-				(__START_KERNEL & PGDIR_MASK)));
+				(__START_KERNEL_map & PGDIR_MASK)));
 }
 
 static int __init enable_pagefaulttrace(char *str)
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index db5a373..7cd3c22 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -11,8 +11,8 @@ #define ALIGN_FUNCTION()  . = ALIGN(8)
 
 #define RODATA								\
 	. = ALIGN(4096);						\
-	__start_rodata = .;						\
 	.rodata           : AT(ADDR(.rodata) - LOAD_OFFSET) {		\
+		VMLINUX_SYMBOL(__start_rodata) = .;			\
 		*(.rodata) *(.rodata.*)					\
 		*(__vermagic)		/* Kernel version magic */	\
 	}								\
diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h
index f5bf544..1af9f6b 100644
--- a/include/asm-i386/page.h
+++ b/include/asm-i386/page.h
@@ -112,23 +112,21 @@ #endif /* __ASSEMBLY__ */
 
 #ifdef __ASSEMBLY__
 #define __PAGE_OFFSET		CONFIG_PAGE_OFFSET
-#define __PHYSICAL_START	CONFIG_PHYSICAL_START
 #else
 #define __PAGE_OFFSET		((unsigned long)CONFIG_PAGE_OFFSET)
-#define __PHYSICAL_START	((unsigned long)CONFIG_PHYSICAL_START)
 #endif
-#define __KERNEL_START		(__PAGE_OFFSET + __PHYSICAL_START)
 
 /*
  * Under exec-shield we don't use the generic fixmap gate area.
  * The vDSO ("gate area") has a normal vma found the normal ways.
  */
 #define __HAVE_ARCH_GATE_AREA	1
 
 #define PAGE_OFFSET		((unsigned long)__PAGE_OFFSET)
 #define VMALLOC_RESERVE		((unsigned long)__VMALLOC_RESERVE)
 #define MAXMEM			(-__PAGE_OFFSET-__VMALLOC_RESERVE)
 #define __pa(x)			((unsigned long)(x)-PAGE_OFFSET)
+#define __pa_symbol(x)		__pa(x)
 #define __va(x)			((void *)((unsigned long)(x)+PAGE_OFFSET))
 #define pfn_to_kaddr(pfn)      __va((pfn) << PAGE_SHIFT)
 #ifdef CONFIG_FLATMEM
diff --git a/include/asm-x86_64/const.h b/include/asm-x86_64/const.h
new file mode 100644
index 0000000..54fb08f
--- /dev/null
+++ b/include/asm-x86_64/const.h
@@ -0,0 +1,20 @@
+/* const.h: Macros for dealing with constants.  */
+
+#ifndef _X86_64_CONST_H
+#define _X86_64_CONST_H
+
+/* Some constant macros are used in both assembler and
+ * C code.  Therefore we cannot annotate them always with
+ * 'UL' and other type specifiers unilaterally.  We
+ * use the following macros to deal with this.
+ */
+
+#ifdef __ASSEMBLY__
+#define _AC(X,Y)	X
+#else
+#define __AC(X,Y)	(X##Y)
+#define _AC(X,Y)	__AC(X,Y)
+#endif
+
+
+#endif /* !(_X86_64_CONST_H) */
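
A quick expansion check of the new macro, phrased as C (the _AC_C/_AC_S names
below are illustrative, standing for the C and assembler branches of const.h):

    #define __AC(X,Y)  (X##Y)
    #define _AC_C(X,Y) __AC(X,Y)    /* C side:         _AC(1,UL) -> (1UL) */
    #define _AC_S(X,Y) X             /* assembler side: _AC(1,UL) ->  1    */

    #define PAGE_SHIFT 12
    _Static_assert((_AC_C(1,UL) << PAGE_SHIFT) == 4096UL, "C expansion");
    /* the assembler, which cannot parse the UL suffix, sees 1 << 12 */

This is what lets PAGE_SIZE, PMD_SIZE, __START_KERNEL_map and friends below be
shared verbatim between .c and .S files instead of being defined twice.
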
diff --git a/include/asm-x86_64/page.h b/include/asm-x86_64/page.h
index 10f3461..d125c09 100644
--- a/include/asm-x86_64/page.h
+++ b/include/asm-x86_64/page.h
@@ -1,14 +1,11 @@
 #ifndef _X86_64_PAGE_H
 #define _X86_64_PAGE_H
 
+#include <asm/const.h>
 
 /* PAGE_SHIFT determines the page size */
 #define PAGE_SHIFT	12
-#ifdef __ASSEMBLY__
-#define PAGE_SIZE	(0x1 << PAGE_SHIFT)
-#else
-#define PAGE_SIZE	(1UL << PAGE_SHIFT)
-#endif
+#define PAGE_SIZE	(_AC(1,UL) << PAGE_SHIFT)
 #define PAGE_MASK	(~(PAGE_SIZE-1))
 #define PHYSICAL_PAGE_MASK	(~(PAGE_SIZE-1) & __PHYSICAL_MASK)
 
@@ -33,10 +30,10 @@ #define MCE_STACK 5
 #define N_EXCEPTION_STACKS 5  /* hw limit: 7 */
 
 #define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
-#define LARGE_PAGE_SIZE (1UL << PMD_SHIFT)
+#define LARGE_PAGE_SIZE (_AC(1,UL) << PMD_SHIFT)
 
 #define HPAGE_SHIFT PMD_SHIFT
-#define HPAGE_SIZE	((1UL) << HPAGE_SHIFT)
+#define HPAGE_SIZE	(_AC(1,UL) << HPAGE_SHIFT)
 #define HPAGE_MASK	(~(HPAGE_SIZE - 1))
 #define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
 
@@ -64,6 +61,8 @@ #define PTE_MASK	PHYSICAL_PAGE_MASK
 
 typedef struct { unsigned long pgprot; } pgprot_t;
 
+extern unsigned long phys_base;
+
 #define pte_val(x)	((x).pte)
 #define pmd_val(x)	((x).pmd)
 #define pud_val(x)	((x).pud)
@@ -76,29 +75,22 @@ #define __pud(x) ((pud_t) { (x) } )
 #define __pgd(x) ((pgd_t) { (x) } )
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
-#define __PHYSICAL_START	((unsigned long)CONFIG_PHYSICAL_START)
-#define __START_KERNEL		(__START_KERNEL_map + __PHYSICAL_START)
-#define __START_KERNEL_map	0xffffffff80000000UL
-#define __PAGE_OFFSET           0xffff810000000000UL
-
-#else
-#define __PHYSICAL_START	CONFIG_PHYSICAL_START
-#define __START_KERNEL		(__START_KERNEL_map + __PHYSICAL_START)
-#define __START_KERNEL_map	0xffffffff80000000
-#define __PAGE_OFFSET           0xffff810000000000
 #endif /* !__ASSEMBLY__ */
 
+#define __START_KERNEL_map	_AC(0xffffffff80000000,UL)
+#define __PAGE_OFFSET           _AC(0xffff810000000000,UL)
+
 /* to align the pointer to the (next) page boundary */
 #define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
 
 /* See Documentation/x86_64/mm.txt for a description of the memory map. */
 #define __PHYSICAL_MASK_SHIFT	46
-#define __PHYSICAL_MASK		((1UL << __PHYSICAL_MASK_SHIFT) - 1)
+#define __PHYSICAL_MASK		((_AC(1,UL) << __PHYSICAL_MASK_SHIFT) - 1)
 #define __VIRTUAL_MASK_SHIFT	48
-#define __VIRTUAL_MASK		((1UL << __VIRTUAL_MASK_SHIFT) - 1)
+#define __VIRTUAL_MASK		((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1)
 
-#define KERNEL_TEXT_SIZE  (40UL*1024*1024)
-#define KERNEL_TEXT_START 0xffffffff80000000UL 
+#define KERNEL_TEXT_SIZE  (_AC(40,UL)*1024*1024)
+#define KERNEL_TEXT_START _AC(0xffffffff80000000,UL)
 
 #ifndef __ASSEMBLY__
 
@@ -106,21 +98,19 @@ #include <asm/bug.h>
 
 #endif /* __ASSEMBLY__ */
 
-#define PAGE_OFFSET		((unsigned long)__PAGE_OFFSET)
+#define PAGE_OFFSET		__PAGE_OFFSET
 
 /* Note: __pa(&symbol_visible_to_c) should be always replaced with __pa_symbol.
-   Otherwise you risk miscompilation. */ 
-#define __pa(x)			(((unsigned long)(x)>=__START_KERNEL_map)?(unsigned long)(x) - (unsigned long)__START_KERNEL_map:(unsigned long)(x) - PAGE_OFFSET)
+   Otherwise you risk miscompilation. */
+#define __pa(x)			((unsigned long)(x) - PAGE_OFFSET)
 /* __pa_symbol should be used for C visible symbols.
    This seems to be the official gcc blessed way to do such arithmetic. */ 
 #define __pa_symbol(x)		\
 	({unsigned long v;  \
 	  asm("" : "=r" (v) : "0" (x)); \
-	  __pa(v); })
+	  ((v - __START_KERNEL_map) + phys_base); })
 
 #define __va(x)			((void *)((unsigned long)(x)+PAGE_OFFSET))
-#define __boot_va(x)		__va(x)
-#define __boot_pa(x)		__pa(x)
 #ifdef CONFIG_FLATMEM
 #define pfn_valid(pfn)		((pfn) < end_pfn)
 #endif
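
The page.h hunk is the semantic core of the relocatable-kernel interface: __pa
now handles only the direct mapping, while __pa_symbol rebases kernel-image
addresses by the runtime phys_base. Side by side, as a sketch with the
constants inlined:

    #define __START_KERNEL_map 0xffffffff80000000UL
    #define PAGE_OFFSET        0xffff810000000000UL
    extern unsigned long phys_base;   /* 0 unless the kernel was relocated */

    /* direct mapping only; kernel-text addresses are no longer accepted */
    static unsigned long pa(unsigned long va)
    {
        return va - PAGE_OFFSET;
    }

    /* kernel image symbols: offset within the image plus its load address */
    static unsigned long pa_symbol(unsigned long va)
    {
        return (va - __START_KERNEL_map) + phys_base;
    }

This split is why several files in the patch switch call sites from __pa() or
virt_to_phys() to __pa_symbol() for linker symbols such as _text and _etext.
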
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h
index a31ab4e..fa43712 100644
--- a/include/asm-x86_64/pgtable.h
+++ b/include/asm-x86_64/pgtable.h
@@ -1,6 +1,9 @@
 #ifndef _X86_64_PGTABLE_H
 #define _X86_64_PGTABLE_H
 
+#include <asm/const.h>
+#ifndef __ASSEMBLY__
+
 /*
  * This file contains the functions and defines necessary to modify and use
  * the x86-64 page table tree.
@@ -12,14 +15,12 @@ #include <linux/threads.h>
 #include <asm/pda.h>
 
 extern pud_t level3_kernel_pgt[512];
-extern pud_t level3_physmem_pgt[512];
 extern pud_t level3_ident_pgt[512];
 extern pmd_t level2_kernel_pgt[512];
 extern pgd_t init_level4_pgt[];
-extern pgd_t boot_level4_pgt[];
 extern unsigned long __supported_pte_mask;
 
-#define swapper_pg_dir init_level4_pgt
+#define swapper_pg_dir ((pgd_t *)NULL)
 
 extern int nonx_setup(char *str);
 extern void paging_init(void);
@@ -32,7 +33,9 @@ extern unsigned long pgkern_mask;
  * for zero-mapped memory areas etc..
  */
 extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+#define ZERO_PAGE(vaddr) (pfn_to_page(__pa_symbol(&empty_zero_page) >> PAGE_SHIFT))
+
+#endif /* !__ASSEMBLY__ */
 
 /*
  * PGDIR_SHIFT determines what a top-level page table entry can map
@@ -58,6 +61,8 @@ #define PTRS_PER_PMD	512
  */
 #define PTRS_PER_PTE	512
 
+#ifndef __ASSEMBLY__
+
 #define pte_ERROR(e) \
 	printk("%s:%d: bad pte %p(%016lx).\n", __FILE__, __LINE__, &(e), pte_val(e))
 #define pmd_ERROR(e) \
@@ -124,22 +129,23 @@ #define pte_same(a, b)		((a).pte == (b).
 
 #define pte_pgprot(a)	(__pgprot((a).pte & ~PHYSICAL_PAGE_MASK))
 
-#define PMD_SIZE	(1UL << PMD_SHIFT)
+#endif /* !__ASSEMBLY__ */
+
+#define PMD_SIZE	(_AC(1,UL) << PMD_SHIFT)
 #define PMD_MASK	(~(PMD_SIZE-1))
-#define PUD_SIZE	(1UL << PUD_SHIFT)
+#define PUD_SIZE	(_AC(1,UL) << PUD_SHIFT)
 #define PUD_MASK	(~(PUD_SIZE-1))
-#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
+#define PGDIR_SIZE	(_AC(1,UL) << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
 #define USER_PTRS_PER_PGD	((TASK_SIZE-1)/PGDIR_SIZE+1)
 #define FIRST_USER_ADDRESS	0
 
-#ifndef __ASSEMBLY__
-#define MAXMEM		 0x3fffffffffffUL
-#define VMALLOC_START    0xffffc20000000000UL
-#define VMALLOC_END      0xffffe1ffffffffffUL
-#define MODULES_VADDR    0xffffffff88000000UL
-#define MODULES_END      0xfffffffffff00000UL
+#define MAXMEM		 _AC(0x3fffffffffff,UL)
+#define VMALLOC_START    _AC(0xffffc20000000000,UL)
+#define VMALLOC_END      _AC(0xffffe1ffffffffff,UL)
+#define MODULES_VADDR    _AC(0xffffffff88000000,UL)
+#define MODULES_END      _AC(0xfffffffffff00000,UL)
 #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
 
 #define _PAGE_BIT_PRESENT	0
@@ -165,7 +171,7 @@ #define _PAGE_FILE	0x040	/* nonlinear fi
 #define _PAGE_GLOBAL	0x100	/* Global TLB entry */
 
 #define _PAGE_PROTNONE	0x080	/* If not present */
-#define _PAGE_NX        (1UL<<_PAGE_BIT_NX)
+#define _PAGE_NX        (_AC(1,UL)<<_PAGE_BIT_NX)
 
 #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
 #define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
@@ -227,6 +233,8 @@ #define __S101	PAGE_READONLY_EXEC
 #define __S110	PAGE_SHARED_EXEC
 #define __S111	PAGE_SHARED_EXEC
 
+#ifndef __ASSEMBLY__
+
 static inline unsigned long pgd_bad(pgd_t pgd) 
 { 
        unsigned long val = pgd_val(pgd);
@@ -418,8 +426,6 @@ extern spinlock_t pgd_lock;
 extern struct page *pgd_list;
 void vmalloc_sync_all(void);
 
-#endif /* !__ASSEMBLY__ */
-
 extern int kern_addr_valid(unsigned long addr); 
 
 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot)		\
@@ -449,5 +455,6 @@ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_F
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 #define __HAVE_ARCH_PTE_SAME
 #include <asm-generic/pgtable.h>
+#endif /* !__ASSEMBLY__ */
 
 #endif /* _X86_64_PGTABLE_H */
diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h
index 038fe1f..978ea43 100644
--- a/include/asm-x86_64/proto.h
+++ b/include/asm-x86_64/proto.h
@@ -11,8 +11,6 @@ struct pt_regs;
 extern void start_kernel(void);
 extern void pda_init(int); 
 
-extern void zap_low_mappings(int cpu);
-
 extern void early_idt_handler(void);
 
 extern void mcheck_init(struct cpuinfo_x86 *c);
diff --git a/include/asm-x86_64/segment.h b/include/asm-x86_64/segment.h
index d4bed33..58d6715 100644
--- a/include/asm-x86_64/segment.h
+++ b/include/asm-x86_64/segment.h
@@ -6,7 +6,7 @@ #include <asm/cache.h>
 #define __KERNEL_CS	0x10
 #define __KERNEL_DS	0x18
 
-#define __KERNEL32_CS   0x38
+#define __KERNEL32_CS   0x08
 
 /* 
  * we cannot use the same code segment descriptor for user and kernel
@@ -20,7 +20,7 @@ #define __USER_DS     0x2b   /* 5*8+3 */
 #define __USER_CS     0x33   /* 6*8+3 */ 
 #define __USER32_DS	__USER_DS 
 
-#define GDT_ENTRY_TLS 1
+#define GDT_ENTRY_TLS 7
 #define GDT_ENTRY_TSS 8	/* needs two entries */
 #define GDT_ENTRY_LDT 10 /* needs two entries */
 #define GDT_ENTRY_TLS_MIN 12
diff --git a/include/asm-x86_64/suspend.h b/include/asm-x86_64/suspend.h
index bc7f817..9c3f8de 100644
--- a/include/asm-x86_64/suspend.h
+++ b/include/asm-x86_64/suspend.h
@@ -17,6 +17,7 @@ struct saved_context {
   	u16 ds, es, fs, gs, ss;
 	unsigned long gs_base, gs_kernel_base, fs_base;
 	unsigned long cr0, cr2, cr3, cr4, cr8;
+	unsigned long efer;
 	u16 gdt_pad;
 	u16 gdt_limit;
 	unsigned long gdt_base;
@@ -44,12 +45,12 @@ #define loaddebug(thread,register) \
 extern void fix_processor_context(void);
 
 #ifdef CONFIG_ACPI_SLEEP
-extern unsigned long saved_eip;
-extern unsigned long saved_esp;
-extern unsigned long saved_ebp;
-extern unsigned long saved_ebx;
-extern unsigned long saved_esi;
-extern unsigned long saved_edi;
+extern unsigned long saved_rip;
+extern unsigned long saved_rsp;
+extern unsigned long saved_rbp;
+extern unsigned long saved_rbx;
+extern unsigned long saved_rsi;
+extern unsigned long saved_rdi;
 
 /* routines for saving/restoring kernel state */
 extern int acpi_save_state_mem(void);
diff --git a/include/linux/elf.h b/include/linux/elf.h
index b70d1d2..6fa8d3d 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -1,9 +1,11 @@
 #ifndef _LINUX_ELF_H
 #define _LINUX_ELF_H
 
+#include <linux/elf-em.h>
+
+#ifndef __ASSEMBLY__
 #include <linux/types.h>
 #include <linux/auxvec.h>
-#include <linux/elf-em.h>
 #include <asm/elf.h>
 
 #ifndef elf_read_implies_exec
@@ -30,6 +32,8 @@ typedef __u32	Elf64_Word;
 typedef __u64	Elf64_Xword;
 typedef __s64	Elf64_Sxword;
 
+#endif /* __ASSEMBLY__ */
+
 /* These constants are for the segment types stored in the image headers */
 #define PT_NULL    0
 #define PT_LOAD    1
@@ -97,6 +101,8 @@ #define STT_FILE    4
 #define STT_COMMON  5
 #define STT_TLS     6
 
+#ifndef __ASSEMBLY__
+
 #define ELF_ST_BIND(x)		((x) >> 4)
 #define ELF_ST_TYPE(x)		(((unsigned int) x) & 0xf)
 #define ELF32_ST_BIND(x)	ELF_ST_BIND(x)
@@ -204,12 +210,16 @@ typedef struct elf64_hdr {
   Elf64_Half e_shstrndx;
 } Elf64_Ehdr;
 
+#endif /* __ASSEMBLY__ */
+
 /* These constants define the permissions on sections in the program
    header, p_flags. */
 #define PF_R		0x4
 #define PF_W		0x2
 #define PF_X		0x1
 
+#ifndef __ASSEMBLY__
+
 typedef struct elf32_phdr{
   Elf32_Word	p_type;
   Elf32_Off	p_offset;
@@ -232,6 +242,8 @@ typedef struct elf64_phdr {
   Elf64_Xword p_align;		/* Segment alignment, file & memory */
 } Elf64_Phdr;
 
+#endif /* __ASSEMBLY__ */
+
 /* sh_type */
 #define SHT_NULL	0
 #define SHT_PROGBITS	1
@@ -265,6 +277,8 @@ #define SHN_HIPROC	0xff1f
 #define SHN_ABS		0xfff1
 #define SHN_COMMON	0xfff2
 #define SHN_HIRESERVE	0xffff
+
+#ifndef __ASSEMBLY__
  
 typedef struct {
   Elf32_Word	sh_name;
@@ -292,6 +306,8 @@ typedef struct elf64_shdr {
   Elf64_Xword sh_entsize;	/* Entry size if section holds table */
 } Elf64_Shdr;
 
+#endif /* __ASSEMBLY__ */
+
 #define	EI_MAG0		0		/* e_ident[] indexes */
 #define	EI_MAG1		1
 #define	EI_MAG2		2
@@ -322,8 +338,9 @@ #define EV_NONE		0		/* e_version, EI_VER
 #define EV_CURRENT	1
 #define EV_NUM		2
 
-#define ELFOSABI_NONE	0
-#define ELFOSABI_LINUX	3
+#define ELFOSABI_NONE		0
+#define ELFOSABI_LINUX		3
+#define ELFOSABI_STANDALONE	255
 
 #ifndef ELF_OSABI
 #define ELF_OSABI ELFOSABI_NONE
@@ -338,6 +355,8 @@ #define NT_AUXV		6
 #define NT_PRXFPREG     0x46e62b7f      /* copied from gdb5.1/include/elf/common.h */
 
 
+#ifndef __ASSEMBLY__
+
 /* Note header in a PT_NOTE section */
 typedef struct elf32_note {
   Elf32_Word	n_namesz;	/* Name size */
@@ -368,5 +387,7 @@ #define elf_note	elf64_note
 
 #endif
 
+#endif /* __ASSEMBLY__ */
+
 
 #endif /* _LINUX_ELF_H */
diff --git a/include/linux/elf_boot.h b/include/linux/elf_boot.h
new file mode 100644
index 0000000..09301e5
--- /dev/null
+++ b/include/linux/elf_boot.h
@@ -0,0 +1,19 @@
+#ifndef ELF_BOOT_H
+#define ELF_BOOT_H
+
+/* Elf notes to help bootloaders identify what program they are booting.
+ */
+
+/* Standardized Elf image notes for booting... The name for all of these is ELFBoot */
+#define ELF_NOTE_BOOT		"ELFBoot"
+
+#define EIN_PROGRAM_NAME	0x00000001
+/* The program in this ELF file */
+#define EIN_PROGRAM_VERSION	0x00000002
+/* The version of the program in this ELF file */
+#define EIN_PROGRAM_CHECKSUM	0x00000003
+/* ip style checksum of the memory image. */
+#define EIN_ARGUMENT_STYLE	0x00000004
+/* String identifying argument passing style */
+
+#endif /* ELF_BOOT_H */
diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h
index 2925e66..b02308e 100644
--- a/include/linux/screen_info.h
+++ b/include/linux/screen_info.h
@@ -42,7 +42,8 @@ struct screen_info {
 	u16 pages;		/* 0x32 */
 	u16 vesa_attributes;	/* 0x34 */
 	u32 capabilities;       /* 0x36 */
-				/* 0x3a -- 0x3f reserved for future expansion */
+				/* 0x3a -- 0x3b reserved for future expansion */
+				/* 0x3c -- 0x3f micro stack for relocatable kernels */
 };
 
 extern struct screen_info screen_info;
diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index 22d281c..4c1ad0a 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -43,7 +43,7 @@ struct sym_entry {
 
 static struct sym_entry *table;
 static unsigned int table_size, table_cnt;
-static unsigned long long _stext, _etext, _sinittext, _einittext, _sextratext, _eextratext;
+static unsigned long long _text, _stext, _etext, _sinittext, _einittext, _sextratext, _eextratext;
 static int all_symbols = 0;
 static char symbol_prefix_char = '\0';
 
@@ -91,7 +91,9 @@ static int read_symbol(FILE *in, struct 
 		sym++;
 
 	/* Ignore most absolute/undefined (?) symbols. */
-	if (strcmp(sym, "_stext") == 0)
+	if (strcmp(sym, "_text") == 0)
+		_text = s->addr;
+	else if (strcmp(sym, "_stext") == 0)
 		_stext = s->addr;
 	else if (strcmp(sym, "_etext") == 0)
 		_etext = s->addr;
@@ -265,9 +267,21 @@ static void write_src(void)
 
 	printf(".data\n");
 
+	/* Make the symbol addresses relocatable by emitting them
+	 * relative to '_text'.  The symbol names themselves cannot be
+	 * used to construct normal symbol references, as the list
+	 * contains symbols that are declared static and are private
+	 * to their .o files, which prevents .tmp_kallsyms.o or any
+	 * other object from referencing them.
+	 */
 	output_label("kallsyms_addresses");
 	for (i = 0; i < table_cnt; i++) {
-		printf("\tPTR\t%#llx\n", table[i].addr);
+		if (toupper(table[i].sym[0]) != 'A') {
+			printf("\tPTR\t_text + %#llx\n",
+				table[i].addr - _text);
+		} else {
+			printf("\tPTR\t%#llx\n", table[i].addr);
+		}
 	}
 	printf("\n");
 
> >> 
> >> I'm a little disappointed but at this point it isn't a great surprise,
> >> the code is early yet and hasn't had much testing or attention.
> >> I wonder if I have missed something else silly.
> >> 
> >> As for testing, can you use plain kexec to load the kernel at a
> >> different address?  I'm curious to know if it is something related
> >> to the kexec on panic path or if it is just running at a different
> >> location that is the problem.
> >

I think I have found the 'something silly'.  Here is a patch that allows
our Dell em64t boxes to boot.  This change matches the original code.  The
main difference that caused the problems was the setting of the _PAGE_NX
bit, which caused issues in early_ioremap().

Thanks to Larry Woodman for debugging this.

Cheers,
Don
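
To make the flag difference concrete, here is a small stand-alone sketch.
The macro values are my reconstruction of the 2.6.18-era asm-x86_64
headers (the _KERNPG_TABLE definition appears in the pgtable.h hunk
earlier in this series), so treat them as assumptions rather than gospel:

#include <stdio.h>

/* Flag values as reconstructed from the 2.6.18-era headers (assumed): */
#define _PAGE_PRESENT	0x001ULL
#define _PAGE_RW	0x002ULL
#define _PAGE_ACCESSED	0x020ULL
#define _PAGE_DIRTY	0x040ULL
#define _PAGE_PSE	0x080ULL			/* 2MB page */
#define _PAGE_NX	(1ULL << 63)

#define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
#define __PAGE_KERNEL \
	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX)
#define __PAGE_KERNEL_LARGE	(__PAGE_KERNEL | _PAGE_PSE)

int main(void)
{
	/* The patch swaps __PAGE_KERNEL_LARGE for _KERNPG_TABLE | _PAGE_PSE;
	 * the only bit that changes is _PAGE_NX: */
	unsigned long long dropped =
		__PAGE_KERNEL_LARGE ^ (_KERNPG_TABLE | _PAGE_PSE);
	printf("dropped bit: %#llx (_PAGE_NX)\n", dropped);
	return 0;
}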


Signed-off-by:  Don Zickus <dzickus@redhat.com>

--- linux-2.6.17.noarch/arch/x86_64/mm/init.c.orig	2006-08-11 12:35:58.000000000 -0400
+++ linux-2.6.17.noarch/arch/x86_64/mm/init.c	2006-08-11 13:14:20.000000000 -0400
@@ -196,7 +196,7 @@
 		vaddr += addr & ~PMD_MASK;
 		addr &= PMD_MASK;
 		for (i = 0; i < pmds; i++, addr += PMD_SIZE)
-			set_pmd(pmd + i,__pmd(addr | __PAGE_KERNEL_LARGE));
+			set_pmd(pmd + i,__pmd(addr | _KERNPG_TABLE | _PAGE_PSE));
 		__flush_tlb();
 		return (void *)vaddr;
 	next:


o Kdump on x86_64 fails because at run time bzImage decompression consumes
  more memory than advertised and stomps over some of the data loaded by
  kexec immediately after the bzImage.

o How much memory the bzImage will effectively consume at load time is
  exported through the "MemSize" field of the bzImage program headers.

o This patch makes further adjustments while calculating the load-time
  memory requirements of the bzImage, giving the loader a clue about
  where it is safe to load other data.

o The adjustments are as follows (see the sketch after this list):

	- Add the memory consumed by the decompressor code (code+data+bss etc.).
	- Adjust the memory required for safe decompression (see misc.c).
	- Take into account the heap memory used by the decompressor code.
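
For illustration, a stand-alone C sketch of the arithmetic the patch
below adds to build.c; the input sizes are hypothetical, and the
constants (8 bytes of slack per 32K block, 32768 + 18 bytes, 4K
alignment) are the ones the patch itself uses:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical inputs; build.c derives these from the ELF
	 * headers of vmlinux, the size of vmlinux.bin.gz, and the ELF
	 * headers of the compressed vmlinux with decompressor attached. */
	uint64_t vmlinux_memsz   = 20 << 20;
	uint64_t cvmlinux_memsz  =  5 << 20;
	uint64_t vmlinux_gz_size =  4 << 20;

	uint64_t kernel_memsz = vmlinux_memsz;
	/* decompressor code/data/bss beyond the compressed payload */
	kernel_memsz += cvmlinux_memsz - vmlinux_gz_size;
	/* 8 bytes of slack per 32K of input (see misc.c) */
	kernel_memsz += vmlinux_memsz >> 12;
	/* plus 32K + 18 bytes of extra slack */
	kernel_memsz += 32768 + 18;
	/* round up to a 4K boundary */
	kernel_memsz = (kernel_memsz + 4095) & ~4095ULL;

	printf("kernel_memsz = %llu\n", (unsigned long long)kernel_memsz);
	return 0;
}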


Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
---

 arch/x86_64/boot/Makefile               |    3 
 arch/x86_64/boot/compressed/vmlinux.lds |    2 
 arch/x86_64/boot/tools/build.c          |  129 ++++++++++++++++++++------------
 3 files changed, 87 insertions(+), 47 deletions(-)

diff -puN arch/x86_64/boot/tools/build.c~x86_64-bzImage-mem-size-adjustment-fix arch/x86_64/boot/tools/build.c
--- linux-2.6.18-rc3-1M/arch/x86_64/boot/tools/build.c~x86_64-bzImage-mem-size-adjustment-fix	2006-08-10 20:05:10.000000000 -0400
+++ linux-2.6.18-rc3-1M-root/arch/x86_64/boot/tools/build.c	2006-08-11 01:45:59.000000000 -0400
@@ -54,8 +54,13 @@ int fd;
 int is_big_kernel;
 
 #define MAX_PHDRS 100
-static Elf64_Ehdr ehdr;
-static Elf64_Phdr phdr[MAX_PHDRS];
+/* Uncompressed kernel vmlinux. */
+static Elf64_Ehdr vmlinux_ehdr;
+static Elf64_Phdr vmlinux_phdr[MAX_PHDRS];
+
+/* Compressed kernel vmlinux (With decompressor code attached)*/
+static Elf64_Ehdr cvmlinux_ehdr;
+static Elf64_Phdr cvmlinux_phdr[MAX_PHDRS];
 
 void die(const char * str, ...)
 {
@@ -98,80 +103,80 @@ void file_open(const char *name)
 		die("Unable to open `%s': %m", name);
 }
 
-static void read_ehdr(void)
+static void read_ehdr(Elf64_Ehdr *ehdr)
 {
-	if (read(fd, &ehdr, sizeof(ehdr)) != sizeof(ehdr)) {
+	if (read(fd, ehdr, sizeof(*ehdr)) != sizeof(*ehdr)) {
 		die("Cannot read ELF header: %s\n",
 			strerror(errno));
 	}
-	if (memcmp(ehdr.e_ident, ELFMAG, 4) != 0) {
+	if (memcmp(ehdr->e_ident, ELFMAG, 4) != 0) {
 		die("No ELF magic\n");
 	}
-	if (ehdr.e_ident[EI_CLASS] != ELFCLASS64) {
+	if (ehdr->e_ident[EI_CLASS] != ELFCLASS64) {
 		die("Not a 64 bit executable\n");
 	}
-	if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) {
+	if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) {
 		die("Not a LSB ELF executable\n");
 	}
-	if (ehdr.e_ident[EI_VERSION] != EV_CURRENT) {
+	if (ehdr->e_ident[EI_VERSION] != EV_CURRENT) {
 		die("Unknown ELF version\n");
 	}
 	/* Convert the fields to native endian */
-	ehdr.e_type      = elf16_to_cpu(ehdr.e_type);
-	ehdr.e_machine   = elf16_to_cpu(ehdr.e_machine);
-	ehdr.e_version   = elf32_to_cpu(ehdr.e_version);
-	ehdr.e_entry     = elf64_to_cpu(ehdr.e_entry);
-	ehdr.e_phoff     = elf64_to_cpu(ehdr.e_phoff);
-	ehdr.e_shoff     = elf64_to_cpu(ehdr.e_shoff);
-	ehdr.e_flags     = elf32_to_cpu(ehdr.e_flags);
-	ehdr.e_ehsize    = elf16_to_cpu(ehdr.e_ehsize);
-	ehdr.e_phentsize = elf16_to_cpu(ehdr.e_phentsize);
-	ehdr.e_phnum     = elf16_to_cpu(ehdr.e_phnum);
-	ehdr.e_shentsize = elf16_to_cpu(ehdr.e_shentsize);
-	ehdr.e_shnum     = elf16_to_cpu(ehdr.e_shnum);
-	ehdr.e_shstrndx  = elf16_to_cpu(ehdr.e_shstrndx);
+	ehdr->e_type      = elf16_to_cpu(ehdr->e_type);
+	ehdr->e_machine   = elf16_to_cpu(ehdr->e_machine);
+	ehdr->e_version   = elf32_to_cpu(ehdr->e_version);
+	ehdr->e_entry     = elf64_to_cpu(ehdr->e_entry);
+	ehdr->e_phoff     = elf64_to_cpu(ehdr->e_phoff);
+	ehdr->e_shoff     = elf64_to_cpu(ehdr->e_shoff);
+	ehdr->e_flags     = elf32_to_cpu(ehdr->e_flags);
+	ehdr->e_ehsize    = elf16_to_cpu(ehdr->e_ehsize);
+	ehdr->e_phentsize = elf16_to_cpu(ehdr->e_phentsize);
+	ehdr->e_phnum     = elf16_to_cpu(ehdr->e_phnum);
+	ehdr->e_shentsize = elf16_to_cpu(ehdr->e_shentsize);
+	ehdr->e_shnum     = elf16_to_cpu(ehdr->e_shnum);
+	ehdr->e_shstrndx  = elf16_to_cpu(ehdr->e_shstrndx);
 
-	if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN)) {
+	if ((ehdr->e_type != ET_EXEC) && (ehdr->e_type != ET_DYN)) {
 		die("Unsupported ELF header type\n");
 	}
-	if (ehdr.e_machine != EM_X86_64) {
+	if (ehdr->e_machine != EM_X86_64) {
 		die("Not for x86_64\n");
 	}
-	if (ehdr.e_version != EV_CURRENT) {
+	if (ehdr->e_version != EV_CURRENT) {
 		die("Unknown ELF version\n");
 	}
-	if (ehdr.e_ehsize != sizeof(Elf64_Ehdr)) {
+	if (ehdr->e_ehsize != sizeof(Elf64_Ehdr)) {
 		die("Bad Elf header size\n");
 	}
-	if (ehdr.e_phentsize != sizeof(Elf64_Phdr)) {
+	if (ehdr->e_phentsize != sizeof(Elf64_Phdr)) {
 		die("Bad program header entry\n");
 	}
-	if (ehdr.e_shentsize != sizeof(Elf64_Shdr)) {
+	if (ehdr->e_shentsize != sizeof(Elf64_Shdr)) {
 		die("Bad section header entry\n");
 	}
-	if (ehdr.e_shstrndx >= ehdr.e_shnum) {
+	if (ehdr->e_shstrndx >= ehdr->e_shnum) {
 		die("String table index out of bounds\n");
 	}
 }
 
-static void read_phds(void)
+static void read_phdrs(Elf64_Ehdr *ehdr, Elf64_Phdr *phdr)
 {
 	int i;
 	size_t size;
-	if (ehdr.e_phnum > MAX_PHDRS) {
+	if (ehdr->e_phnum > MAX_PHDRS) {
 		die("%d program headers supported: %d\n",
-			ehdr.e_phnum, MAX_PHDRS);
+			ehdr->e_phnum, MAX_PHDRS);
 	}
-	if (lseek(fd, ehdr.e_phoff, SEEK_SET) < 0) {
+	if (lseek(fd, ehdr->e_phoff, SEEK_SET) < 0) {
 		die("Seek to %d failed: %s\n",
-			ehdr.e_phoff, strerror(errno));
+			ehdr->e_phoff, strerror(errno));
 	}
-	size = sizeof(phdr[0])*ehdr.e_phnum;
-	if (read(fd, &phdr, size) != size) {
-		die("Cannot read ELF section headers: %s\n",
+	size = (sizeof(*phdr))*(ehdr->e_phnum);
+	if (read(fd, phdr, size) != size) {
+		die("Cannot read ELF program headers: %s\n",
 			strerror(errno));
 	}
-	for(i = 0; i < ehdr.e_phnum; i++) {
+	for(i = 0; i < ehdr->e_phnum; i++) {
 		phdr[i].p_type      = elf32_to_cpu(phdr[i].p_type);
 		phdr[i].p_flags     = elf32_to_cpu(phdr[i].p_flags);
 		phdr[i].p_offset    = elf64_to_cpu(phdr[i].p_offset);
@@ -183,13 +188,13 @@ static void read_phds(void)
 	}
 }
 
-uint64_t vmlinux_memsz(void)
+uint64_t elf_exec_memsz(Elf64_Ehdr *ehdr, Elf64_Phdr *phdr)
 {
 	uint64_t min, max, size;
 	int i;
 	max = 0;
 	min = ~max;
-	for(i = 0; i < ehdr.e_phnum; i++) {
+	for(i = 0; i < ehdr->e_phnum; i++) {
 		uint64_t start, end;
 		if (phdr[i].p_type != PT_LOAD)
 			continue;
@@ -200,31 +205,32 @@ uint64_t vmlinux_memsz(void)
 		if (end > max)
 			max = end;
 	}
-	/* Get the reported size by vmlinux */
+	/* Get the reported size by elf exec */
 	size = max - min;
 	return size;
 }
 
 void usage(void)
 {
-	die("Usage: build [-b] bootsect setup system rootdev vmlinux [> image]");
+	die("Usage: build [-b] bootsect setup system rootdev vmlinux vmlinux.bin.gz <vmlinux with decompressor code>[> image]");
 }
 
 int main(int argc, char ** argv)
 {
 	unsigned int i, sz, setup_sectors;
 	uint64_t kernel_offset, kernel_filesz, kernel_memsz;
+	uint64_t vmlinux_memsz, cvmlinux_memsz, vmlinux_gz_size;
 	int c;
 	u32 sys_size;
 	byte major_root, minor_root;
-	struct stat sb;
+	struct stat sb, vmlinux_gz_sb;
 
 	if (argc > 2 && !strcmp(argv[1], "-b"))
 	  {
 	    is_big_kernel = 1;
 	    argc--, argv++;
 	  }
-	if (argc != 6)
+	if (argc != 8)
 		usage();
 	if (!strcmp(argv[4], "CURRENT")) {
 		if (stat("/", &sb)) {
@@ -307,11 +313,42 @@ int main(int argc, char ** argv)
 	}
 	close(fd);
 
+	/* Open uncompressed vmlinux. */
 	file_open(argv[5]);
-	read_ehdr();
-	read_phds();
+	read_ehdr(&vmlinux_ehdr);
+	read_phdrs(&vmlinux_ehdr, vmlinux_phdr);
 	close(fd);
-	kernel_memsz = vmlinux_memsz();
+	vmlinux_memsz = elf_exec_memsz(&vmlinux_ehdr, vmlinux_phdr);
+
+	/* Process vmlinux.bin.gz */
+	file_open(argv[6]);
+	if (fstat (fd, &vmlinux_gz_sb))
+		die("Unable to stat `%s': %m", argv[6]);
+	close(fd);
+	vmlinux_gz_size = vmlinux_gz_sb.st_size;
+
+	/* Process compressed vmlinux (compressed vmlinux + decompressor) */
+	file_open(argv[7]);
+	read_ehdr(&cvmlinux_ehdr);
+	read_phdrs(&cvmlinux_ehdr, cvmlinux_phdr);
+	close(fd);
+	cvmlinux_memsz = elf_exec_memsz(&cvmlinux_ehdr, cvmlinux_phdr);
+
+	kernel_memsz = vmlinux_memsz;
+
+	/* Add decompressor code size */
+	kernel_memsz += cvmlinux_memsz - vmlinux_gz_size;
+
+	/* Refer arch/x86_64/boot/compressed/misc.c for following adj.
+	 * Add 8 bytes for every 32K input block
+	 */
+	kernel_memsz += vmlinux_memsz >> 12;
+
+	/* Add 32K + 18 bytes of extra slack */
+	kernel_memsz = kernel_memsz + (32768 + 18);
+
+	/* Align on a 4K boundary. */
+	kernel_memsz = (kernel_memsz + 4095) & (~4095);
 
 	if (lseek(1,  88, SEEK_SET) != 88)		    /* Write sizes to the bootsector */
 		die("Output: seek failed");
diff -puN arch/x86_64/boot/Makefile~x86_64-bzImage-mem-size-adjustment-fix arch/x86_64/boot/Makefile
--- linux-2.6.18-rc3-1M/arch/x86_64/boot/Makefile~x86_64-bzImage-mem-size-adjustment-fix	2006-08-11 00:53:32.000000000 -0400
+++ linux-2.6.18-rc3-1M-root/arch/x86_64/boot/Makefile	2006-08-11 00:56:27.000000000 -0400
@@ -41,7 +41,8 @@ $(obj)/bzImage: BUILDFLAGS   := -b
 
 quiet_cmd_image = BUILD   $@
 cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/bootsect $(obj)/setup \
-	    $(obj)/vmlinux.bin $(ROOT_DEV) vmlinux > $@
+	    $(obj)/vmlinux.bin $(ROOT_DEV) vmlinux \
+	    $(obj)/compressed/vmlinux.bin.gz $(obj)/compressed/vmlinux > $@
 
 $(obj)/bzImage: $(obj)/bootsect $(obj)/setup \
 			      $(obj)/vmlinux.bin $(obj)/tools/build FORCE
diff -puN arch/x86_64/boot/compressed/vmlinux.lds~x86_64-bzImage-mem-size-adjustment-fix arch/x86_64/boot/compressed/vmlinux.lds
--- linux-2.6.18-rc3-1M/arch/x86_64/boot/compressed/vmlinux.lds~x86_64-bzImage-mem-size-adjustment-fix	2006-08-11 01:29:52.000000000 -0400
+++ linux-2.6.18-rc3-1M-root/arch/x86_64/boot/compressed/vmlinux.lds	2006-08-11 01:32:00.000000000 -0400
@@ -40,5 +40,7 @@ SECTIONS
 		pgtable = . ;
 		. = . + 4096 * 6;
 		_heap = .;
+		. = . + 0x6000;		/* misc.c, Heap size. */
+		_heap_end = .;
 	}
 }
_


o Get rid of CONFIG_PHYSICAL_START and implement CONFIG_PHYSICAL_ALIGN

Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
---

 arch/i386/Kconfig                     |   34 ++++++++++++++++++----------------
 arch/i386/boot/bootsect.S             |    8 ++++----
 arch/i386/boot/compressed/head.S      |   28 ++++++++++++++++------------
 arch/i386/boot/compressed/misc.c      |    7 ++++---
 arch/i386/boot/compressed/vmlinux.lds |    3 +++
 arch/i386/kernel/vmlinux.lds.S        |    5 +++--
 include/asm-i386/boot.h               |    6 +++++-
 7 files changed, 53 insertions(+), 38 deletions(-)
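
Before the patch body, a quick stand-alone illustration of the
round-up-to-alignment expression that the new LOAD_PHYSICAL_ADDR macro
(in the boot.h hunk below) and the head.S changes both rely on; the
sample values are hypothetical:

#include <stdio.h>

/* Round addr up to the next multiple of align (a power of two) --
 * the same expression boot.h uses below. */
static unsigned long round_up(unsigned long addr, unsigned long align)
{
	return (addr + align - 1) & ~(align - 1);
}

int main(void)
{
	/* With CONFIG_PHYSICAL_ALIGN=0x400000 the 1MB floor rounds up
	 * to 4MB; with 0x100000 it stays at 1MB. */
	printf("%#lx\n", round_up(0x100000, 0x400000));	/* 0x400000 */
	printf("%#lx\n", round_up(0x100000, 0x100000));	/* 0x100000 */
	return 0;
}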

diff -puN arch/i386/Kconfig~i386-implement-config-physical-align-option arch/i386/Kconfig
--- linux-2.6.18-rc3-1M/arch/i386/Kconfig~i386-implement-config-physical-align-option	2006-08-17 10:56:46.000000000 -0400
+++ linux-2.6.18-rc3-1M-root/arch/i386/Kconfig	2006-08-17 11:28:40.000000000 -0400
@@ -773,24 +773,26 @@ config RELOCATABLE
           must live at a different physical address than the primary
           kernel.
 
-config PHYSICAL_START
-	hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
-
-	default "0x1000000" if CRASH_DUMP
+config PHYSICAL_ALIGN
+	hex "Alignment value to which kernel should be aligned" 
 	default "0x100000"
-	range 0x100000 0x37c00000
+	range 0x2000 0x400000
 	help
-	  This gives the physical address where the kernel is loaded. Normally
-	  for regular kernels this value is 0x100000 (1MB). But in the case
-	  of kexec on panic the fail safe kernel needs to run at a different
-	  address than the panic-ed kernel. This option is used to set the load
-	  address for kernels used to capture crash dump on being kexec'ed
-	  after panic. The default value for crash dump kernels is
-	  0x1000000 (16MB). This can also be set based on the "X" value as
-	  specified in the "crashkernel=YM@XM" command line boot parameter
-	  passed to the panic-ed kernel. Typically this parameter is set as
-	  crashkernel=64M@16M. Please take a look at
-	  Documentation/kdump/kdump.txt for more details about crash dumps.
+	  This value puts the alignment restrictions on physical address
+	  where kernel is loaded and run from. Kernel is compiled for an
+	  address which meets above alignment restriction.
+
+	  If bootloader loads the kernel at a non-aligned address and
+	  CONFIG_RELOCATABLE is set, kernel will move itself to nearest
+	  address aligned to above value and run from there.
+
+	  If bootloader loads the kernel at a non-aligned address and
+	  CONFIG_RELOCATABLE is not set, kernel will ignore the run time
+	  load address and decompress itself to the address it has been
+	  compiled for and run from there. The address for which kernel is
+	  compiled already meets above alignment restrictions. Hence the
+	  end result is that kernel runs from a physical address meeting above
+	  alignment restrictions.
 
 	  Don't change this unless you know what you are doing.
 
diff -puN include/asm-i386/boot.h~i386-implement-config-physical-align-option include/asm-i386/boot.h
--- linux-2.6.18-rc3-1M/include/asm-i386/boot.h~i386-implement-config-physical-align-option	2006-08-17 10:56:46.000000000 -0400
+++ linux-2.6.18-rc3-1M-root/include/asm-i386/boot.h	2006-08-17 10:56:46.000000000 -0400
@@ -12,4 +12,8 @@
 #define EXTENDED_VGA	0xfffe		/* 80x50 mode */
 #define ASK_VGA		0xfffd		/* ask for it at bootup */
 
-#endif
+/* Physical address where the kernel should be loaded. */
+#define LOAD_PHYSICAL_ADDR ((0x100000 + CONFIG_PHYSICAL_ALIGN - 1) \
+				& ~(CONFIG_PHYSICAL_ALIGN - 1))
+
+#endif /* _LINUX_BOOT_H */
diff -puN arch/i386/kernel/vmlinux.lds.S~i386-implement-config-physical-align-option arch/i386/kernel/vmlinux.lds.S
--- linux-2.6.18-rc3-1M/arch/i386/kernel/vmlinux.lds.S~i386-implement-config-physical-align-option	2006-08-17 10:56:46.000000000 -0400
+++ linux-2.6.18-rc3-1M-root/arch/i386/kernel/vmlinux.lds.S	2006-08-17 10:56:46.000000000 -0400
@@ -2,12 +2,13 @@
  * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
  */
 
-#define LOAD_OFFSET __PAGE_OFFSET
+#define LOAD_OFFSET	__PAGE_OFFSET
 
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/thread_info.h>
 #include <asm/page.h>
 #include <asm/cache.h>
+#include <asm/boot.h>
 
 OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
 OUTPUT_ARCH(i386)
@@ -16,7 +17,7 @@ ENTRY(phys_startup_32)
 }
 SECTIONS
 {
-  . = LOAD_OFFSET + CONFIG_PHYSICAL_START;
+  . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
   phys_startup_32 = startup_32 - LOAD_OFFSET;
   /* read-only */
   .text : AT(ADDR(.text) - LOAD_OFFSET) {
diff -puN arch/i386/boot/bootsect.S~i386-implement-config-physical-align-option arch/i386/boot/bootsect.S
--- linux-2.6.18-rc3-1M/arch/i386/boot/bootsect.S~i386-implement-config-physical-align-option	2006-08-17 10:56:46.000000000 -0400
+++ linux-2.6.18-rc3-1M-root/arch/i386/boot/bootsect.S	2006-08-17 12:37:19.000000000 -0400
@@ -69,7 +69,7 @@ ehdr:
 #endif
 	.word EM_386				# e_machine
 	.int  1					# e_version
-	.int  CONFIG_PHYSICAL_START		# e_entry
+	.int  LOAD_PHYSICAL_ADDR		# e_entry
 	.int  phdr - _start			# e_phoff
 	.int  0					# e_shoff
 	.int  0					# e_flags
@@ -90,12 +90,12 @@ normalize:
 phdr:
 	.int PT_LOAD					# p_type
 	.int (SETUPSECTS+1)*512				# p_offset
-	.int __PAGE_OFFSET + CONFIG_PHYSICAL_START	# p_vaddr
-	.int CONFIG_PHYSICAL_START			# p_paddr
+	.int LOAD_PHYSICAL_ADDR + __PAGE_OFFSET		# p_vaddr
+	.int LOAD_PHYSICAL_ADDR				# p_paddr
 	.int SYSSIZE*16					# p_filesz
 	.int 0						# p_memsz
 	.int PF_R | PF_W | PF_X				# p_flags
-	.int 4*1024*1024				# p_align
+	.int CONFIG_PHYSICAL_ALIGN			# p_align
 e_phdr1:
 
 	.int PT_NOTE					# p_type
diff -puN arch/i386/boot/compressed/vmlinux.lds~i386-implement-config-physical-align-option arch/i386/boot/compressed/vmlinux.lds
--- linux-2.6.18-rc3-1M/arch/i386/boot/compressed/vmlinux.lds~i386-implement-config-physical-align-option	2006-08-17 10:56:46.000000000 -0400
+++ linux-2.6.18-rc3-1M-root/arch/i386/boot/compressed/vmlinux.lds	2006-08-17 10:56:46.000000000 -0400
@@ -3,6 +3,9 @@ OUTPUT_ARCH(i386)
 ENTRY(startup_32)
 SECTIONS
 {
+        /* Be careful: parts of head.S assume startup_32 is at
+         * address 0.
+	 */
 	. =  0 	;
 	.text.head : {
 		_head = . ;
diff -puN arch/i386/boot/compressed/head.S~i386-implement-config-physical-align-option arch/i386/boot/compressed/head.S
--- linux-2.6.18-rc3-1M/arch/i386/boot/compressed/head.S~i386-implement-config-physical-align-option	2006-08-17 10:56:46.000000000 -0400
+++ linux-2.6.18-rc3-1M-root/arch/i386/boot/compressed/head.S	2006-08-17 11:12:51.000000000 -0400
@@ -27,6 +27,7 @@
 #include <linux/linkage.h>
 #include <asm/segment.h>
 #include <asm/page.h>
+#include <asm/boot.h>
 
 .section ".text.head"
 	.globl startup_32
@@ -53,17 +54,19 @@ startup_32:
 1:	popl %ebp
 	subl $1b, %ebp
 
-/* Compute the delta between where we were compiled to run at
- * and where the code will actually run at.
+
+/* %ebp contains the address we are loaded at by the boot loader and %ebx
+ * contains the address where we should move the kernel image temporarily
+ * for safe in-place decompression.
  */
-	/* Start with the delta to where the kernel will run at.  If we are
-	 * a relocatable kernel this is the delta to our load address otherwise
-	 * this is the delta to CONFIG_PHYSICAL start.
-	 */
+
 #ifdef CONFIG_RELOCATABLE
-	movl %ebp, %ebx
+	movl 	%ebp, %ebx
+	addl    $(CONFIG_PHYSICAL_ALIGN - 1), %ebx
+	andl    $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebx
+
 #else
-	movl $(CONFIG_PHYSICAL_START - startup_32), %ebx
+	movl $LOAD_PHYSICAL_ADDR, %ebx
 #endif
 
 	/* Replace the compressed data size with the uncompressed size */
@@ -95,9 +98,10 @@ startup_32:
 /* Compute the kernel start address.
  */
 #ifdef CONFIG_RELOCATABLE
-	leal	startup_32(%ebp), %ebp
+	addl    $(CONFIG_PHYSICAL_ALIGN - 1), %ebp
+	andl    $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebp
 #else
-	movl	$CONFIG_PHYSICAL_START, %ebp
+	movl	$LOAD_PHYSICAL_ADDR, %ebp
 #endif
 
 /*
@@ -151,8 +155,8 @@ relocated:
  * and where it was actually loaded.
  */
 	movl %ebp, %ebx
-	subl $CONFIG_PHYSICAL_START, %ebx
-
+	subl $LOAD_PHYSICAL_ADDR, %ebx
+	jz   2f		/* Nothing to be done if loaded at compiled addr. */
 /*
  * Process relocations.
  */
diff -puN arch/i386/boot/compressed/misc.c~i386-implement-config-physical-align-option arch/i386/boot/compressed/misc.c
--- linux-2.6.18-rc3-1M/arch/i386/boot/compressed/misc.c~i386-implement-config-physical-align-option	2006-08-17 10:56:46.000000000 -0400
+++ linux-2.6.18-rc3-1M-root/arch/i386/boot/compressed/misc.c	2006-08-17 11:19:05.000000000 -0400
@@ -18,6 +18,7 @@
 #include <asm/io.h>
 #include <asm/setup.h>
 #include <asm/page.h>
+#include <asm/boot.h>
 
 /* WARNING!!
  * This code is compiled with -fPIC and it is relocated dynamically
@@ -585,12 +586,12 @@ asmlinkage void decompress_kernel(void *
 	insize = input_len;
 	inptr  = 0;
 
-	if (((u32)output - CONFIG_PHYSICAL_START) & 0x3fffff)
-		error("Destination address not 4M aligned");
+	if ((u32)output & (CONFIG_PHYSICAL_ALIGN -1))
+		error("Destination address not CONFIG_PHYSICAL_ALIGN aligned");
 	if (end > ((-__PAGE_OFFSET-(512 <<20)-1) & 0x7fffffff))
 		error("Destination address too large");
 #ifndef CONFIG_RELOCATABLE
-	if ((u32)output != CONFIG_PHYSICAL_START)
+	if ((u32)output != LOAD_PHYSICAL_ADDR)
 		error("Wrong destination address");
 #endif
 
_


o ppc64 does not seem to be defining symbol _text  which is used by
  kernel/kallsyms.c

Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
---

 arch/powerpc/kernel/vmlinux.lds.S |    1 +
 1 file changed, 1 insertion(+)

diff -puN arch/powerpc/kernel/vmlinux.lds.S~ppc64-compilation-fix arch/powerpc/kernel/vmlinux.lds.S
--- linux-2.6.18-rc3-1M/arch/powerpc/kernel/vmlinux.lds.S~ppc64-compilation-fix	2006-08-24 16:16:17.000000000 -0400
+++ linux-2.6.18-rc3-1M-root/arch/powerpc/kernel/vmlinux.lds.S	2006-08-24 16:26:33.000000000 -0400
@@ -33,6 +33,7 @@ SECTIONS
 
 	/* Text and gots */
 	.text : {
+		_text = .;
 		*(.text .text.*)
 		SCHED_TEXT
 		LOCK_TEXT
_

o Currently in RHEL kernels one cannot open /proc/kcore, even though
  /proc/kcore is very much present. Upstream kernels do support opening
  and reading /proc/kcore.

o We need access to at least the kcore ELF header information in user
  space to determine where the kernel is compiled to run in virtual
  address space and how large it is (on x86_64). This information is used
  by kdump to fill in ELF headers while generating the kernel core dump.

o The assumption is that access to kcore was disabled for security
  reasons. This patch allows only very limited access to kcore: the root
  user can read the /proc/kcore ELF headers but not the actual memory
  contents.
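
A minimal user-space sketch of the access this patch permits -- reading
only the ELF header, roughly what kexec-tools needs (run as root; error
handling kept deliberately short):

#include <elf.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	Elf64_Ehdr eh;
	int fd = open("/proc/kcore", O_RDONLY);

	if (fd < 0 || read(fd, &eh, sizeof(eh)) != sizeof(eh)) {
		perror("/proc/kcore");
		return 1;
	}
	if (memcmp(eh.e_ident, ELFMAG, SELFMAG) != 0) {
		fprintf(stderr, "no ELF magic\n");
		return 1;
	}
	/* Enough to locate the program headers describing the kernel
	 * mappings; reads beyond the headers now return nothing. */
	printf("%u program headers at offset %llu\n",
	       (unsigned)eh.e_phnum, (unsigned long long)eh.e_phoff);
	close(fd);
	return 0;
}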

Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
---

 fs/proc/kcore.c |    8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff -puN fs/proc/kcore.c~enable-rhel-access-to-proc-kcore-elf-headers fs/proc/kcore.c
--- linux-2.6.17.x86_64/fs/proc/kcore.c~enable-rhel-access-to-proc-kcore-elf-headers	2006-09-06 13:44:09.000000000 -0400
+++ linux-2.6.17.x86_64-root/fs/proc/kcore.c	2006-09-06 13:44:55.000000000 -0400
@@ -25,7 +25,7 @@
 
 static int open_kcore(struct inode * inode, struct file * filp)
 {
-	return -EPERM;
+	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
 }
 
 static ssize_t read_kcore(struct file *, char __user *, size_t, loff_t *);
@@ -87,7 +87,8 @@ static size_t get_kcore_size(int *nphdr,
 			sizeof(struct elf_prpsinfo) +
 			sizeof(struct task_struct);
 	*elf_buflen = PAGE_ALIGN(*elf_buflen);
-	return size + *elf_buflen;
+	/* Access to kcore is not allowed (except elf headers) */
+	return *elf_buflen;
 }
 
 
@@ -303,6 +304,9 @@ read_kcore(struct file *file, char __use
 	} else
 		read_unlock(&kclist_lock);
 
+	/* Access to kcore is not allowed (except elf headers). */
+	return acc;
+
 	/*
 	 * Check to see if our file offset matches with any of
 	 * the addresses in the elf_phdr on our list.
_


o As per the ELF specification, the "namesz" field contains the length
  of "name" including the terminating null character. Currently we fill
  in "namesz" without taking the null character into consideration.

o Kexec-tools performs this check diligently, hence I ran into the issue
  while trying to open /proc/kcore in kexec-tools for some info.
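
A worked example of the sizing, assuming the usual 4-byte padding of ELF
note fields (the descsz value is hypothetical):

#include <elf.h>
#include <stdio.h>
#include <string.h>

#define ROUNDUP(x, y)	(((x) + (y) - 1) / (y) * (y))

int main(void)
{
	const char *name = "CORE";
	size_t descsz = 336;		/* hypothetical payload size */

	/* namesz counts the terminating NUL: "CORE" -> 5, padded to 8
	 * in the file.  Omitting the NUL yields 4, and a reader that
	 * checks diligently, like kexec-tools, rejects the note. */
	size_t namesz = strlen(name) + 1;
	size_t total = sizeof(Elf64_Nhdr)
			+ ROUNDUP(namesz, 4) + ROUNDUP(descsz, 4);

	printf("namesz=%zu total=%zu\n", namesz, total);
	return 0;
}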

Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
---

 fs/proc/kcore.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff -puN fs/proc/kcore.c~kcore-elf-note-namesz-fix fs/proc/kcore.c
--- linux-2.6.18-rc3-1M/fs/proc/kcore.c~kcore-elf-note-namesz-fix	2006-08-31 16:10:41.000000000 -0400
+++ linux-2.6.18-rc3-1M-root/fs/proc/kcore.c	2006-08-31 16:10:41.000000000 -0400
@@ -100,7 +100,7 @@ static int notesize(struct memelfnote *e
 	int sz;
 
 	sz = sizeof(struct elf_note);
-	sz += roundup(strlen(en->name), 4);
+	sz += roundup((strlen(en->name) + 1), 4);
 	sz += roundup(en->datasz, 4);
 
 	return sz;
@@ -116,7 +116,7 @@ static char *storenote(struct memelfnote
 
 #define DUMP_WRITE(addr,nr) do { memcpy(bufp,addr,nr); bufp += nr; } while(0)
 
-	en.n_namesz = strlen(men->name);
+	en.n_namesz = strlen(men->name) + 1;
 	en.n_descsz = men->datasz;
 	en.n_type = men->type;
 
--- linux-2.6.18.noarch/include/asm-x86_64/page.h.orig	2006-09-22 11:59:48.000000000 -0400
+++ linux-2.6.18.noarch/include/asm-x86_64/page.h	2006-09-22 11:59:21.000000000 -0400
@@ -1,6 +1,7 @@
 #ifndef _X86_64_PAGE_H
 #define _X86_64_PAGE_H

+#ifdef __KERNEL__
 #include <asm/const.h>

 /* PAGE_SHIFT determines the page size */
@@ -37,7 +38,6 @@
 #define HPAGE_MASK     (~(HPAGE_SIZE - 1))
 #define HUGETLB_PAGE_ORDER     (HPAGE_SHIFT - PAGE_SHIFT)

-#ifdef __KERNEL__
 #ifndef __ASSEMBLY__

 extern unsigned long end_pfn;
Date: Thu, 21 Sep 2006 15:47:25 -0400
From: Vivek Goyal <vgoyal@redhat.com>
Subject: [PATCH RHEL5] BZ#207596: Fix memory hotplug oops occurring due to
 non-zero page returned by alloc_low_page()


From: Vivek Goyal <vgoyal@in.ibm.com>

o Memory hotplug oopses when a chunk of memory is added. 

o The problem is that alloc_low_page() no longer returns a zeroed page. So
  while the initial page tables are built, uninitialized puds might contain
  non-zero values. The hotplug code thinks such a pud has already been
  initialized, tries to access the associated pmd, and oopses.

o memset the page to zero before using it.
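
A user-space analogue just to make the mechanism concrete (malloc stands
in for alloc_low_page(); none of this is kernel API):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	/* Like alloc_low_page(), malloc may hand back a page holding
	 * stale non-zero words... */
	unsigned long *pud = malloc(4096);

	if (!pud)
		return 1;
	/* ...which a consumer testing for "entry != 0" would mistake
	 * for an initialized entry and dereference.  Zeroing first,
	 * as the patch does, removes the ambiguity. */
	memset(pud, 0, 4096);
	printf("pud[0] = %#lx\n", pud[0]);
	free(pud);
	return 0;
}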

Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
---

 arch/x86_64/mm/init.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 5db93b9..61bd620 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -260,6 +260,7 @@ static void __init phys_pud_init(pud_t *
 
 		pmd_phys = alloc_low_page();
 		pmd = early_ioremap(pmd_phys, PAGE_SIZE);
+		memset(pmd, 0, PAGE_SIZE);
 		set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
 		phys_pmd_init(pmd, paddr, end);
 		early_iounmap(pmd, PAGE_SIZE);
@@ -319,7 +320,7 @@ void __init init_memory_mapping(unsigned
 
 		pud_phys = alloc_low_page();
 		pud = early_ioremap(pud_phys, PAGE_SIZE);
-
+		memset(pud, 0, PAGE_SIZE);
 		next = start + PGDIR_SIZE;
 		if (next > end) 
 			next = end; 



Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
---

 arch/i386/kernel/vmlinux.lds.S |    8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff -puN arch/i386/kernel/vmlinux.lds.S~i386-put-alternative-sections-into-one-section arch/i386/kernel/vmlinux.lds.S
--- linux-2.6.17.i386/arch/i386/kernel/vmlinux.lds.S~i386-put-alternative-sections-into-one-section	2006-09-22 03:04:07.000000000 -0400
+++ linux-2.6.17.i386-root/arch/i386/kernel/vmlinux.lds.S	2006-09-22 03:18:56.000000000 -0400
@@ -106,19 +106,15 @@ SECTIONS
 
   /* might get freed after init */
   . = ALIGN(4096);
-  .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) {
+  .smp_alternatives : AT(ADDR(.smp_alternatives) - LOAD_OFFSET) {
 	__smp_alt_begin = .;
 	__smp_alt_instructions = .;
 	*(.smp_altinstructions)
 	__smp_alt_instructions_end = .;
-  }
-  . = ALIGN(4);
-  .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
+  	. = ALIGN(4);
 	__smp_locks = .;
 	*(.smp_locks)
 	__smp_locks_end = .;
-  }
-  .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) {
 	*(.smp_altinstr_replacement)
 	. = ALIGN(4096);
 	__smp_alt_end = .;
_
Date: Wed, 25 Oct 2006 16:05:56 -0400
From: Kimball Murray <kmurray@redhat.com>
Subject: [RHEL5 Patch 1/1 revised(2) ] .bss data used before clear_bss() (BZ-211604)

Still fighting against the compiler, apparently.  Moving Vivek's call down
past clear_bss() was necessary, but not sufficient.  I was still able to
trigger the bug with some clever tricks.

I couldn't make sense of this from looking at the code, but when I disassembled
x86_64_start_kernel I saw that the compiler had inlined Vivek's
zap_identity_mappings() call.  Once it had done that, it was free to reorder
some instructions.  The upshot is that the compiler decided to grab my global
flag _before_ running the memset loop in clear_bss().  Then afterwards, it
used the saved value from the register.

So for this patch, I added a barrier() after clear_bss(), then disassembled
the routine to verify that it no longer grabbed my global flag before the
memset loop.  Also, seems to test OK.
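
For reference, barrier() is the kernel's compiler barrier (an empty asm
with a "memory" clobber).  A stand-alone sketch of the reordering it
forbids; the flag and buffer here are stand-ins for my global flag and
the .bss:

#include <stdio.h>
#include <string.h>

#define barrier()	asm volatile("" ::: "memory")

static int flag = 1;		/* stand-in for the global flag */
static char buf[64];		/* stand-in for .bss            */

int main(void)
{
	memset(buf, 0, sizeof(buf));	/* clear_bss() analogue */
	/* Without this, the compiler may load 'flag' into a register
	 * before the memset and reuse the stale value afterwards;
	 * the "memory" clobber forces a re-read past this point. */
	barrier();
	printf("flag = %d\n", flag);
	return 0;
}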

Vivek, when you push the kernel relocation bits upstream, I would ask
that either clear_bss() get moved into head.S, so we can trust the C code
in start_kernel, or, failing that, that a barrier() be put after
clear_bss() along with a big red sign saying not to add code before
clear_bss().


-Kimball

-------------------------- snip ----------------------
diff -Naur linux-2.6.18-1.2736.orig/arch/x86_64/kernel/head64.c linux-2.6.18-1.2736/arch/x86_64/kernel/head64.c
--- linux-2.6.18-1.2736.orig/arch/x86_64/kernel/head64.c	2006-10-25 12:43:34.000000000 -0400
+++ linux-2.6.18-1.2736/arch/x86_64/kernel/head64.c	2006-10-25 15:44:26.000000000 -0400
@@ -86,12 +86,14 @@
 	char *s;
 	int i;
 
-	/* Make NULL pointers segfault */
-	zap_identity_mappings();
 	for (i = 0; i < 256; i++)
 		set_intr_gate(i, early_idt_handler);
 	asm volatile("lidt %0" :: "m" (idt_descr));
 	clear_bss();
+	barrier();
+
+	/* Make NULL pointers segfault */
+	zap_identity_mappings();
 
 	/*
 	 * This must be called really, really early: