Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 27922b4260f65d317aabda37e42bbbff > files > 1940

kernel-2.6.18-238.el5.src.rpm

From: luyu <luyu@redhat.com>
Subject: Re: [RHEL 5.1 PATCH] BZ 233046 getcpu system call
Date: Sat, 28 Apr 2007 10:06:33 +0800
Bugzilla: 233046
Message-Id: <4632AC29.3070109@redhat.com>
Changelog: [misc] getcpu system call


  
BZ 233046

Recently Fenghua implemented getcpu system call for IPF that is a clean 
interface to get the CPU and node number a process is running on.
There is /proc/<pid>/stat, but that is not a well-documented API that
can be used in production systems. Andi added a system call in x86-64
that does this.
The implementation includes both sys_getcpu and fsys_getcpu.  
sys_getcpu patch can be found at:
   [PATCH] Hook up getcpu system call for IA64
   http://www.gelato.unsw.edu.au/archives/linux-ia64/0702/19940.html
fsys_getcpu patch can be found at:
   [PATCH] fsys_getcpu for IA64
   http://www.gelato.unsw.edu.au/archives/linux-ia64/0702/19994.html
Fenghua's patch has been merged into the upstream kernel; his patches
apply on top of Andi Kleen's getcpu patch.

Thanks,
Luming


diff -BruN linux-2.6.18.ia64/arch/i386/kernel/syscall_table.S linux-2.6.18.ia64-patched/arch/i386/kernel/syscall_table.S
--- linux-2.6.18.ia64/arch/i386/kernel/syscall_table.S	2007-03-20 18:40:15.000000000 -0400
+++ linux-2.6.18.ia64-patched/arch/i386/kernel/syscall_table.S	2007-03-20 21:53:11.000000000 -0400
@@ -325,3 +325,4 @@
 	.long sys_tee			/* 315 */
 	.long sys_vmsplice
 	.long sys_move_pages
+	.long sys_getcpu
diff -BruN linux-2.6.18.ia64/arch/ia64/kernel/asm-offsets.c linux-2.6.18.ia64-patched/arch/ia64/kernel/asm-offsets.c
--- linux-2.6.18.ia64/arch/ia64/kernel/asm-offsets.c	2007-03-20 18:40:15.000000000 -0400
+++ linux-2.6.18.ia64-patched/arch/ia64/kernel/asm-offsets.c	2007-03-20 19:13:04.000000000 -0400
@@ -35,6 +35,7 @@
 	BLANK();
 
 	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
+	DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
 	DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
 
 	BLANK();
diff -BruN linux-2.6.18.ia64/arch/ia64/kernel/entry.S linux-2.6.18.ia64-patched/arch/ia64/kernel/entry.S
--- linux-2.6.18.ia64/arch/ia64/kernel/entry.S	2007-03-20 18:40:20.000000000 -0400
+++ linux-2.6.18.ia64-patched/arch/ia64/kernel/entry.S	2007-03-20 19:12:57.000000000 -0400
@@ -1624,5 +1624,7 @@
 	data8 sys_sync_file_range		// 1300
 	data8 sys_tee
 	data8 sys_vmsplice
+	data8 sys_ni_syscall			// reserved for move_pages
+	data8 sys_getcpu
 
 	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
diff -BruN linux-2.6.18.ia64/arch/ia64/kernel/fsys.S linux-2.6.18.ia64-patched/arch/ia64/kernel/fsys.S
--- linux-2.6.18.ia64/arch/ia64/kernel/fsys.S	2007-03-20 18:40:14.000000000 -0400
+++ linux-2.6.18.ia64-patched/arch/ia64/kernel/fsys.S	2007-03-20 19:13:04.000000000 -0400
@@ -10,6 +10,8 @@
  *			probably broke it along the way... ;-)
  * 13-Jul-04 clameter   Implement fsys_clock_gettime and revise fsys_gettimeofday to make
  *                      it capable of using memory based clocks without falling back to C code.
+ * 08-Feb-07 Fenghua Yu Implement fsys_getcpu.
+ *
  */
 
 #include <asm/asmmacro.h>
@@ -505,6 +507,59 @@
 #endif
 END(fsys_rt_sigprocmask)
 
+/*
+ * fsys_getcpu doesn't use the third parameter in this implementation. It reads
+ * current_thread_info()->cpu and corresponding node in cpu_to_node_map.
+ */
+ENTRY(fsys_getcpu)
+	.prologue
+	.altrp b6
+	.body
+	;;
+	add r2=TI_FLAGS+IA64_TASK_SIZE,r16
+	tnat.nz p6,p0 = r32			// guard against NaT argument
+	add r3=TI_CPU+IA64_TASK_SIZE,r16
+	;;
+	ld4 r3=[r3]				// M r3 = thread_info->cpu
+	ld4 r2=[r2]				// M r2 = thread_info->flags
+(p6)    br.cond.spnt.few .fail_einval		// B
+	;;
+	tnat.nz p7,p0 = r33			// I guard against NaT argument
+(p7)    br.cond.spnt.few .fail_einval		// B
+#ifdef CONFIG_NUMA
+	movl r17=cpu_to_node_map
+	;;
+EX(.fail_efault, probe.w.fault r32, 3)		// M This takes 5 cycles
+EX(.fail_efault, probe.w.fault r33, 3)		// M This takes 5 cycles
+	shladd r18=r3,1,r17
+	;;
+	ld2 r20=[r18]				// r20 = cpu_to_node_map[cpu]
+	and r2 = TIF_ALLWORK_MASK,r2
+	;;
+	cmp.ne p8,p0=0,r2
+(p8)	br.spnt.many fsys_fallback_syscall
+	;;
+	;;
+EX(.fail_efault, st4 [r32] = r3)
+EX(.fail_efault, st2 [r33] = r20)
+	mov r8=0
+	;;
+#else
+EX(.fail_efault, probe.w.fault r32, 3)		// M This takes 5 cycles
+EX(.fail_efault, probe.w.fault r33, 3)		// M This takes 5 cycles
+	and r2 = TIF_ALLWORK_MASK,r2
+	;;
+	cmp.ne p8,p0=0,r2
+(p8)	br.spnt.many fsys_fallback_syscall
+	;;
+EX(.fail_efault, st4 [r32] = r3)
+EX(.fail_efault, st2 [r33] = r0)
+	mov r8=0
+	;;
+#endif
+	FSYS_RETURN
+END(fsys_getcpu)
+
 ENTRY(fsys_fallback_syscall)
 	.prologue
 	.altrp b6
@@ -878,6 +933,56 @@
 	data8 0				// timer_delete
 	data8 0				// clock_settime
 	data8 fsys_clock_gettime	// clock_gettime
+	data8 0				// clock_getres		// 1255
+	data8 0				// clock_nanosleep
+	data8 0				// fstatfs64
+	data8 0				// statfs64
+	data8 0				// mbind
+	data8 0				// get_mempolicy	// 1260
+	data8 0				// set_mempolicy
+	data8 0				// mq_open
+	data8 0				// mq_unlink
+	data8 0				// mq_timedsend
+	data8 0				// mq_timedreceive	// 1265
+	data8 0				// mq_notify
+	data8 0				// mq_getsetattr
+	data8 0				// kexec_load
+	data8 0				// vserver
+	data8 0				// waitid		// 1270
+	data8 0				// add_key
+	data8 0				// request_key
+	data8 0				// keyctl
+	data8 0				// ioprio_set
+	data8 0				// ioprio_get		// 1275
+	data8 0				// move_pages
+	data8 0				// inotify_init
+	data8 0				// inotify_add_watch
+	data8 0				// inotify_rm_watch
+	data8 0				// migrate_pages	// 1280
+	data8 0				// openat
+	data8 0				// mkdirat
+	data8 0				// mknodat
+	data8 0				// fchownat
+	data8 0				// futimesat		// 1285
+	data8 0				// newfstatat
+	data8 0				// unlinkat
+	data8 0				// renameat
+	data8 0				// linkat
+	data8 0				// symlinkat		// 1290
+	data8 0				// readlinkat
+	data8 0				// fchmodat
+	data8 0				// faccessat
+	data8 0
+	data8 0							// 1295
+	data8 0				// unshare
+	data8 0				// splice
+	data8 0				// set_robust_list
+	data8 0				// get_robust_list
+	data8 0				// sync_file_range	// 1300
+	data8 0				// tee
+	data8 0				// vmsplice
+	data8 0
+	data8 fsys_getcpu		// getcpu		// 1304
 
 	// fill in zeros for the remaining entries
 	.zero:
diff -BruN linux-2.6.18.ia64/arch/x86_64/ia32/ia32entry.S linux-2.6.18.ia64-patched/arch/x86_64/ia32/ia32entry.S
--- linux-2.6.18.ia64/arch/x86_64/ia32/ia32entry.S	2007-03-20 18:40:15.000000000 -0400
+++ linux-2.6.18.ia64-patched/arch/x86_64/ia32/ia32entry.S	2007-03-20 21:53:11.000000000 -0400
@@ -721,4 +721,5 @@
 	.quad sys_tee
 	.quad compat_sys_vmsplice
 	.quad compat_sys_move_pages
+	.quad sys_getcpu
 ia32_syscall_end:		
diff -BruN linux-2.6.18.ia64/arch/x86_64/kernel/head.S linux-2.6.18.ia64-patched/arch/x86_64/kernel/head.S
--- linux-2.6.18.ia64/arch/x86_64/kernel/head.S	2007-03-20 18:40:20.000000000 -0400
+++ linux-2.6.18.ia64-patched/arch/x86_64/kernel/head.S	2007-04-03 19:54:20.000000000 -0400
@@ -371,7 +371,7 @@
 	.quad	0,0			/* TSS */
 	.quad	0,0			/* LDT */
 	.quad   0,0,0			/* three TLS descriptors */
-	.quad	0			/* unused */
+ 	.quad	0x0000f40000000000	/* node/CPU stored in limit */
 gdt_end:	
 	/* asm/segment.h:GDT_ENTRIES must match this */	
 	/* This should be a multiple of the cache line size */
diff -BruN linux-2.6.18.ia64/arch/x86_64/kernel/time.c linux-2.6.18.ia64-patched/arch/x86_64/kernel/time.c
--- linux-2.6.18.ia64/arch/x86_64/kernel/time.c	2007-03-20 18:40:14.000000000 -0400
+++ linux-2.6.18.ia64-patched/arch/x86_64/kernel/time.c	2007-04-03 19:56:37.000000000 -0400
@@ -899,13 +899,8 @@
 time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu)
 {
 	unsigned cpu = (unsigned long) hcpu;
-
-	if (action == CPU_ONLINE && cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) {
-		unsigned p;
-		p = smp_processor_id() | (cpu_to_node(smp_processor_id())<<12);
-		write_rdtscp_aux(p);
-	}
-
+ 	if (action == CPU_ONLINE)
+ 		vsyscall_set_cpu(cpu);
 	return NOTIFY_DONE;
 }
 
@@ -999,6 +994,11 @@
 	if (unsynchronized_tsc())
 		notsc = 1;
 
+ 	if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
+		vgetcpu_mode = VGETCPU_RDTSCP;
+	else
+		vgetcpu_mode = VGETCPU_LSL;
+
 	if (vxtime.hpet_address && notsc) {
 		timetype = hpet_use_timer ? "HPET" : "PIT/HPET";
 		if (hpet_use_timer)
diff -BruN linux-2.6.18.ia64/arch/x86_64/kernel/vmlinux.lds.S linux-2.6.18.ia64-patched/arch/x86_64/kernel/vmlinux.lds.S
--- linux-2.6.18.ia64/arch/x86_64/kernel/vmlinux.lds.S	2007-03-20 18:40:21.000000000 -0400
+++ linux-2.6.18.ia64-patched/arch/x86_64/kernel/vmlinux.lds.S	2007-03-20 22:16:00.000000000 -0400
@@ -103,6 +103,9 @@
   .vxtime : AT(VLOAD(.vxtime)) { *(.vxtime) }
   vxtime = VVIRT(.vxtime);
 
+  .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) }
+  vgetcpu_mode = VVIRT(.vgetcpu_mode);
+
   .wall_jiffies : AT(VLOAD(.wall_jiffies)) { *(.wall_jiffies) }
   wall_jiffies = VVIRT(.wall_jiffies);
 
diff -BruN linux-2.6.18.ia64/arch/x86_64/kernel/vsyscall.c linux-2.6.18.ia64-patched/arch/x86_64/kernel/vsyscall.c
--- linux-2.6.18.ia64/arch/x86_64/kernel/vsyscall.c	2007-03-20 18:40:20.000000000 -0400
+++ linux-2.6.18.ia64-patched/arch/x86_64/kernel/vsyscall.c	2007-03-20 22:16:30.000000000 -0400
@@ -26,6 +26,7 @@
 #include <linux/seqlock.h>
 #include <linux/jiffies.h>
 #include <linux/sysctl.h>
+#include <linux/getcpu.h>
 
 #include <asm/vsyscall.h>
 #include <asm/pgtable.h>
@@ -33,11 +34,15 @@
 #include <asm/fixmap.h>
 #include <asm/errno.h>
 #include <asm/io.h>
+#include <asm/segment.h>
+#include <asm/desc.h>
+#include <asm/topology.h>
 
 #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
 
 int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
 seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
+int __vgetcpu_mode __section_vgetcpu_mode;
 
 #include <asm/unistd.h>
 
@@ -133,9 +138,46 @@
 	return __xtime.tv_sec;
 }
 
-long __vsyscall(2) venosys_0(void)
-{
-	return -ENOSYS;
+/* Fast way to get current CPU and node.
+   This helps to do per node and per CPU caches in user space.
+   The result is not guaranteed without CPU affinity, but usually
+   works out because the scheduler tries to keep a thread on the same
+   CPU.
+
+   tcache must point to a two element sized long array.
+   All arguments can be NULL. */
+long __vsyscall(2)
+vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
+{
+	unsigned int dummy, p;
+	unsigned long j = 0;
+
+	/* Fast cache - only recompute value once per jiffies and avoid
+	   relatively costly rdtscp/cpuid otherwise.
+	   This works because the scheduler usually keeps the process
+	   on the same CPU and this syscall doesn't guarantee its
+	   results anyways.
+	   We do this here because otherwise user space would do it on
+	   its own in a likely inferior way (no access to jiffies).
+	   If you don't like it pass NULL. */
+	if (tcache && tcache->blob[0] == (j = __jiffies)) {
+		p = tcache->blob[1];
+	} else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
+		/* Load per CPU data from RDTSCP */
+		rdtscp(dummy, dummy, p);
+	} else {
+		/* Load per CPU data from GDT */
+		asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
+	}
+	if (tcache) {
+		tcache->blob[0] = j;
+		tcache->blob[1] = p;
+	}
+	if (cpu)
+		*cpu = p & 0xfff;
+	if (node)
+		*node = p >> 12;
+	return 0;
 }
 
 long __vsyscall(3) venosys_1(void)
@@ -206,6 +248,43 @@
 
 #endif
 
+static void __cpuinit write_rdtscp_cb(void *info)
+{
+	write_rdtscp_aux((unsigned long)info);
+}
+
+void __cpuinit vsyscall_set_cpu(int cpu)
+{
+	unsigned long *d;
+	unsigned long node = 0;
+#ifdef CONFIG_NUMA
+	node = cpu_to_node[cpu];
+#endif
+	if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) {
+		void *info = (void *)((node << 12) | cpu);
+		/* Can happen on preemptive kernel */
+		if (get_cpu() == cpu)
+			write_rdtscp_cb(info);
+#ifdef CONFIG_SMP
+		else {
+			/* the notifier is unfortunately not executed on the
+			   target CPU */
+			smp_call_function_single(cpu,write_rdtscp_cb,info,0,1);
+		}
+#endif
+		put_cpu();
+	}
+
+	/* Store cpu number in limit so that it can be loaded quickly
+	   in user space in vgetcpu.
+	   12 bits for the CPU and 8 bits for the node. */
+	d = (unsigned long *)(cpu_gdt(cpu) + GDT_ENTRY_PER_CPU);
+	*d = 0x0f40000000000ULL;
+	*d |= cpu;
+	*d |= (node & 0xf) << 12;
+	*d |= (node >> 4) << 48;
+}
+
 static void __init map_vsyscall(void)
 {
 	extern char __vsyscall_0;
@@ -220,6 +299,7 @@
 			VSYSCALL_ADDR(__NR_vgettimeofday)));
 	BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
 	BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
+	BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
 	map_vsyscall();
 #ifdef CONFIG_SYSCTL
 	register_sysctl_table(kernel_root_table2, 0);
diff -BruN linux-2.6.18.ia64/include/asm-i386/unistd.h linux-2.6.18.ia64-patched/include/asm-i386/unistd.h
--- linux-2.6.18.ia64/include/asm-i386/unistd.h	2007-03-20 18:40:15.000000000 -0400
+++ linux-2.6.18.ia64-patched/include/asm-i386/unistd.h	2007-03-20 21:53:11.000000000 -0400
@@ -323,10 +323,11 @@
 #define __NR_tee		315
 #define __NR_vmsplice		316
 #define __NR_move_pages		317
+#define __NR_getcpu		318
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 318
+#define NR_syscalls 319
 
 #ifndef __KERNEL_SYSCALLS_NO_ERRNO__
 /*
diff -BruN linux-2.6.18.ia64/include/asm-ia64/unistd.h linux-2.6.18.ia64-patched/include/asm-ia64/unistd.h
--- linux-2.6.18.ia64/include/asm-ia64/unistd.h	2007-03-20 18:40:15.000000000 -0400
+++ linux-2.6.18.ia64-patched/include/asm-ia64/unistd.h	2007-03-20 19:12:57.000000000 -0400
@@ -291,11 +291,13 @@
 #define __NR_sync_file_range		1300
 #define __NR_tee			1301
 #define __NR_vmsplice			1302
+/* 1303 reserved for move_pages */
+#define __NR_getcpu			1304
 
 #ifdef __KERNEL__
 
 
-#define NR_syscalls			279 /* length of syscall table */
+#define NR_syscalls			281 /* length of syscall table */
 
 #define __ARCH_WANT_SYS_RT_SIGACTION
 
diff -BruN linux-2.6.18.ia64/include/asm-x86_64/segment.h linux-2.6.18.ia64-patched/include/asm-x86_64/segment.h
--- linux-2.6.18.ia64/include/asm-x86_64/segment.h	2007-03-20 18:40:20.000000000 -0400
+++ linux-2.6.18.ia64-patched/include/asm-x86_64/segment.h	2007-04-03 19:52:47.000000000 -0400
@@ -25,10 +25,12 @@
 #define GDT_ENTRY_LDT 10 /* needs two entries */
 #define GDT_ENTRY_TLS_MIN 12
 #define GDT_ENTRY_TLS_MAX 14
-/* 15 free */
 
 #define GDT_ENTRY_TLS_ENTRIES 3
 
+#define GDT_ENTRY_PER_CPU 15	/* Abused to load per CPU data from limit */
+#define __PER_CPU_SEG	(GDT_ENTRY_PER_CPU * 8 + 3)
+
 /* TLS indexes for 64bit - hardcoded in arch_prctl */
 #define FS_TLS 0	
 #define GS_TLS 1	
diff -BruN linux-2.6.18.ia64/include/asm-x86_64/smp.h linux-2.6.18.ia64-patched/include/asm-x86_64/smp.h
--- linux-2.6.18.ia64/include/asm-x86_64/smp.h	2007-03-20 18:40:21.000000000 -0400
+++ linux-2.6.18.ia64-patched/include/asm-x86_64/smp.h	2007-03-20 22:16:00.000000000 -0400
@@ -131,13 +131,19 @@
 	/* we don't want to mark this access volatile - bad code generation */
 	return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
 }
-#endif
 
 #ifdef CONFIG_SMP
 #define cpu_physical_id(cpu)		x86_cpu_to_apicid[cpu]
 #else
 #define cpu_physical_id(cpu)		boot_cpu_id
-#endif
-
+static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
+				void *info, int retry, int wait)
+{
+	/* Disable interrupts here? */
+	func(info);
+	return 0;
+}
+#endif /* !CONFIG_SMP */
+#endif /* !__ASSEMBLY */
 #endif
 
diff -BruN linux-2.6.18.ia64/include/asm-x86_64/vsyscall.h linux-2.6.18.ia64-patched/include/asm-x86_64/vsyscall.h
--- linux-2.6.18.ia64/include/asm-x86_64/vsyscall.h	2006-09-19 23:42:06.000000000 -0400
+++ linux-2.6.18.ia64-patched/include/asm-x86_64/vsyscall.h	2007-03-20 22:16:00.000000000 -0400
@@ -4,6 +4,7 @@
 enum vsyscall_num {
 	__NR_vgettimeofday,
 	__NR_vtime,
+	__NR_vgetcpu,
 };
 
 #define VSYSCALL_START (-10UL << 20)
@@ -15,6 +16,7 @@
 #include <linux/seqlock.h>
 
 #define __section_vxtime __attribute__ ((unused, __section__ (".vxtime"), aligned(16)))
+#define __section_vgetcpu_mode __attribute__ ((unused, __section__ (".vgetcpu_mode"), aligned(16)))
 #define __section_wall_jiffies __attribute__ ((unused, __section__ (".wall_jiffies"), aligned(16)))
 #define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16)))
 #define __section_sys_tz __attribute__ ((unused, __section__ (".sys_tz"), aligned(16)))
@@ -26,6 +28,9 @@
 #define VXTIME_HPET	2
 #define VXTIME_PMTMR	3
 
+#define VGETCPU_RDTSCP	1
+#define VGETCPU_LSL	2
+
 struct vxtime_data {
 	long hpet_address;	/* HPET base address */
 	int last;
@@ -40,6 +45,7 @@
 
 /* vsyscall space (readonly) */
 extern struct vxtime_data __vxtime;
+extern int __vgetcpu_mode;
 extern struct timespec __xtime;
 extern volatile unsigned long __jiffies;
 extern unsigned long __wall_jiffies;
@@ -48,6 +54,7 @@
 
 /* kernel space (writeable) */
 extern struct vxtime_data vxtime;
+extern int vgetcpu_mode;
 extern unsigned long wall_jiffies;
 extern struct timezone sys_tz;
 extern int sysctl_vsyscall;
@@ -55,6 +62,8 @@
 
 extern int sysctl_vsyscall;
 
+extern void vsyscall_set_cpu(int cpu);
+
 #define ARCH_HAVE_XTIME_LOCK 1
 
 #endif /* __KERNEL__ */
diff -BruN linux-2.6.18.ia64/include/linux/getcpu.h linux-2.6.18.ia64-patched/include/linux/getcpu.h
--- linux-2.6.18.ia64/include/linux/getcpu.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.18.ia64-patched/include/linux/getcpu.h	2007-03-20 22:16:30.000000000 -0400
@@ -0,0 +1,18 @@
+#ifndef _LINUX_GETCPU_H
+#define _LINUX_GETCPU_H 1
+
+/* Cache for getcpu() to speed it up. Results might be a short time
+   out of date, but will be faster.
+
+   User programs should not refer to the contents of this structure.
+   I repeat they should not refer to it. If they do they will break
+   in future kernels.
+
+   It is only a private cache for vgetcpu(). It will change in future kernels.
+   The user program must store this information per thread (__thread)
+   If you want 100% accurate information pass NULL instead. */
+struct getcpu_cache {
+	unsigned long blob[128 / sizeof(long)];
+};
+
+#endif
diff -BruN linux-2.6.18.ia64/include/linux/syscalls.h linux-2.6.18.ia64-patched/include/linux/syscalls.h
--- linux-2.6.18.ia64/include/linux/syscalls.h	2006-09-19 23:42:06.000000000 -0400
+++ linux-2.6.18.ia64-patched/include/linux/syscalls.h	2007-03-20 21:53:11.000000000 -0400
@@ -53,6 +53,7 @@
 struct compat_stat;
 struct compat_timeval;
 struct robust_list_head;
+struct getcpu_cache;
 
 #include <linux/types.h>
 #include <linux/aio_abi.h>
@@ -596,5 +597,6 @@
 				    size_t __user *len_ptr);
 asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
 				    size_t len);
+asmlinkage long sys_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *cache);
 
 #endif
diff -BruN linux-2.6.18.ia64/kernel/sys.c linux-2.6.18.ia64-patched/kernel/sys.c
--- linux-2.6.18.ia64/kernel/sys.c	2007-03-20 18:40:14.000000000 -0400
+++ linux-2.6.18.ia64-patched/kernel/sys.c	2007-03-20 22:16:30.000000000 -0400
@@ -28,6 +28,7 @@
 #include <linux/tty.h>
 #include <linux/signal.h>
 #include <linux/cn_proc.h>
+#include <linux/getcpu.h>
 
 #include <linux/compat.h>
 #include <linux/syscalls.h>
@@ -2062,3 +2063,33 @@
 	}
 	return error;
 }
+
+asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep,
+	   		   struct getcpu_cache __user *cache)
+{
+	int err = 0;
+	int cpu = raw_smp_processor_id();
+	if (cpup)
+		err |= put_user(cpu, cpup);
+	if (nodep)
+		err |= put_user(cpu_to_node(cpu), nodep);
+	if (cache) {
+		/*
+		 * The cache is not needed for this implementation,
+		 * but make sure user programs pass something
+		 * valid. vsyscall implementations can instead make
+		 * good use of the cache. Only use t0 and t1 because
+		 * these are available in both 32bit and 64bit ABI (no
+		 * need for a compat_getcpu). 32bit has enough
+		 * padding
+		 */
+		unsigned long t0, t1;
+		get_user(t0, &cache->blob[0]);
+		get_user(t1, &cache->blob[1]);
+		t0++;
+		t1++;
+		put_user(t0, &cache->blob[0]);
+		put_user(t1, &cache->blob[1]);
+	}
+	return err ? -EFAULT : 0;
+}