From 8fb402bccf203ecca8f9e0202b8fd3c937dece6f Mon Sep 17 00:00:00 2001
From: Erik Bosman <ebn310@few.vu.nl>
Date: Fri, 11 Apr 2008 18:54:17 +0200
Subject: generic, x86: add prctl commands PR_GET_TSC and PR_SET_TSC

This patch adds prctl commands that make it possible
to deny the execution of timestamp counters in userspace.
If this is not implemented on a specific architecture,
prctl will return -EINVAL.

ned-off-by: Erik Bosman <ejbosman@cs.vu.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/sys.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/sys.c b/kernel/sys.c
index a626116af5db..6a0cc71ee88d 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -67,6 +67,12 @@
 #ifndef SET_ENDIAN
 # define SET_ENDIAN(a,b)	(-EINVAL)
 #endif
+#ifndef GET_TSC_CTL
+# define GET_TSC_CTL(a)		(-EINVAL)
+#endif
+#ifndef SET_TSC_CTL
+# define SET_TSC_CTL(a)		(-EINVAL)
+#endif
 
 /*
  * this is where the system-wide overflow UID and GID are defined, for
@@ -1737,7 +1743,12 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
 #else
 			return -EINVAL;
 #endif
-
+		case PR_GET_TSC:
+			error = GET_TSC_CTL(arg2);
+			break;
+		case PR_SET_TSC:
+			error = SET_TSC_CTL(arg2);
+			break;
 		default:
 			error = -EINVAL;
 			break;
-- 
cgit v1.2.1


From d8bb6f4c1670c8324e4135c61ef07486f7f17379 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 1 Apr 2008 19:45:18 +0200
Subject: x86: tsc prevent time going backwards

We already catch most of the TSC problems by sanity checks, but there
is a subtle bug which has been in the code forever. This can cause
time jumps in the range of hours.

This was reported in:
     http://lkml.org/lkml/2007/8/23/96
and
     http://lkml.org/lkml/2008/3/31/23

I was able to reproduce the problem with a gettimeofday loop test on a
dual core and a quad core machine which both have sychronized
TSCs. The TSCs seems not to be perfectly in sync though, but the
kernel is not able to detect the slight delta in the sync check. Still
there exists an extremly small window where this delta can be observed
with a real big time jump. So far I was only able to reproduce this
with the vsyscall gettimeofday implementation, but in theory this
might be observable with the syscall based version as well.

CPU 0 updates the clock source variables under xtime/vyscall lock and
CPU1, where the TSC is slighty behind CPU0, is reading the time right
after the seqlock was unlocked.

The clocksource reference data was updated with the TSC from CPU0 and
the value which is read from TSC on CPU1 is less than the reference
data. This results in a huge delta value due to the unsigned
subtraction of the TSC value and the reference value. This algorithm
can not be changed due to the support of wrapping clock sources like
pm timer.

The huge delta is converted to nanoseconds and added to xtime, which
is then observable by the caller. The next gettimeofday call on CPU1
will show the correct time again as now the TSC has advanced above the
reference value.

To prevent this TSC specific wreckage we need to compare the TSC value
against the reference value and return the latter when it is larger
than the actual TSC value.

I pondered to mark the TSC unstable when the readout is smaller than
the reference value, but this would render an otherwise good and fast
clocksource unusable without a real good reason.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/time/timekeeping.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index a3fa587c350c..2d6087c7cf98 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -178,6 +178,7 @@ static void change_clocksource(void)
 	if (clock == new)
 		return;
 
+	new->cycle_last = 0;
 	now = clocksource_read(new);
 	nsec =  __get_nsec_offset();
 	timespec_add_ns(&xtime, nsec);
@@ -295,6 +296,7 @@ static int timekeeping_resume(struct sys_device *dev)
 	timespec_add_ns(&xtime, timekeeping_suspend_nsecs);
 	update_xtime_cache(0);
 	/* re-base the last cycle value */
+	clock->cycle_last = 0;
 	clock->cycle_last = clocksource_read(clock);
 	clock->error = 0;
 	timekeeping_suspended = 0;
-- 
cgit v1.2.1


From 61c4628b538608c1a85211ed8438136adfeb9a95 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 10 Mar 2008 15:28:04 -0700
Subject: x86, fpu: split FPU state from task struct - v5

Split the FPU save area from the task struct. This allows easy migration
of FPU context, and it's generally cleaner. It also allows the following
two optimizations:

1) only allocate when the application actually uses FPU, so in the first
lazy FPU trap. This could save memory for non-fpu using apps. Next patch
does this lazy allocation.

2) allocate the right size for the actual cpu rather than 512 bytes always.
Patches enabling xsave/xrstor support (coming shortly) will take advantage
of this.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/fork.c | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

(limited to 'kernel')

diff --git a/kernel/fork.c b/kernel/fork.c
index 9c042f901570..44a18192c420 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -132,6 +132,10 @@ void __put_task_struct(struct task_struct *tsk)
 		free_task(tsk);
 }
 
+void __attribute__((weak)) arch_task_cache_init(void)
+{
+}
+
 void __init fork_init(unsigned long mempages)
 {
 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
@@ -144,6 +148,9 @@ void __init fork_init(unsigned long mempages)
 			ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL);
 #endif
 
+	/* do the arch specific task caches init */
+	arch_task_cache_init();
+
 	/*
 	 * The default maximum number of threads is set to a safe
 	 * value: the thread structures can take up at most half
@@ -163,6 +170,13 @@ void __init fork_init(unsigned long mempages)
 		init_task.signal->rlim[RLIMIT_NPROC];
 }
 
+int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst,
+					       struct task_struct *src)
+{
+	*dst = *src;
+	return 0;
+}
+
 static struct task_struct *dup_task_struct(struct task_struct *orig)
 {
 	struct task_struct *tsk;
@@ -181,15 +195,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 		return NULL;
 	}
 
-	*tsk = *orig;
+ 	err = arch_dup_task_struct(tsk, orig);
+	if (err)
+		goto out;
+
 	tsk->stack = ti;
 
 	err = prop_local_init_single(&tsk->dirties);
-	if (err) {
-		free_thread_info(ti);
-		free_task_struct(tsk);
-		return NULL;
-	}
+	if (err)
+		goto out;
 
 	setup_thread_stack(tsk, orig);
 
@@ -205,6 +219,11 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 #endif
 	tsk->splice_pipe = NULL;
 	return tsk;
+
+out:
+	free_thread_info(ti);
+	free_task_struct(tsk);
+	return NULL;
 }
 
 #ifdef CONFIG_MMU
-- 
cgit v1.2.1


From 2adee9b30d1382fba97825b9c50e4f50a0117c36 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Wed, 16 Apr 2008 10:25:35 +0200
Subject: x86: fpu xstate split fix

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/fork.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/fork.c b/kernel/fork.c
index 44a18192c420..89fe414645e9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -132,9 +132,13 @@ void __put_task_struct(struct task_struct *tsk)
 		free_task(tsk);
 }
 
-void __attribute__((weak)) arch_task_cache_init(void)
-{
-}
+/*
+ * macro override instead of weak attribute alias, to workaround
+ * gcc 4.1.0 and 4.1.1 bugs with weak attribute and empty functions.
+ */
+#ifndef arch_task_cache_init
+#define arch_task_cache_init()
+#endif
 
 void __init fork_init(unsigned long mempages)
 {
-- 
cgit v1.2.1