summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/Kconfig.cpu3
-rw-r--r--arch/x86/include/asm/percpu.h187
-rw-r--r--include/linux/percpu.h194
3 files changed, 316 insertions, 68 deletions
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 2ac9069890cd..15588a0ef466 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -310,6 +310,9 @@ config X86_INTERNODE_CACHE_SHIFT
config X86_CMPXCHG
def_bool X86_64 || (X86_32 && !M386)
+config CMPXCHG_LOCAL
+ def_bool X86_64 || (X86_32 && !M386)
+
config X86_L1_CACHE_SHIFT
int
default "7" if MPENTIUM4 || MPSC
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 38f9e965ff96..8ee45167e817 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -177,39 +177,6 @@ do { \
} \
} while (0)
-/*
- * Add return operation
- */
-#define percpu_add_return_op(var, val) \
-({ \
- typeof(var) paro_ret__ = val; \
- switch (sizeof(var)) { \
- case 1: \
- asm("xaddb %0, "__percpu_arg(1) \
- : "+q" (paro_ret__), "+m" (var) \
- : : "memory"); \
- break; \
- case 2: \
- asm("xaddw %0, "__percpu_arg(1) \
- : "+r" (paro_ret__), "+m" (var) \
- : : "memory"); \
- break; \
- case 4: \
- asm("xaddl %0, "__percpu_arg(1) \
- : "+r" (paro_ret__), "+m" (var) \
- : : "memory"); \
- break; \
- case 8: \
- asm("xaddq %0, "__percpu_arg(1) \
- : "+re" (paro_ret__), "+m" (var) \
- : : "memory"); \
- break; \
- default: __bad_percpu_size(); \
- } \
- paro_ret__ += val; \
- paro_ret__; \
-})
-
#define percpu_from_op(op, var, constraint) \
({ \
typeof(var) pfo_ret__; \
@@ -263,6 +230,125 @@ do { \
})
/*
+ * Add return operation
+ */
+#define percpu_add_return_op(var, val) \
+({ \
+ typeof(var) paro_ret__ = val; \
+ switch (sizeof(var)) { \
+ case 1: \
+ asm("xaddb %0, "__percpu_arg(1) \
+ : "+q" (paro_ret__), "+m" (var) \
+ : : "memory"); \
+ break; \
+ case 2: \
+ asm("xaddw %0, "__percpu_arg(1) \
+ : "+r" (paro_ret__), "+m" (var) \
+ : : "memory"); \
+ break; \
+ case 4: \
+ asm("xaddl %0, "__percpu_arg(1) \
+ : "+r" (paro_ret__), "+m" (var) \
+ : : "memory"); \
+ break; \
+ case 8: \
+ asm("xaddq %0, "__percpu_arg(1) \
+ : "+re" (paro_ret__), "+m" (var) \
+ : : "memory"); \
+ break; \
+ default: __bad_percpu_size(); \
+ } \
+ paro_ret__ += val; \
+ paro_ret__; \
+})
+
+/*
+ * xchg is implemented using cmpxchg without a lock prefix. xchg is
+ * expensive due to the implied lock prefix. The processor cannot prefetch
+ * cachelines if xchg is used.
+ */
+#define percpu_xchg_op(var, nval) \
+({ \
+ typeof(var) pxo_ret__; \
+ typeof(var) pxo_new__ = (nval); \
+ switch (sizeof(var)) { \
+ case 1: \
+ asm("\n1:mov "__percpu_arg(1)",%%al" \
+ "\n\tcmpxchgb %2, "__percpu_arg(1) \
+ "\n\tjnz 1b" \
+ : "=a" (pxo_ret__), "+m" (var) \
+ : "q" (pxo_new__) \
+ : "memory"); \
+ break; \
+ case 2: \
+ asm("\n1:mov "__percpu_arg(1)",%%ax" \
+ "\n\tcmpxchgw %2, "__percpu_arg(1) \
+ "\n\tjnz 1b" \
+ : "=a" (pxo_ret__), "+m" (var) \
+ : "r" (pxo_new__) \
+ : "memory"); \
+ break; \
+ case 4: \
+ asm("\n1:mov "__percpu_arg(1)",%%eax" \
+ "\n\tcmpxchgl %2, "__percpu_arg(1) \
+ "\n\tjnz 1b" \
+ : "=a" (pxo_ret__), "+m" (var) \
+ : "r" (pxo_new__) \
+ : "memory"); \
+ break; \
+ case 8: \
+ asm("\n1:mov "__percpu_arg(1)",%%rax" \
+ "\n\tcmpxchgq %2, "__percpu_arg(1) \
+ "\n\tjnz 1b" \
+ : "=a" (pxo_ret__), "+m" (var) \
+ : "r" (pxo_new__) \
+ : "memory"); \
+ break; \
+ default: __bad_percpu_size(); \
+ } \
+ pxo_ret__; \
+})
+
+/*
+ * cmpxchg has no such implied lock semantics as a result it is much
+ * more efficient for cpu local operations.
+ */
+#define percpu_cmpxchg_op(var, oval, nval) \
+({ \
+ typeof(var) pco_ret__; \
+ typeof(var) pco_old__ = (oval); \
+ typeof(var) pco_new__ = (nval); \
+ switch (sizeof(var)) { \
+ case 1: \
+ asm("cmpxchgb %2, "__percpu_arg(1) \
+ : "=a" (pco_ret__), "+m" (var) \
+ : "q" (pco_new__), "0" (pco_old__) \
+ : "memory"); \
+ break; \
+ case 2: \
+ asm("cmpxchgw %2, "__percpu_arg(1) \
+ : "=a" (pco_ret__), "+m" (var) \
+ : "r" (pco_new__), "0" (pco_old__) \
+ : "memory"); \
+ break; \
+ case 4: \
+ asm("cmpxchgl %2, "__percpu_arg(1) \
+ : "=a" (pco_ret__), "+m" (var) \
+ : "r" (pco_new__), "0" (pco_old__) \
+ : "memory"); \
+ break; \
+ case 8: \
+ asm("cmpxchgq %2, "__percpu_arg(1) \
+ : "=a" (pco_ret__), "+m" (var) \
+ : "r" (pco_new__), "0" (pco_old__) \
+ : "memory"); \
+ break; \
+ default: __bad_percpu_size(); \
+ } \
+ pco_ret__; \
+})
+
+/*
* percpu_read() makes gcc load the percpu variable every time it is
* accessed while percpu_read_stable() allows the value to be cached.
* percpu_read_stable() is more efficient and can be used if its value
@@ -300,6 +386,12 @@ do { \
#define __this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
#define __this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
#define __this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
+/*
+ * Generic fallback operations for __this_cpu_xchg_[1-4] are okay and much
+ * faster than an xchg with forced lock semantics.
+ */
+#define __this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
+#define __this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
#define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
@@ -319,6 +411,11 @@ do { \
#define this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
#define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
#define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
+#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
+#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
#define irqsafe_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
#define irqsafe_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
@@ -332,15 +429,32 @@ do { \
#define irqsafe_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
#define irqsafe_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
#define irqsafe_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
+#define irqsafe_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
#ifndef CONFIG_M386
#define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
#define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
#define __this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
+#define __this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define __this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define __this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+
#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
-#endif
+#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+
+#define irqsafe_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define irqsafe_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#endif /* !CONFIG_M386 */
+
/*
* Per cpu atomic 64 bit operations are only available under 64 bit.
* 32 bit must fall back to generic operations.
@@ -352,6 +466,7 @@ do { \
#define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
#define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
#define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
+#define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
@@ -359,14 +474,12 @@ do { \
#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
#define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
+#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
#define irqsafe_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
#define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
#define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
#define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
-
-#define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
-#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
#endif
/* This is not atomic against other CPUs -- CPU preemption needs to be off */
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 4d593defc47d..27c3c6fcfad3 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -417,6 +417,89 @@ do { \
# define this_cpu_xor(pcp, val) __pcpu_size_call(this_cpu_or_, (pcp), (val))
#endif
+#define _this_cpu_generic_add_return(pcp, val) \
+({ \
+ typeof(pcp) ret__; \
+ preempt_disable(); \
+ __this_cpu_add(pcp, val); \
+ ret__ = __this_cpu_read(pcp); \
+ preempt_enable(); \
+ ret__; \
+})
+
+#ifndef this_cpu_add_return
+# ifndef this_cpu_add_return_1
+# define this_cpu_add_return_1(pcp, val) _this_cpu_generic_add_return(pcp, val)
+# endif
+# ifndef this_cpu_add_return_2
+# define this_cpu_add_return_2(pcp, val) _this_cpu_generic_add_return(pcp, val)
+# endif
+# ifndef this_cpu_add_return_4
+# define this_cpu_add_return_4(pcp, val) _this_cpu_generic_add_return(pcp, val)
+# endif
+# ifndef this_cpu_add_return_8
+# define this_cpu_add_return_8(pcp, val) _this_cpu_generic_add_return(pcp, val)
+# endif
+# define this_cpu_add_return(pcp, val) __pcpu_size_call_return2(this_cpu_add_return_, pcp, val)
+#endif
+
+#define this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(val))
+#define this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1)
+#define this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1)
+
+#define _this_cpu_generic_xchg(pcp, nval) \
+({ typeof(pcp) ret__; \
+ preempt_disable(); \
+ ret__ = __this_cpu_read(pcp); \
+ __this_cpu_write(pcp, nval); \
+ preempt_enable(); \
+ ret__; \
+})
+
+#ifndef this_cpu_xchg
+# ifndef this_cpu_xchg_1
+# define this_cpu_xchg_1(pcp, nval) _this_cpu_generic_xchg(pcp, nval)
+# endif
+# ifndef this_cpu_xchg_2
+# define this_cpu_xchg_2(pcp, nval) _this_cpu_generic_xchg(pcp, nval)
+# endif
+# ifndef this_cpu_xchg_4
+# define this_cpu_xchg_4(pcp, nval) _this_cpu_generic_xchg(pcp, nval)
+# endif
+# ifndef this_cpu_xchg_8
+# define this_cpu_xchg_8(pcp, nval) _this_cpu_generic_xchg(pcp, nval)
+# endif
+# define this_cpu_xchg(pcp, nval) \
+ __pcpu_size_call_return2(this_cpu_xchg_, (pcp), nval)
+#endif
+
+#define _this_cpu_generic_cmpxchg(pcp, oval, nval) \
+({ typeof(pcp) ret__; \
+ preempt_disable(); \
+ ret__ = __this_cpu_read(pcp); \
+ if (ret__ == (oval)) \
+ __this_cpu_write(pcp, nval); \
+ preempt_enable(); \
+ ret__; \
+})
+
+#ifndef this_cpu_cmpxchg
+# ifndef this_cpu_cmpxchg_1
+# define this_cpu_cmpxchg_1(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# ifndef this_cpu_cmpxchg_2
+# define this_cpu_cmpxchg_2(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# ifndef this_cpu_cmpxchg_4
+# define this_cpu_cmpxchg_4(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# ifndef this_cpu_cmpxchg_8
+# define this_cpu_cmpxchg_8(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# define this_cpu_cmpxchg(pcp, oval, nval) \
+ __pcpu_size_call_return2(this_cpu_cmpxchg_, pcp, oval, nval)
+#endif
+
/*
* Generic percpu operations that do not require preemption handling.
* Either we do not care about races or the caller has the
@@ -544,36 +627,6 @@ do { \
# define __this_cpu_xor(pcp, val) __pcpu_size_call(__this_cpu_xor_, (pcp), (val))
#endif
-#define _this_cpu_generic_add_return(pcp, val) \
-({ \
- typeof(pcp) ret__; \
- preempt_disable(); \
- __this_cpu_add(pcp, val); \
- ret__ = __this_cpu_read(pcp); \
- preempt_enable(); \
- ret__; \
-})
-
-#ifndef this_cpu_add_return
-# ifndef this_cpu_add_return_1
-# define this_cpu_add_return_1(pcp, val) _this_cpu_generic_add_return(pcp, val)
-# endif
-# ifndef this_cpu_add_return_2
-# define this_cpu_add_return_2(pcp, val) _this_cpu_generic_add_return(pcp, val)
-# endif
-# ifndef this_cpu_add_return_4
-# define this_cpu_add_return_4(pcp, val) _this_cpu_generic_add_return(pcp, val)
-# endif
-# ifndef this_cpu_add_return_8
-# define this_cpu_add_return_8(pcp, val) _this_cpu_generic_add_return(pcp, val)
-# endif
-# define this_cpu_add_return(pcp, val) __pcpu_size_call_return2(this_cpu_add_return_, pcp, val)
-#endif
-
-#define this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(val))
-#define this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1)
-#define this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1)
-
#define __this_cpu_generic_add_return(pcp, val) \
({ \
__this_cpu_add(pcp, val); \
@@ -600,11 +653,61 @@ do { \
#define __this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1)
#define __this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1)
+#define __this_cpu_generic_xchg(pcp, nval) \
+({ typeof(pcp) ret__; \
+ ret__ = __this_cpu_read(pcp); \
+ __this_cpu_write(pcp, nval); \
+ ret__; \
+})
+
+#ifndef __this_cpu_xchg
+# ifndef __this_cpu_xchg_1
+# define __this_cpu_xchg_1(pcp, nval) __this_cpu_generic_xchg(pcp, nval)
+# endif
+# ifndef __this_cpu_xchg_2
+# define __this_cpu_xchg_2(pcp, nval) __this_cpu_generic_xchg(pcp, nval)
+# endif
+# ifndef __this_cpu_xchg_4
+# define __this_cpu_xchg_4(pcp, nval) __this_cpu_generic_xchg(pcp, nval)
+# endif
+# ifndef __this_cpu_xchg_8
+# define __this_cpu_xchg_8(pcp, nval) __this_cpu_generic_xchg(pcp, nval)
+# endif
+# define __this_cpu_xchg(pcp, nval) \
+ __pcpu_size_call_return2(__this_cpu_xchg_, (pcp), nval)
+#endif
+
+#define __this_cpu_generic_cmpxchg(pcp, oval, nval) \
+({ \
+ typeof(pcp) ret__; \
+ ret__ = __this_cpu_read(pcp); \
+ if (ret__ == (oval)) \
+ __this_cpu_write(pcp, nval); \
+ ret__; \
+})
+
+#ifndef __this_cpu_cmpxchg
+# ifndef __this_cpu_cmpxchg_1
+# define __this_cpu_cmpxchg_1(pcp, oval, nval) __this_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# ifndef __this_cpu_cmpxchg_2
+# define __this_cpu_cmpxchg_2(pcp, oval, nval) __this_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# ifndef __this_cpu_cmpxchg_4
+# define __this_cpu_cmpxchg_4(pcp, oval, nval) __this_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# ifndef __this_cpu_cmpxchg_8
+# define __this_cpu_cmpxchg_8(pcp, oval, nval) __this_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# define __this_cpu_cmpxchg(pcp, oval, nval) \
+ __pcpu_size_call_return2(__this_cpu_cmpxchg_, pcp, oval, nval)
+#endif
+
/*
* IRQ safe versions of the per cpu RMW operations. Note that these operations
* are *not* safe against modification of the same variable from another
* processors (which one gets when using regular atomic operations)
- . They are guaranteed to be atomic vs. local interrupts and
+ * They are guaranteed to be atomic vs. local interrupts and
* preemption only.
*/
#define irqsafe_cpu_generic_to_op(pcp, val, op) \
@@ -691,4 +794,33 @@ do { \
# define irqsafe_cpu_xor(pcp, val) __pcpu_size_call(irqsafe_cpu_xor_, (val))
#endif
+#define irqsafe_cpu_generic_cmpxchg(pcp, oval, nval) \
+({ \
+ typeof(pcp) ret__; \
+ unsigned long flags; \
+ local_irq_save(flags); \
+ ret__ = __this_cpu_read(pcp); \
+ if (ret__ == (oval)) \
+ __this_cpu_write(pcp, nval); \
+ local_irq_restore(flags); \
+ ret__; \
+})
+
+#ifndef irqsafe_cpu_cmpxchg
+# ifndef irqsafe_cpu_cmpxchg_1
+# define irqsafe_cpu_cmpxchg_1(pcp, oval, nval) irqsafe_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# ifndef irqsafe_cpu_cmpxchg_2
+# define irqsafe_cpu_cmpxchg_2(pcp, oval, nval) irqsafe_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# ifndef irqsafe_cpu_cmpxchg_4
+# define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) irqsafe_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# ifndef irqsafe_cpu_cmpxchg_8
+# define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) irqsafe_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# define irqsafe_cpu_cmpxchg(pcp, oval, nval) \
+ __pcpu_size_call_return2(irqsafe_cpu_cmpxchg_, (pcp), oval, nval)
+#endif
+
#endif /* __LINUX_PERCPU_H */
OpenPOWER on IntegriCloud