summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Ravikiran G Thirumalai <kiran@scalex86.org> 2006-01-08 01:01:27 -0800
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-01-08 20:13:38 -0800
commit 22fc6eccbf4ce4eb6265e6ada7b50a7b9cc57d05 (patch)
tree 3887dc6f1eeb658d773be037971b98d6f5fb3dd7
parent 6d524aed1f50b2b1d5b4ad5a4e2fe3f38106d0a6 (diff)
downloadblackbird-op-linux-22fc6eccbf4ce4eb6265e6ada7b50a7b9cc57d05.tar.gz
blackbird-op-linux-22fc6eccbf4ce4eb6265e6ada7b50a7b9cc57d05.zip
[PATCH] Change maxaligned_in_smp alignment macros to internodealigned_in_smp macros
____cacheline_maxaligned_in_smp is currently used to align critical structures and avoid false sharing. It uses the per-arch L1_CACHE_SHIFT_MAX, and people find L1_CACHE_SHIFT_MAX useless. However, we have been using ____cacheline_maxaligned_in_smp to align structures on the internode cacheline size. As per Andi's suggestion, the following patch kills ____cacheline_maxaligned_in_smp and introduces INTERNODE_CACHE_SHIFT, which defaults to L1_CACHE_SHIFT for all arches. Arches needing L3/internode cacheline alignment can define INTERNODE_CACHE_SHIFT in the arch asm/cache.h. The patch replaces ____cacheline_maxaligned_in_smp with ____cacheline_internodealigned_in_smp. With this patch, L1_CACHE_SHIFT_MAX can be killed.
Signed-off-by: Ravikiran Thirumalai <kiran@scalex86.org>
Signed-off-by: Shai Fultheim <shai@scalex86.org>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--arch/i386/kernel/init_task.c2
-rw-r--r--arch/i386/kernel/irq.c2
-rw-r--r--arch/x86_64/kernel/init_task.c2
-rw-r--r--include/linux/cache.h17
-rw-r--r--include/linux/ide.h2
-rw-r--r--include/linux/mmzone.h4
-rw-r--r--include/linux/rcupdate.h2
-rw-r--r--kernel/rcupdate.c4
-rw-r--r--mm/sparse.c4
9 files changed, 24 insertions, 15 deletions
diff --git a/arch/i386/kernel/init_task.c b/arch/i386/kernel/init_task.c
index 9caa8e8db80c..cff95d10a4d8 100644
--- a/arch/i386/kernel/init_task.c
+++ b/arch/i386/kernel/init_task.c
@@ -42,5 +42,5 @@ EXPORT_SYMBOL(init_task);
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
* no more per-task TSS's.
*/
-DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp = INIT_TSS;
+DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS;
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 1a201a932865..f3a9c78c4a24 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -19,7 +19,7 @@
#include <linux/cpu.h>
#include <linux/delay.h>
-DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp;
+DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
EXPORT_PER_CPU_SYMBOL(irq_stat);
#ifndef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86_64/kernel/init_task.c b/arch/x86_64/kernel/init_task.c
index e0ba5c1043fd..ce31d904d601 100644
--- a/arch/x86_64/kernel/init_task.c
+++ b/arch/x86_64/kernel/init_task.c
@@ -44,6 +44,6 @@ EXPORT_SYMBOL(init_task);
* section. Since TSS's are completely CPU-local, we want them
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
*/
-DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp = INIT_TSS;
+DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS;
#define ALIGN_TO_4K __attribute__((section(".data.init_task")))
diff --git a/include/linux/cache.h b/include/linux/cache.h
index 0b7ecf3af78a..ffe52210fc4f 100644
--- a/include/linux/cache.h
+++ b/include/linux/cache.h
@@ -45,12 +45,21 @@
#endif /* CONFIG_SMP */
#endif
-#if !defined(____cacheline_maxaligned_in_smp)
+/*
+ * The maximum alignment needed for some critical structures
+ * These could be inter-node cacheline sizes/L3 cacheline
+ * size etc. Define this in asm/cache.h for your arch
+ */
+#ifndef INTERNODE_CACHE_SHIFT
+#define INTERNODE_CACHE_SHIFT L1_CACHE_SHIFT
+#endif
+
+#if !defined(____cacheline_internodealigned_in_smp)
#if defined(CONFIG_SMP)
-#define ____cacheline_maxaligned_in_smp \
- __attribute__((__aligned__(1 << (L1_CACHE_SHIFT_MAX))))
+#define ____cacheline_internodealigned_in_smp \
+ __attribute__((__aligned__(1 << (INTERNODE_CACHE_SHIFT))))
#else
-#define ____cacheline_maxaligned_in_smp
+#define ____cacheline_internodealigned_in_smp
#endif
#endif
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 7b6a6a58e465..4dd6694963c0 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -801,7 +801,7 @@ typedef struct hwif_s {
unsigned dma;
void (*led_act)(void *data, int rw);
-} ____cacheline_maxaligned_in_smp ide_hwif_t;
+} ____cacheline_internodealigned_in_smp ide_hwif_t;
/*
* internal ide interrupt handler type
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 2a89c132ba9c..7e4ae6ab1977 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -38,7 +38,7 @@ struct pglist_data;
#if defined(CONFIG_SMP)
struct zone_padding {
char x[0];
-} ____cacheline_maxaligned_in_smp;
+} ____cacheline_internodealigned_in_smp;
#define ZONE_PADDING(name) struct zone_padding name;
#else
#define ZONE_PADDING(name)
@@ -233,7 +233,7 @@ struct zone {
* rarely used fields:
*/
char *name;
-} ____cacheline_maxaligned_in_smp;
+} ____cacheline_internodealigned_in_smp;
/*
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index a471f3bb713e..51747cd88d1a 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -65,7 +65,7 @@ struct rcu_ctrlblk {
long cur; /* Current batch number. */
long completed; /* Number of the last completed batch */
int next_pending; /* Is the next batch already waiting? */
-} ____cacheline_maxaligned_in_smp;
+} ____cacheline_internodealigned_in_smp;
/* Is batch a before batch b ? */
static inline int rcu_batch_before(long a, long b)
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 48d3bce465b8..c9afc61240e4 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -61,9 +61,9 @@ struct rcu_state {
/* for current batch to proceed. */
};
-static struct rcu_state rcu_state ____cacheline_maxaligned_in_smp =
+static struct rcu_state rcu_state ____cacheline_internodealigned_in_smp =
{.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
-static struct rcu_state rcu_bh_state ____cacheline_maxaligned_in_smp =
+static struct rcu_state rcu_bh_state ____cacheline_internodealigned_in_smp =
{.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
diff --git a/mm/sparse.c b/mm/sparse.c
index 72079b538e2d..0a51f36ba3a1 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -18,10 +18,10 @@
*/
#ifdef CONFIG_SPARSEMEM_EXTREME
struct mem_section *mem_section[NR_SECTION_ROOTS]
- ____cacheline_maxaligned_in_smp;
+ ____cacheline_internodealigned_in_smp;
#else
struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
- ____cacheline_maxaligned_in_smp;
+ ____cacheline_internodealigned_in_smp;
#endif
EXPORT_SYMBOL(mem_section);
OpenPOWER on IntegriCloud