From e0ba94f14f747c2661c4d21f8c44e5b0b8cd8e48 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Thu, 28 Jun 2012 09:02:16 +0800 Subject: x86/tlb_info: get last level TLB entry number of CPU For 4KB pages, x86 CPU has 2 or 1 level TLB, first level is data TLB and instruction TLB, second level is shared TLB for both data and instructions. For hupe page TLB, usually there is just one level and seperated by 2MB/4MB and 1GB. Although each levels TLB size is important for performance tuning, but for genernal and rude optimizing, last level TLB entry number is suitable. And in fact, last level TLB always has the biggest entry number. This patch will get the biggest TLB entry number and use it in furture TLB optimizing. Accroding Borislav's suggestion, except tlb_ll[i/d]_* array, other function and data will be released after system boot up. For all kinds of x86 vendor friendly, vendor specific code was moved to its specific files. Signed-off-by: Alex Shi Link: http://lkml.kernel.org/r/1340845344-27557-2-git-send-email-alex.shi@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'arch/x86/kernel/cpu/common.c') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6b9333b429ba..b2016df00813 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -452,6 +452,25 @@ void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c) c->x86_cache_size = l2size; } +u16 __read_mostly tlb_lli_4k[NR_INFO]; +u16 __read_mostly tlb_lli_2m[NR_INFO]; +u16 __read_mostly tlb_lli_4m[NR_INFO]; +u16 __read_mostly tlb_lld_4k[NR_INFO]; +u16 __read_mostly tlb_lld_2m[NR_INFO]; +u16 __read_mostly tlb_lld_4m[NR_INFO]; + +void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c) +{ + if (this_cpu->c_detect_tlb) + this_cpu->c_detect_tlb(c); + + printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ + "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n", + tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], + tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], + tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES]); +} + void __cpuinit detect_ht(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_HT @@ -911,6 +930,8 @@ void __init identify_boot_cpu(void) #else vgetcpu_set_mode(); #endif + if (boot_cpu_data.cpuid_level >= 2) + cpu_detect_tlb(&boot_cpu_data); } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) -- cgit v1.2.1 From c4211f42d3e66875298a5e26a75109878c80f15b Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Thu, 28 Jun 2012 09:02:19 +0800 Subject: x86/tlb: add tlb_flushall_shift for specific CPU Testing show different CPU type(micro architectures and NUMA mode) has different balance points between the TLB flush all and multiple invlpg. And there also has cases the tlb flush change has no any help. This patch give a interface to let x86 vendor developers have a chance to set different shift for different CPU type. like some machine in my hands, balance points is 16 entries on Romely-EP; while it is at 8 entries on Bloomfield NHM-EP; and is 256 on IVB mobile CPU. but on model 15 core2 Xeon using invlpg has nothing help. For untested machine, do a conservative optimization, same as NHM CPU. Signed-off-by: Alex Shi Link: http://lkml.kernel.org/r/1340845344-27557-5-git-send-email-alex.shi@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel/cpu/common.c') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b2016df00813..7595552600b8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -459,16 +459,26 @@ u16 __read_mostly tlb_lld_4k[NR_INFO]; u16 __read_mostly tlb_lld_2m[NR_INFO]; u16 __read_mostly tlb_lld_4m[NR_INFO]; +/* + * tlb_flushall_shift shows the balance point in replacing cr3 write + * with multiple 'invlpg'. It will do this replacement when + * flush_tlb_lines <= active_lines/2^tlb_flushall_shift. + * If tlb_flushall_shift is -1, means the replacement will be disabled. + */ +s8 __read_mostly tlb_flushall_shift = -1; + void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c) { if (this_cpu->c_detect_tlb) this_cpu->c_detect_tlb(c); printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ - "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n", + "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ + "tlb_flushall_shift is 0x%x\n", tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], - tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES]); + tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], + tlb_flushall_shift); } void __cpuinit detect_ht(struct cpuinfo_x86 *c) -- cgit v1.2.1