 llvm/lib/Target/X86/X86.td                  | 38
 llvm/lib/Target/X86/X86MacroFusion.cpp      |  6
 llvm/lib/Target/X86/X86Subtarget.cpp        |  1
 llvm/lib/Target/X86/X86Subtarget.h          |  4
 llvm/test/CodeGen/X86/avx-select.ll         |  4
 llvm/test/CodeGen/X86/avx-splat.ll          |  2
 llvm/test/CodeGen/X86/avx512-mask-op.ll     |  4
 llvm/test/CodeGen/X86/vec_int_to_fp.ll      |  4
 llvm/test/CodeGen/X86/x86-cmov-converter.ll |  2
 9 files changed, 43 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 37a7cdd779d..888af176a86 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -288,6 +288,13 @@ def FeatureERMSB
"ermsb", "HasERMSB", "true",
"REP MOVS/STOS are fast">;
+// Sandy Bridge and newer processors have many instructions that can be
+// fused with conditional branches and pass through the CPU as a single
+// operation.
+def FeatureMacroFusion
+ : SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
+ "Various instructions can be fused with conditional branches">;
+
//===----------------------------------------------------------------------===//
// X86 processors supported.
//===----------------------------------------------------------------------===//
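For context, macro-fusion lets the decoder merge a flag-producing instruction (CMP, TEST, and similar) with the conditional branch that consumes the flags into a single macro-op. A minimal illustration of the pattern the scheduler tries to keep adjacent, using hypothetical source code (not part of this patch):

    // Illustrative only: the loop's compare/branch typically lowers to a
    // CMP immediately followed by a Jcc, e.g.
    //   cmpl %ecx, %edx
    //   jge  .LBB0_4
    // On CPUs with the new "macrofusion" feature the scheduler keeps the
    // pair adjacent so the front end can issue it as one fused operation.
    int countBelow(const int *V, int N, int Bound) {
      int Count = 0;
      for (int I = 0; I < N; ++I)
        if (V[I] < Bound) // compare + branch: a macro-fusion candidate
          ++Count;
      return Count;
    }
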
@@ -372,7 +379,8 @@ def : ProcessorModel<"core2", SandyBridgeModel, [
FeatureFXSR,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureMacroFusion
]>;
def : ProcessorModel<"penryn", SandyBridgeModel, [
FeatureX87,
@@ -382,7 +390,8 @@ def : ProcessorModel<"penryn", SandyBridgeModel, [
FeatureFXSR,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureMacroFusion
]>;
// Atom CPUs.
@@ -468,7 +477,8 @@ class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureCMPXCHG16B,
FeatureSlowBTMem,
FeaturePOPCNT,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureMacroFusion
]>;
def : NehalemProc<"nehalem">;
def : NehalemProc<"corei7">;
@@ -485,7 +495,8 @@ class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureMacroFusion
]>;
def : WestmereProc<"westmere">;
@@ -516,7 +527,8 @@ def SNBFeatures : ProcessorFeatures<[], [
FeatureLAHFSAHF,
FeatureSlow3OpsLEA,
FeatureFastScalarFSQRT,
- FeatureFastSHLDRotate
+ FeatureFastSHLDRotate,
+ FeatureMacroFusion
]>;
class SandyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
@@ -731,7 +743,8 @@ def : Proc<"bdver1", [
FeatureXSAVE,
FeatureLWP,
FeatureSlowSHLD,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureMacroFusion
]>;
// Piledriver
def : Proc<"bdver2", [
@@ -755,7 +768,8 @@ def : Proc<"bdver2", [
FeatureLWP,
FeatureFMA,
FeatureSlowSHLD,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureMacroFusion
]>;
// Steamroller
@@ -782,7 +796,8 @@ def : Proc<"bdver3", [
FeatureXSAVEOPT,
FeatureSlowSHLD,
FeatureFSGSBase,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureMacroFusion
]>;
// Excavator
@@ -810,7 +825,8 @@ def : Proc<"bdver4", [
FeatureSlowSHLD,
FeatureFSGSBase,
FeatureLAHFSAHF,
- FeatureMWAITX
+ FeatureMWAITX,
+ FeatureMacroFusion
]>;
// Znver1
@@ -830,6 +846,7 @@ def: ProcessorModel<"znver1", Znver1Model, [
FeatureFastLZCNT,
FeatureLAHFSAHF,
FeatureLZCNT,
+ FeatureMacroFusion,
FeatureMMX,
FeatureMOVBE,
FeatureMWAITX,
@@ -873,7 +890,8 @@ def : ProcessorModel<"x86-64", SandyBridgeModel, [
Feature64Bit,
FeatureSlow3OpsLEA,
FeatureSlowBTMem,
- FeatureSlowIncDec
+ FeatureSlowIncDec,
+ FeatureMacroFusion
]>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86MacroFusion.cpp b/llvm/lib/Target/X86/X86MacroFusion.cpp
index 8fdf1061705..d3ef7aa8d6c 100644
--- a/llvm/lib/Target/X86/X86MacroFusion.cpp
+++ b/llvm/lib/Target/X86/X86MacroFusion.cpp
@@ -27,10 +27,8 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
const X86Subtarget &ST = static_cast<const X86Subtarget&>(TSI);
- // Check if this processor supports macro-fusion. Since this is a minor
- // heuristic, we haven't specifically reserved a feature. hasAVX is a decent
- // proxy for SandyBridge+.
- if (!ST.hasAVX())
+ // Check if this processor supports macro-fusion.
+ if (!ST.hasMacroFusion())
return false;
enum {
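For reference, this predicate is the callback that the generic macro-fusion DAG mutation invokes while building the scheduling graph. A hedged sketch of how it is typically wired up, assuming the helpers declared in llvm/CodeGen/MacroFusion.h in this era of the tree:

    // Sketch (not part of this diff): registering the predicate as a
    // scheduler DAG mutation. The mutation asks shouldScheduleAdjacent
    // about candidate pairs and adds artificial edges so that approved
    // pairs stay adjacent in the final schedule.
    #include "llvm/CodeGen/MacroFusion.h"

    namespace llvm {
    std::unique_ptr<ScheduleDAGMutation> createX86MacroFusionDAGMutation() {
      return createBranchMacroFusionDAGMutation(shouldScheduleAdjacent);
    }
    } // end namespace llvm
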
diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index 6ad6da95d7b..2a7733996c4 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -347,6 +347,7 @@ void X86Subtarget::initializeEnvironment() {
HasFastVectorFSQRT = false;
HasFastLZCNT = false;
HasFastSHLDRotate = false;
+ HasMacroFusion = false;
HasERMSB = false;
HasSlowDivide32 = false;
HasSlowDivide64 = false;
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 8b869022d76..7c85e9c2eee 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -238,6 +238,9 @@ protected:
/// True if SHLD based rotate is fast.
bool HasFastSHLDRotate;
+ /// True if the processor supports macrofusion.
+ bool HasMacroFusion;
+
/// True if the processor has enhanced REP MOVSB/STOSB.
bool HasERMSB;
@@ -488,6 +491,7 @@ public:
bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
bool hasFastLZCNT() const { return HasFastLZCNT; }
bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
+ bool hasMacroFusion() const { return HasMacroFusion; }
bool hasERMSB() const { return HasERMSB; }
bool hasSlowDivide32() const { return HasSlowDivide32; }
bool hasSlowDivide64() const { return HasSlowDivide64; }
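With the accessor in place, backend heuristics can test an explicit bit instead of inferring fusion support from hasAVX(). A small hypothetical example (the helper name is invented for illustration):

    // Hypothetical helper: gate a fusion-aware heuristic on the new bit.
    static bool preferFusedCompareAndBranch(const X86Subtarget &ST) {
      // Per the X86.td changes above, this is set for core2/penryn,
      // nehalem/westmere, Sandy Bridge and later, bdver1-4, znver1, and
      // the generic x86-64 model.
      return ST.hasMacroFusion();
    }
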
diff --git a/llvm/test/CodeGen/X86/avx-select.ll b/llvm/test/CodeGen/X86/avx-select.ll
index 7484f8257ca..f5ab0cab17f 100644
--- a/llvm/test/CodeGen/X86/avx-select.ll
+++ b/llvm/test/CodeGen/X86/avx-select.ll
@@ -16,8 +16,8 @@ define <8 x i32> @select00(i32 %a, <8 x i32> %b) nounwind {
;
; X64-LABEL: select00:
; X64: # BB#0:
-; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: cmpl $255, %edi
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: je .LBB0_2
; X64-NEXT: # BB#1:
; X64-NEXT: vmovaps %ymm0, %ymm1
@@ -44,8 +44,8 @@ define <4 x i64> @select01(i32 %a, <4 x i64> %b) nounwind {
;
; X64-LABEL: select01:
; X64: # BB#0:
-; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: cmpl $255, %edi
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: je .LBB1_2
; X64-NEXT: # BB#1:
; X64-NEXT: vmovaps %ymm0, %ymm1
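The scheduling churn in this and the following test files reflects the flipped condition: these AVX tests run with a plain generic CPU, which previously got fusion-friendly scheduling purely through the hasAVX() proxy. Without the macrofusion feature the scheduler is free to place an independent instruction between the compare and its branch. Schematically (assembly as comments, taken from the hunk above):

    // Before: fusion assumed via hasAVX(), compare pinned to the branch.
    //   vxorps %xmm1, %xmm1, %xmm1
    //   cmpl   $255, %edi
    //   je     .LBB0_2
    // After: generic CPU has no macrofusion, so the xor may split the pair.
    //   cmpl   $255, %edi
    //   vxorps %xmm1, %xmm1, %xmm1
    //   je     .LBB0_2

The x86-cmov-converter test at the end moves in the opposite direction, presumably because its default CPU model now carries the feature: there the compare ends up directly before the branch.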
diff --git a/llvm/test/CodeGen/X86/avx-splat.ll b/llvm/test/CodeGen/X86/avx-splat.ll
index 91d1f64c670..0f3f3e5fb6e 100644
--- a/llvm/test/CodeGen/X86/avx-splat.ll
+++ b/llvm/test/CodeGen/X86/avx-splat.ll
@@ -60,8 +60,8 @@ define <8 x float> @funcE() nounwind {
; CHECK-LABEL: funcE:
; CHECK: # BB#0: # %for_exit499
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: # implicit-def: %YMM0
; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: # implicit-def: %YMM0
; CHECK-NEXT: jne .LBB4_2
; CHECK-NEXT: # BB#1: # %load.i1247
; CHECK-NEXT: pushq %rbp
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index f6d752ddc3c..77a2a021416 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -692,8 +692,8 @@ define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
;
; AVX512BW-LABEL: test8:
; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: cmpl %esi, %edi
+; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: jg LBB17_1
; AVX512BW-NEXT: ## BB#2:
; AVX512BW-NEXT: vpcmpltud %zmm2, %zmm1, %k0
@@ -708,8 +708,8 @@ define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
;
; AVX512DQ-LABEL: test8:
; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT: cmpl %esi, %edi
+; AVX512DQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT: jg LBB17_1
; AVX512DQ-NEXT: ## BB#2:
; AVX512DQ-NEXT: vpcmpltud %zmm2, %zmm1, %k0
diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
index 7cb1c95cb01..3e36969f879 100644
--- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
@@ -1678,8 +1678,8 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
; VEX-NEXT: .LBB39_6:
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; VEX-NEXT: testq %rax, %rax
+; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; VEX-NEXT: js .LBB39_8
; VEX-NEXT: # BB#7:
; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
@@ -1914,8 +1914,8 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
; VEX-NEXT: .LBB41_6:
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; VEX-NEXT: testq %rax, %rax
+; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; VEX-NEXT: js .LBB41_8
; VEX-NEXT: # BB#7:
; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
diff --git a/llvm/test/CodeGen/X86/x86-cmov-converter.ll b/llvm/test/CodeGen/X86/x86-cmov-converter.ll
index cdb8894bfd9..5fec1380e14 100644
--- a/llvm/test/CodeGen/X86/x86-cmov-converter.ll
+++ b/llvm/test/CodeGen/X86/x86-cmov-converter.ll
@@ -296,9 +296,9 @@ while.end: ; preds = %while.body, %entry
; CHECK-LABEL: Transform
; CHECK-NOT: cmov
; CHECK: divl [[a:%[0-9a-z]*]]
-; CHECK: cmpl [[a]], %eax
; CHECK: movl $11, [[s1:%[0-9a-z]*]]
; CHECK: movl [[a]], [[s2:%[0-9a-z]*]]
+; CHECK: cmpl [[a]], %edx
; CHECK: ja [[SinkBB:.*]]
; CHECK: [[FalseBB:.*]]:
; CHECK: movl $22, [[s1]]