summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-06-15 06:11:36 +0000
committerCraig Topper <craig.topper@intel.com>2018-06-15 06:11:36 +0000
commit5ec210cc27e82f895eea150541b03a46494dbb03 (patch)
treec50d933f7a98156e4984c850652282ff81c18a90
parent5de2272174587bc8ce10a59fc20570890d0f9721 (diff)
downloadbcm5719-llvm-5ec210cc27e82f895eea150541b03a46494dbb03.tar.gz
bcm5719-llvm-5ec210cc27e82f895eea150541b03a46494dbb03.zip
[X86] Prevent folding stack reloads with instructions that have an undefined register update.
We want to keep the load unfolded so we can use the same register for both sources to avoid a false dependency. llvm-svn: 334802
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.cpp11
-rw-r--r--llvm/test/CodeGen/X86/break-false-dep.ll5
-rw-r--r--llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll20
3 files changed, 19 insertions, 17 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 234de6ab703..c9af646e563 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -8916,12 +8916,15 @@ static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF, MachineInstr
if (MF.getFunction().optForSize() || !hasUndefRegUpdate(MI.getOpcode()) ||
!MI.getOperand(1).isReg())
return false;
-
+
+ // Check if the register is explicitly marked as undef.
+ if (MI.getOperand(1).isUndef())
+ return true;
+
+ // Another possibility is that it is defined by an IMPLICIT_DEF pseudo.
MachineRegisterInfo &RegInfo = MF.getRegInfo();
MachineInstr *VRegDef = RegInfo.getUniqueVRegDef(MI.getOperand(1).getReg());
- if (VRegDef == nullptr)
- return false;
- return VRegDef->isImplicitDef();
+ return VRegDef && VRegDef->isImplicitDef();
}
diff --git a/llvm/test/CodeGen/X86/break-false-dep.ll b/llvm/test/CodeGen/X86/break-false-dep.ll
index d939381dc37..25a27f7e1ec 100644
--- a/llvm/test/CodeGen/X86/break-false-dep.ll
+++ b/llvm/test/CodeGen/X86/break-false-dep.ll
@@ -217,7 +217,6 @@ top:
; Make sure we are making a smart choice regarding undef registers and
; hiding the false dependency behind a true dependency
-; TODO: We shouldn't be folding the load here.
define double @truedeps(float %arg) {
top:
tail call void asm sideeffect "", "~{xmm6},~{dirflag},~{fpsr},~{flags}"()
@@ -228,8 +227,8 @@ top:
%tmp1 = fpext float %arg to double
ret double %tmp1
;AVX-LABEL:@truedeps
-;AVX: vxorps [[XMM6:%xmm6]], [[XMM6]], [[XMM6]]
-;AVX: vcvtss2sd {{.*}}, [[XMM6]], {{%xmm[0-9]+}}
+;AVX-NOT: vxorps
+;AVX: vcvtss2sd [[XMM0:%xmm[0-9]+]], [[XMM0]], {{%xmm[0-9]+}}
}
; Make sure we are making a smart choice regarding undef registers and
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll
index efcaa2bcec7..2537f6c6a7a 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll
@@ -573,7 +573,7 @@ define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) {
}
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
-define double @stack_fold_cvtsi2sd(i32 %a0) {
+define double @stack_fold_cvtsi2sd(i32 %a0) optsize {
;CHECK-LABEL: stack_fold_cvtsi2sd
;CHECK: vcvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -581,7 +581,7 @@ define double @stack_fold_cvtsi2sd(i32 %a0) {
ret double %2
}
-define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) {
+define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) optsize {
;CHECK-LABEL: stack_fold_cvtsi2sd_int
;CHECK: vcvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -590,7 +590,7 @@ define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) {
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
-define double @stack_fold_cvtsi642sd(i64 %a0) {
+define double @stack_fold_cvtsi642sd(i64 %a0) optsize {
;CHECK-LABEL: stack_fold_cvtsi642sd
;CHECK: vcvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -598,7 +598,7 @@ define double @stack_fold_cvtsi642sd(i64 %a0) {
ret double %2
}
-define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) {
+define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) optsize {
;CHECK-LABEL: stack_fold_cvtsi642sd_int
;CHECK: vcvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -607,7 +607,7 @@ define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) {
}
declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
-define float @stack_fold_cvtsi2ss(i32 %a0) {
+define float @stack_fold_cvtsi2ss(i32 %a0) optsize {
;CHECK-LABEL: stack_fold_cvtsi2ss
;CHECK: vcvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -615,7 +615,7 @@ define float @stack_fold_cvtsi2ss(i32 %a0) {
ret float %2
}
-define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) {
+define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) optsize {
;CHECK-LABEL: stack_fold_cvtsi2ss_int
;CHECK: vcvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -624,7 +624,7 @@ define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) {
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone
-define float @stack_fold_cvtsi642ss(i64 %a0) {
+define float @stack_fold_cvtsi642ss(i64 %a0) optsize {
;CHECK-LABEL: stack_fold_cvtsi642ss
;CHECK: vcvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -632,7 +632,7 @@ define float @stack_fold_cvtsi642ss(i64 %a0) {
ret float %2
}
-define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) {
+define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) optsize {
;CHECK-LABEL: stack_fold_cvtsi642ss_int
;CHECK: vcvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -1653,7 +1653,7 @@ define <8 x float> @stack_fold_sqrtps_ymm(<8 x float> %a0) {
}
declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
-define double @stack_fold_sqrtsd(double %a0) {
+define double @stack_fold_sqrtsd(double %a0) optsize {
;CHECK-LABEL: stack_fold_sqrtsd
;CHECK: vsqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
@@ -1664,7 +1664,7 @@ declare double @llvm.sqrt.f64(double) nounwind readnone
; TODO stack_fold_sqrtsd_int
-define float @stack_fold_sqrtss(float %a0) {
+define float @stack_fold_sqrtss(float %a0) optsize {
;CHECK-LABEL: stack_fold_sqrtss
;CHECK: vsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
OpenPOWER on IntegriCloud