author | Craig Topper <craig.topper@intel.com> | 2018-06-15 06:11:36 +0000
---|---|---
committer | Craig Topper <craig.topper@intel.com> | 2018-06-15 06:11:36 +0000
commit | 5ec210cc27e82f895eea150541b03a46494dbb03 (patch) |
tree | c50d933f7a98156e4984c850652282ff81c18a90 |
parent | 5de2272174587bc8ce10a59fc20570890d0f9721 (diff) |
download | bcm5719-llvm-5ec210cc27e82f895eea150541b03a46494dbb03.tar.gz, bcm5719-llvm-5ec210cc27e82f895eea150541b03a46494dbb03.zip |
[X86] Prevent folding stack reloads with instructions that have an undefined register update.
We want to keep the load unfolded so we can use the same register for both sources to avoid a false dependency.
llvm-svn: 334802
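The mechanics, in brief: scalar conversions like `vcvtss2sd` write only the low element of the destination and pass the upper bits through from their first source. When that source is undef, the instruction still reads whatever register the allocator happened to pick, creating a false dependency on an unrelated earlier writer. The sketch below illustrates the intended before/after codegen for the `truedeps` test updated in this patch; the registers and stack slot are illustrative choices, not verbatim compiler output.

```asm
# Before this patch (reload folded): the pass-through source must be a
# register (xmm6 here), so a vxorps idiom was emitted to break the false
# dependency on xmm6's previous writer.
vxorps    %xmm6, %xmm6, %xmm6        # dependency-breaking zero idiom
vcvtss2sd (%rsp), %xmm6, %xmm0       # upper bits pass through xmm6

# After this patch (reload kept unfolded): the loaded value feeds both
# sources, so the pass-through read becomes a true dependency on data the
# instruction already waits for, and no vxorps is needed.
vmovss    (%rsp), %xmm0              # explicit reload from the spill slot
vcvtss2sd %xmm0, %xmm0, %xmm0        # same register for both sources
```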
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 11
-rw-r--r-- | llvm/test/CodeGen/X86/break-false-dep.ll | 5
-rw-r--r-- | llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll | 20

3 files changed, 19 insertions, 17 deletions
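For readability, here is the patched helper from X86InstrInfo.cpp reassembled as a whole, with the comments expanded; this is a paraphrase of the hunk below, not authoritative source. Note the `optForSize()` early-out: it is also why the stack-folding tests gain `optsize`, since functions that still want to exercise the folded form must now opt back in.

```cpp
// Paraphrase of shouldPreventUndefRegUpdateMemFold after this patch.
// Returns true when folding a stack reload into MI should be blocked
// because MI has an undefined register update.
static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF,
                                               MachineInstr &MI) {
  // When optimizing for size, prefer the smaller folded form; also bail
  // out for opcodes without an undef register update, or when operand 1
  // is not a register at all.
  if (MF.getFunction().optForSize() || !hasUndefRegUpdate(MI.getOpcode()) ||
      !MI.getOperand(1).isReg())
    return false;

  // New in this patch: the operand may carry an explicit undef flag.
  if (MI.getOperand(1).isUndef())
    return true;

  // Otherwise the virtual register may be defined by an IMPLICIT_DEF
  // pseudo, which the existing path detects.
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  MachineInstr *VRegDef = RegInfo.getUniqueVRegDef(MI.getOperand(1).getReg());
  return VRegDef && VRegDef->isImplicitDef();
}
```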
```diff
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 234de6ab703..c9af646e563 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -8916,12 +8916,15 @@ static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF,
                                                MachineInstr &MI) {
   if (MF.getFunction().optForSize() || !hasUndefRegUpdate(MI.getOpcode()) ||
       !MI.getOperand(1).isReg())
     return false;
-
+
+  // Check if the register is explicitly marked as undef.
+  if (MI.getOperand(1).isUndef())
+    return true;
+
+  // Another possibility is that it is defined by an IMPLICIT_DEF pseudo.
   MachineRegisterInfo &RegInfo = MF.getRegInfo();
   MachineInstr *VRegDef = RegInfo.getUniqueVRegDef(MI.getOperand(1).getReg());
-  if (VRegDef == nullptr)
-    return false;
-  return VRegDef->isImplicitDef();
+  return VRegDef && VRegDef->isImplicitDef();
 }
 
diff --git a/llvm/test/CodeGen/X86/break-false-dep.ll b/llvm/test/CodeGen/X86/break-false-dep.ll
index d939381dc37..25a27f7e1ec 100644
--- a/llvm/test/CodeGen/X86/break-false-dep.ll
+++ b/llvm/test/CodeGen/X86/break-false-dep.ll
@@ -217,7 +217,6 @@ top:
 
 ; Make sure we are making a smart choice regarding undef registers and
 ; hiding the false dependency behind a true dependency
-; TODO: We shouldn't be folding the load here.
 define double @truedeps(float %arg) {
 top:
   tail call void asm sideeffect "", "~{xmm6},~{dirflag},~{fpsr},~{flags}"()
@@ -228,8 +227,8 @@ top:
   %tmp1 = fpext float %arg to double
   ret double %tmp1
 ;AVX-LABEL:@truedeps
-;AVX: vxorps [[XMM6:%xmm6]], [[XMM6]], [[XMM6]]
-;AVX: vcvtss2sd {{.*}}, [[XMM6]], {{%xmm[0-9]+}}
+;AVX-NOT: vxorps
+;AVX: vcvtss2sd [[XMM0:%xmm[0-9]+]], [[XMM0]], {{%xmm[0-9]+}}
 }
 
 ; Make sure we are making a smart choice regarding undef registers and
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll
index efcaa2bcec7..2537f6c6a7a 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll
@@ -573,7 +573,7 @@ define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) {
 }
 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
 
-define double @stack_fold_cvtsi2sd(i32 %a0) {
+define double @stack_fold_cvtsi2sd(i32 %a0) optsize {
   ;CHECK-LABEL: stack_fold_cvtsi2sd
   ;CHECK: vcvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -581,7 +581,7 @@ define double @stack_fold_cvtsi2sd(i32 %a0) {
   ret double %2
 }
 
-define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) {
+define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) optsize {
   ;CHECK-LABEL: stack_fold_cvtsi2sd_int
   ;CHECK: vcvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -590,7 +590,7 @@ define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) {
 }
 declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
 
-define double @stack_fold_cvtsi642sd(i64 %a0) {
+define double @stack_fold_cvtsi642sd(i64 %a0) optsize {
   ;CHECK-LABEL: stack_fold_cvtsi642sd
   ;CHECK: vcvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -598,7 +598,7 @@ define double @stack_fold_cvtsi642sd(i64 %a0) {
   ret double %2
 }
 
-define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) {
+define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) optsize {
   ;CHECK-LABEL: stack_fold_cvtsi642sd_int
   ;CHECK: vcvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -607,7 +607,7 @@ define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) {
 }
 declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
 
-define float @stack_fold_cvtsi2ss(i32 %a0) {
+define float @stack_fold_cvtsi2ss(i32 %a0) optsize {
   ;CHECK-LABEL: stack_fold_cvtsi2ss
   ;CHECK: vcvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -615,7 +615,7 @@ define float @stack_fold_cvtsi2ss(i32 %a0) {
   ret float %2
 }
 
-define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) {
+define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) optsize {
   ;CHECK-LABEL: stack_fold_cvtsi2ss_int
   ;CHECK: vcvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -624,7 +624,7 @@ define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) {
 }
 declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone
 
-define float @stack_fold_cvtsi642ss(i64 %a0) {
+define float @stack_fold_cvtsi642ss(i64 %a0) optsize {
   ;CHECK-LABEL: stack_fold_cvtsi642ss
   ;CHECK: vcvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -632,7 +632,7 @@ define float @stack_fold_cvtsi642ss(i64 %a0) {
   ret float %2
 }
 
-define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) {
+define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) optsize {
   ;CHECK-LABEL: stack_fold_cvtsi642ss_int
   ;CHECK: vcvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -1653,7 +1653,7 @@ define <8 x float> @stack_fold_sqrtps_ymm(<8 x float> %a0) {
 }
 declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
 
-define double @stack_fold_sqrtsd(double %a0) {
+define double @stack_fold_sqrtsd(double %a0) optsize {
   ;CHECK-LABEL: stack_fold_sqrtsd
   ;CHECK: vsqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
@@ -1664,7 +1664,7 @@ declare double @llvm.sqrt.f64(double) nounwind readnone
 
 ; TODO stack_fold_sqrtsd_int
 
-define float @stack_fold_sqrtss(float %a0) {
+define float @stack_fold_sqrtss(float %a0) optsize {
   ;CHECK-LABEL: stack_fold_sqrtss
   ;CHECK: vsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
```