author     Craig Topper <craig.topper@intel.com>   2019-02-16 03:34:54 +0000
committer  Craig Topper <craig.topper@intel.com>   2019-02-16 03:34:54 +0000
commit     61da80584d7d6c99c5c3b745685a1fb44dcff164
tree       6de93924b12893a12738d08e0f44cfbb611a1bb8
parent     f6e77311502c091f5fad236be5b830801ec13332
[X86] Don't prevent load folding for cvtsi2ss/cvtsi2sd based on hasPartialRegUpdate.
Preventing the load fold won't fix the partial register update since the
input we can fold is a GPR. So it will do nothing to prevent a false dependency
on an XMM register.
llvm-svn: 354193
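The effect is visible directly in the fast-isel test updates below; for example, the SSE2 codegen for long_to_double_rm changes from

    movq      (%rdi), %rax
    cvtsi2sdq %rax, %xmm0
    retq

to

    cvtsi2sdq (%rdi), %xmm0
    retq

Either way the conversion writes only the low element of %xmm0, so blocking the fold would not remove the false dependency on the destination register; folding simply drops the separate GPR load.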
7 files changed, 78 insertions, 95 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index db55d50c2a0..b335976b304 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4239,7 +4239,8 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
 /// FIXME: This should be turned into a TSFlags.
 ///
 static bool hasPartialRegUpdate(unsigned Opcode,
-                                const X86Subtarget &Subtarget) {
+                                const X86Subtarget &Subtarget,
+                                bool ForLoadFold = false) {
   switch (Opcode) {
   case X86::CVTSI2SSrr:
   case X86::CVTSI2SSrm:
@@ -4249,6 +4250,9 @@ static bool hasPartialRegUpdate(unsigned Opcode,
   case X86::CVTSI2SDrm:
   case X86::CVTSI642SDrr:
   case X86::CVTSI642SDrm:
+    // Load folding won't effect the undef register update since the input is
+    // a GPR.
+    return !ForLoadFold;
   case X86::CVTSD2SSrr:
   case X86::CVTSD2SSrm:
   case X86::CVTSS2SDrr:
@@ -4325,7 +4329,7 @@ unsigned X86InstrInfo::getPartialRegUpdateClearance(
 // Return true for any instruction the copies the high bits of the first source
 // operand into the unused high bits of the destination operand.
-static bool hasUndefRegUpdate(unsigned Opcode) {
+static bool hasUndefRegUpdate(unsigned Opcode, bool ForLoadFold = false) {
   switch (Opcode) {
   case X86::VCVTSI2SSrr:
   case X86::VCVTSI2SSrm:
@@ -4343,38 +4347,6 @@ static bool hasUndefRegUpdate(unsigned Opcode) {
   case X86::VCVTSI642SDrm:
   case X86::VCVTSI642SDrr_Int:
   case X86::VCVTSI642SDrm_Int:
-  case X86::VCVTSD2SSrr:
-  case X86::VCVTSD2SSrm:
-  case X86::VCVTSD2SSrr_Int:
-  case X86::VCVTSD2SSrm_Int:
-  case X86::VCVTSS2SDrr:
-  case X86::VCVTSS2SDrm:
-  case X86::VCVTSS2SDrr_Int:
-  case X86::VCVTSS2SDrm_Int:
-  case X86::VRCPSSr:
-  case X86::VRCPSSr_Int:
-  case X86::VRCPSSm:
-  case X86::VRCPSSm_Int:
-  case X86::VROUNDSDr:
-  case X86::VROUNDSDm:
-  case X86::VROUNDSDr_Int:
-  case X86::VROUNDSDm_Int:
-  case X86::VROUNDSSr:
-  case X86::VROUNDSSm:
-  case X86::VROUNDSSr_Int:
-  case X86::VROUNDSSm_Int:
-  case X86::VRSQRTSSr:
-  case X86::VRSQRTSSr_Int:
-  case X86::VRSQRTSSm:
-  case X86::VRSQRTSSm_Int:
-  case X86::VSQRTSSr:
-  case X86::VSQRTSSr_Int:
-  case X86::VSQRTSSm:
-  case X86::VSQRTSSm_Int:
-  case X86::VSQRTSDr:
-  case X86::VSQRTSDr_Int:
-  case X86::VSQRTSDm:
-  case X86::VSQRTSDm_Int:
   // AVX-512
   case X86::VCVTSI2SSZrr:
   case X86::VCVTSI2SSZrm:
@@ -4415,6 +4387,42 @@ static bool hasUndefRegUpdate(unsigned Opcode) {
   case X86::VCVTUSI642SDZrr_Int:
   case X86::VCVTUSI642SDZrrb_Int:
   case X86::VCVTUSI642SDZrm_Int:
+    // Load folding won't effect the undef register update since the input is
+    // a GPR.
+    return !ForLoadFold;
+  case X86::VCVTSD2SSrr:
+  case X86::VCVTSD2SSrm:
+  case X86::VCVTSD2SSrr_Int:
+  case X86::VCVTSD2SSrm_Int:
+  case X86::VCVTSS2SDrr:
+  case X86::VCVTSS2SDrm:
+  case X86::VCVTSS2SDrr_Int:
+  case X86::VCVTSS2SDrm_Int:
+  case X86::VRCPSSr:
+  case X86::VRCPSSr_Int:
+  case X86::VRCPSSm:
+  case X86::VRCPSSm_Int:
+  case X86::VROUNDSDr:
+  case X86::VROUNDSDm:
+  case X86::VROUNDSDr_Int:
+  case X86::VROUNDSDm_Int:
+  case X86::VROUNDSSr:
+  case X86::VROUNDSSm:
+  case X86::VROUNDSSr_Int:
+  case X86::VROUNDSSm_Int:
+  case X86::VRSQRTSSr:
+  case X86::VRSQRTSSr_Int:
+  case X86::VRSQRTSSm:
+  case X86::VRSQRTSSm_Int:
+  case X86::VSQRTSSr:
+  case X86::VSQRTSSr_Int:
+  case X86::VSQRTSSm:
+  case X86::VSQRTSSm_Int:
+  case X86::VSQRTSDr:
+  case X86::VSQRTSDr_Int:
+  case X86::VSQRTSDm:
+  case X86::VSQRTSDm_Int:
+  // AVX-512
   case X86::VCVTSD2SSZrr:
   case X86::VCVTSD2SSZrr_Int:
   case X86::VCVTSD2SSZrrb_Int:
@@ -4735,8 +4743,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
   return nullptr;
 }
-static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF, MachineInstr &MI) {
-  if (MF.getFunction().optForSize() || !hasUndefRegUpdate(MI.getOpcode()) ||
+static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF,
+                                               MachineInstr &MI) {
+  if (!hasUndefRegUpdate(MI.getOpcode(), /*ForLoadFold*/true) ||
       !MI.getOperand(1).isReg())
     return false;
@@ -4772,7 +4781,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
   // Avoid partial and undef register update stalls unless optimizing for size.
   if (!MF.getFunction().optForSize() &&
-      (hasPartialRegUpdate(MI.getOpcode(), Subtarget) ||
+      (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) ||
       shouldPreventUndefRegUpdateMemFold(MF, MI)))
     return nullptr;
@@ -4940,7 +4949,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
   // Avoid partial and undef register update stalls unless optimizing for size.
   if (!MF.getFunction().optForSize() &&
-      (hasPartialRegUpdate(MI.getOpcode(), Subtarget) ||
+      (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) ||
      shouldPreventUndefRegUpdateMemFold(MF, MI)))
     return nullptr;
@@ -5140,7 +5149,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
   // Avoid partial and undef register update stalls unless optimizing for size.
   if (!MF.getFunction().optForSize() &&
-      (hasPartialRegUpdate(MI.getOpcode(), Subtarget) ||
+      (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) ||
      shouldPreventUndefRegUpdateMemFold(MF, MI)))
    return nullptr;
diff --git a/llvm/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll b/llvm/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll
index 469b5e5b4ba..5ba47bda19d 100644
--- a/llvm/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll
@@ -22,14 +22,12 @@ entry:
 define double @long_to_double_rm(i64* %a) {
 ; SSE2-LABEL: long_to_double_rm:
 ; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movq (%rdi), %rax
-; SSE2-NEXT: cvtsi2sdq %rax, %xmm0
+; SSE2-NEXT: cvtsi2sdq (%rdi), %xmm0
 ; SSE2-NEXT: retq
 ;
 ; AVX-LABEL: long_to_double_rm:
 ; AVX: # %bb.0: # %entry
-; AVX-NEXT: movq (%rdi), %rax
-; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0
+; AVX-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0
 ; AVX-NEXT: retq
 entry:
   %0 = load i64, i64* %a
@@ -71,14 +69,12 @@ entry:
 define float @long_to_float_rm(i64* %a) {
 ; SSE2-LABEL: long_to_float_rm:
 ; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movq (%rdi), %rax
-; SSE2-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE2-NEXT: cvtsi2ssq (%rdi), %xmm0
 ; SSE2-NEXT: retq
 ;
 ; AVX-LABEL: long_to_float_rm:
 ; AVX: # %bb.0: # %entry
-; AVX-NEXT: movq (%rdi), %rax
-; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
+; AVX-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0
 ; AVX-NEXT: retq
 entry:
   %0 = load i64, i64* %a
diff --git a/llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll b/llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll
index fbaa86a2e2c..7ba8ac13442 100644
--- a/llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll
@@ -27,8 +27,7 @@ define double @int_to_double_rr(i32 %a) {
 ; SSE2_X86-NEXT: .cfi_def_cfa_register %ebp
 ; SSE2_X86-NEXT: andl $-8, %esp
 ; SSE2_X86-NEXT: subl $8, %esp
-; SSE2_X86-NEXT: movl 8(%ebp), %eax
-; SSE2_X86-NEXT: cvtsi2sdl %eax, %xmm0
+; SSE2_X86-NEXT: cvtsi2sdl 8(%ebp), %xmm0
 ; SSE2_X86-NEXT: movsd %xmm0, (%esp)
 ; SSE2_X86-NEXT: fldl (%esp)
 ; SSE2_X86-NEXT: movl %ebp, %esp
@@ -45,8 +44,7 @@ define double @int_to_double_rr(i32 %a) {
 ; AVX_X86-NEXT: .cfi_def_cfa_register %ebp
 ; AVX_X86-NEXT: andl $-8, %esp
 ; AVX_X86-NEXT: subl $8, %esp
-; AVX_X86-NEXT: movl 8(%ebp), %eax
-; AVX_X86-NEXT: vcvtsi2sdl %eax, %xmm0, %xmm0
+; AVX_X86-NEXT: vcvtsi2sdl 8(%ebp), %xmm0, %xmm0
 ; AVX_X86-NEXT: vmovsd %xmm0, (%esp)
 ; AVX_X86-NEXT: fldl (%esp)
 ; AVX_X86-NEXT: movl %ebp, %esp
@@ -61,14 +59,12 @@ entry:
 define double @int_to_double_rm(i32* %a) {
 ; SSE2-LABEL: int_to_double_rm:
 ; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movl (%rdi), %eax
-; SSE2-NEXT: cvtsi2sdl %eax, %xmm0
+; SSE2-NEXT: cvtsi2sdl (%rdi), %xmm0
 ; SSE2-NEXT: retq
 ;
 ; AVX-LABEL: int_to_double_rm:
 ; AVX: # %bb.0: # %entry
-; AVX-NEXT: movl (%rdi), %eax
-; AVX-NEXT: vcvtsi2sdl %eax, %xmm0, %xmm0
+; AVX-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0
 ; AVX-NEXT: retq
 ;
 ; SSE2_X86-LABEL: int_to_double_rm:
@@ -179,8 +175,7 @@ define float @int_to_float_rr(i32 %a) {
 ; SSE2_X86: # %bb.0: # %entry
 ; SSE2_X86-NEXT: pushl %eax
 ; SSE2_X86-NEXT: .cfi_def_cfa_offset 8
-; SSE2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SSE2_X86-NEXT: cvtsi2ssl %eax, %xmm0
+; SSE2_X86-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
 ; SSE2_X86-NEXT: movss %xmm0, (%esp)
 ; SSE2_X86-NEXT: flds (%esp)
 ; SSE2_X86-NEXT: popl %eax
@@ -191,8 +186,7 @@ define float @int_to_float_rr(i32 %a) {
 ; AVX_X86: # %bb.0: # %entry
 ; AVX_X86-NEXT: pushl %eax
 ; AVX_X86-NEXT: .cfi_def_cfa_offset 8
-; AVX_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX_X86-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0
+; AVX_X86-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0
 ; AVX_X86-NEXT: vmovss %xmm0, (%esp)
 ; AVX_X86-NEXT: flds (%esp)
 ; AVX_X86-NEXT: popl %eax
@@ -206,14 +200,12 @@ entry:
 define float @int_to_float_rm(i32* %a) {
 ; SSE2-LABEL: int_to_float_rm:
 ; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movl (%rdi), %eax
-; SSE2-NEXT: cvtsi2ssl %eax, %xmm0
+; SSE2-NEXT: cvtsi2ssl (%rdi), %xmm0
 ; SSE2-NEXT: retq
 ;
 ; AVX-LABEL: int_to_float_rm:
 ; AVX: # %bb.0: # %entry
-; AVX-NEXT: movl (%rdi), %eax
-; AVX-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0
+; AVX-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0
 ; AVX-NEXT: retq
 ;
 ; SSE2_X86-LABEL: int_to_float_rm:
diff --git a/llvm/test/CodeGen/X86/fast-isel-uint-float-conversion-x86-64.ll b/llvm/test/CodeGen/X86/fast-isel-uint-float-conversion-x86-64.ll
index 60d2903ad09..22d8aa7b2d9 100644
--- a/llvm/test/CodeGen/X86/fast-isel-uint-float-conversion-x86-64.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-uint-float-conversion-x86-64.ll
@@ -15,8 +15,7 @@ entry:
 define double @long_to_double_rm(i64* %a) {
 ; ALL-LABEL: long_to_double_rm:
 ; ALL: # %bb.0: # %entry
-; ALL-NEXT: movq (%rdi), %rax
-; ALL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm0
+; ALL-NEXT: vcvtusi2sdq (%rdi), %xmm0, %xmm0
 ; ALL-NEXT: retq
 entry:
   %0 = load i64, i64* %a
@@ -48,8 +47,7 @@ entry:
 define float @long_to_float_rm(i64* %a) {
 ; ALL-LABEL: long_to_float_rm:
 ; ALL: # %bb.0: # %entry
-; ALL-NEXT: movq (%rdi), %rax
-; ALL-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm0
+; ALL-NEXT: vcvtusi2ssq (%rdi), %xmm0, %xmm0
 ; ALL-NEXT: retq
 entry:
   %0 = load i64, i64* %a
diff --git a/llvm/test/CodeGen/X86/fast-isel-uint-float-conversion.ll b/llvm/test/CodeGen/X86/fast-isel-uint-float-conversion.ll
index 6aad161d406..f883ac12051 100644
--- a/llvm/test/CodeGen/X86/fast-isel-uint-float-conversion.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-uint-float-conversion.ll
@@ -18,8 +18,7 @@ define double @int_to_double_rr(i32 %a) {
 ; AVX_X86-NEXT: .cfi_def_cfa_register %ebp
 ; AVX_X86-NEXT: andl $-8, %esp
 ; AVX_X86-NEXT: subl $8, %esp
-; AVX_X86-NEXT: movl 8(%ebp), %eax
-; AVX_X86-NEXT: vcvtusi2sdl %eax, %xmm0, %xmm0
+; AVX_X86-NEXT: vcvtusi2sdl 8(%ebp), %xmm0, %xmm0
 ; AVX_X86-NEXT: vmovsd %xmm0, (%esp)
 ; AVX_X86-NEXT: fldl (%esp)
 ; AVX_X86-NEXT: movl %ebp, %esp
@@ -34,8 +33,7 @@ entry:
 define double @int_to_double_rm(i32* %a) {
 ; AVX-LABEL: int_to_double_rm:
 ; AVX: # %bb.0: # %entry
-; AVX-NEXT: movl (%rdi), %eax
-; AVX-NEXT: vcvtusi2sdl %eax, %xmm0, %xmm0
+; AVX-NEXT: vcvtusi2sdl (%rdi), %xmm0, %xmm0
 ; AVX-NEXT: retq
 ;
 ; AVX_X86-LABEL: int_to_double_rm:
@@ -100,8 +98,7 @@ define float @int_to_float_rr(i32 %a) {
 ; AVX_X86: # %bb.0: # %entry
 ; AVX_X86-NEXT: pushl %eax
 ; AVX_X86-NEXT: .cfi_def_cfa_offset 8
-; AVX_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX_X86-NEXT: vcvtusi2ssl %eax, %xmm0, %xmm0
+; AVX_X86-NEXT: vcvtusi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0
 ; AVX_X86-NEXT: vmovss %xmm0, (%esp)
 ; AVX_X86-NEXT: flds (%esp)
 ; AVX_X86-NEXT: popl %eax
@@ -115,8 +112,7 @@ entry:
 define float @int_to_float_rm(i32* %a) {
 ; AVX-LABEL: int_to_float_rm:
 ; AVX: # %bb.0: # %entry
-; AVX-NEXT: movl (%rdi), %eax
-; AVX-NEXT: vcvtusi2ssl %eax, %xmm0, %xmm0
+; AVX-NEXT: vcvtusi2ssl (%rdi), %xmm0, %xmm0
 ; AVX-NEXT: retq
 ;
 ; AVX_X86-LABEL: int_to_float_rm:
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll
index 0d903efdbe2..cbeeb04f4d7 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll
@@ -577,8 +577,7 @@ define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) {
 }
 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
-; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
-define double @stack_fold_cvtsi2sd(i32 %a0) optsize {
+define double @stack_fold_cvtsi2sd(i32 %a0) {
   ;CHECK-LABEL: stack_fold_cvtsi2sd
   ;CHECK: vcvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -586,8 +585,7 @@ define double @stack_fold_cvtsi2sd(i32 %a0) optsize {
   ret double %2
 }
-; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
-define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) optsize {
+define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) {
   ;CHECK-LABEL: stack_fold_cvtsi2sd_int
   ;CHECK: vcvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -596,8 +594,7 @@ define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) optsize {
   ret <2 x double> %3
 }
-; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
-define double @stack_fold_cvtsi642sd(i64 %a0) optsize {
+define double @stack_fold_cvtsi642sd(i64 %a0) {
   ;CHECK-LABEL: stack_fold_cvtsi642sd
   ;CHECK: vcvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -605,8 +602,7 @@ define double @stack_fold_cvtsi642sd(i64 %a0) optsize {
   ret double %2
 }
-; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
-define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) optsize {
+define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) {
   ;CHECK-LABEL: stack_fold_cvtsi642sd_int
   ;CHECK: vcvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -615,8 +611,7 @@ define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) optsize {
   ret <2 x double> %3
 }
-; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
-define float @stack_fold_cvtsi2ss(i32 %a0) optsize {
+define float @stack_fold_cvtsi2ss(i32 %a0) {
   ;CHECK-LABEL: stack_fold_cvtsi2ss
   ;CHECK: vcvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -624,8 +619,7 @@ define float @stack_fold_cvtsi2ss(i32 %a0) optsize {
   ret float %2
 }
-; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
-define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) optsize {
+define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) {
   ;CHECK-LABEL: stack_fold_cvtsi2ss_int
   ;CHECK: vcvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -634,8 +628,7 @@ define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) optsize {
   ret <4 x float> %3
 }
-; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
-define float @stack_fold_cvtsi642ss(i64 %a0) optsize {
+define float @stack_fold_cvtsi642ss(i64 %a0) {
   ;CHECK-LABEL: stack_fold_cvtsi642ss
   ;CHECK: vcvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -643,8 +636,7 @@ define float @stack_fold_cvtsi642ss(i64 %a0) optsize {
   ret float %2
 }
-; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
-define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) optsize {
+define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) {
   ;CHECK-LABEL: stack_fold_cvtsi642ss_int
   ;CHECK: vcvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-sse42.ll b/llvm/test/CodeGen/X86/stack-folding-fp-sse42.ll
index 4599c4d931a..37f235cc78c 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-sse42.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-sse42.ll
@@ -357,7 +357,7 @@ define <4 x float> @stack_fold_cvtsd2ss_int(<2 x double> %a0) optsize {
 }
 declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
-define double @stack_fold_cvtsi2sd(i32 %a0) minsize {
+define double @stack_fold_cvtsi2sd(i32 %a0) {
   ;CHECK-LABEL: stack_fold_cvtsi2sd
   ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -374,7 +374,7 @@ define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0, <2 x double> %b0) {
   ret <2 x double> %3
 }
-define double @stack_fold_cvtsi642sd(i64 %a0) optsize {
+define double @stack_fold_cvtsi642sd(i64 %a0) {
   ;CHECK-LABEL: stack_fold_cvtsi642sd
   ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -391,7 +391,7 @@ define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0, <2 x double> %b0) {
   ret <2 x double> %3
 }
-define float @stack_fold_cvtsi2ss(i32 %a0) minsize {
+define float @stack_fold_cvtsi2ss(i32 %a0) {
   ;CHECK-LABEL: stack_fold_cvtsi2ss
   ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -408,7 +408,7 @@ define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0, <4 x float> %b0) {
   ret <4 x float> %3
 }
-define float @stack_fold_cvtsi642ss(i64 %a0) optsize {
+define float @stack_fold_cvtsi642ss(i64 %a0) {
   ;CHECK-LABEL: stack_fold_cvtsi642ss
   ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()