summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/X86/X86InstrInfo.cpp 7
-rw-r--r-- llvm/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll 40
-rw-r--r-- llvm/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll 38
-rw-r--r-- llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll 98
4 files changed, 177 insertions, 6 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 276ceae7cdc..a2ec1f4f469 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -8389,6 +8389,11 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
MI.getOpcode() == X86::PUSH64r))
return nullptr;
+ // Avoid partial register update stalls unless optimizing for size.
+ // TODO: we should block undef reg update as well.
+ if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode()))
+ return nullptr;
+
unsigned NumOps = MI.getDesc().getNumOperands();
bool isTwoAddr =
NumOps > 1 && MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
@@ -8554,6 +8559,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
// Unless optimizing for size, don't fold to avoid partial
// register update stalls
+ // TODO: we should block undef reg update as well.
if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode()))
return nullptr;
@@ -8752,6 +8758,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
if (NoFusing) return nullptr;
// Avoid partial register update stalls unless optimizing for size.
+ // TODO: we should block undef reg update as well.
if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode()))
return nullptr;
diff --git a/llvm/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll b/llvm/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll
index 1035c256790..4a3337554b6 100644
--- a/llvm/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll
@@ -55,7 +55,8 @@ entry:
define double @single_to_double_rm(float* %x) {
; SSE-LABEL: single_to_double_rm:
; SSE: # BB#0: # %entry
-; SSE-NEXT: cvtss2sd (%rdi), %xmm0
+; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT: cvtss2sd %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: single_to_double_rm:
@@ -69,10 +70,28 @@ entry:
ret double %conv
}
+define double @single_to_double_rm_optsize(float* %x) optsize {
+; SSE-LABEL: single_to_double_rm_optsize:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: cvtss2sd (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: single_to_double_rm_optsize:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %0 = load float, float* %x, align 4
+ %conv = fpext float %0 to double
+ ret double %conv
+}
+
define float @double_to_single_rm(double* %x) {
; SSE-LABEL: double_to_single_rm:
; SSE: # BB#0: # %entry
-; SSE-NEXT: cvtsd2ss (%rdi), %xmm0
+; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT: cvtsd2ss %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: double_to_single_rm:
@@ -85,3 +104,20 @@ entry:
%conv = fptrunc double %0 to float
ret float %conv
}
+
+define float @double_to_single_rm_optsize(double* %x) optsize {
+; SSE-LABEL: double_to_single_rm_optsize:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: cvtsd2ss (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: double_to_single_rm_optsize:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %0 = load double, double* %x, align 8
+ %conv = fptrunc double %0 to float
+ ret float %conv
+}
diff --git a/llvm/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll b/llvm/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll
index 432e190a745..50eddab2b45 100644
--- a/llvm/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll
@@ -21,7 +21,8 @@ entry:
define double @long_to_double_rm(i64* %a) {
; SSE2-LABEL: long_to_double_rm:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: cvtsi2sdq (%rdi), %xmm0
+; SSE2-NEXT: movq (%rdi), %rax
+; SSE2-NEXT: cvtsi2sdq %rax, %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: long_to_double_rm:
@@ -34,6 +35,22 @@ entry:
ret double %1
}
+define double @long_to_double_rm_optsize(i64* %a) optsize {
+; SSE2-LABEL: long_to_double_rm_optsize:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: cvtsi2sdq (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: long_to_double_rm_optsize:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %0 = load i64, i64* %a
+ %1 = sitofp i64 %0 to double
+ ret double %1
+}
+
define float @long_to_float_rr(i64 %a) {
; SSE2-LABEL: long_to_float_rr:
; SSE2: # BB#0: # %entry
@@ -52,7 +69,8 @@ entry:
define float @long_to_float_rm(i64* %a) {
; SSE2-LABEL: long_to_float_rm:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: cvtsi2ssq (%rdi), %xmm0
+; SSE2-NEXT: movq (%rdi), %rax
+; SSE2-NEXT: cvtsi2ssq %rax, %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: long_to_float_rm:
@@ -64,3 +82,19 @@ entry:
%1 = sitofp i64 %0 to float
ret float %1
}
+
+define float @long_to_float_rm_optsize(i64* %a) optsize {
+; SSE2-LABEL: long_to_float_rm_optsize:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: cvtsi2ssq (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: long_to_float_rm_optsize:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %0 = load i64, i64* %a
+ %1 = sitofp i64 %0 to float
+ ret float %1
+}
diff --git a/llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll b/llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll
index d722d37ee6b..57b50abab53 100644
--- a/llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll
@@ -58,7 +58,8 @@ entry:
define double @int_to_double_rm(i32* %a) {
; SSE2-LABEL: int_to_double_rm:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: cvtsi2sdl (%rdi), %xmm0
+; SSE2-NEXT: movl (%rdi), %eax
+; SSE2-NEXT: cvtsi2sdl %eax, %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: int_to_double_rm:
@@ -107,6 +108,58 @@ entry:
ret double %1
}
+define double @int_to_double_rm_optsize(i32* %a) optsize {
+; SSE2-LABEL: int_to_double_rm_optsize:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: cvtsi2sdl (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: int_to_double_rm_optsize:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; SSE2_X86-LABEL: int_to_double_rm_optsize:
+; SSE2_X86: # BB#0: # %entry
+; SSE2_X86-NEXT: pushl %ebp
+; SSE2_X86-NEXT: .cfi_def_cfa_offset 8
+; SSE2_X86-NEXT: .cfi_offset %ebp, -8
+; SSE2_X86-NEXT: movl %esp, %ebp
+; SSE2_X86-NEXT: .cfi_def_cfa_register %ebp
+; SSE2_X86-NEXT: andl $-8, %esp
+; SSE2_X86-NEXT: subl $8, %esp
+; SSE2_X86-NEXT: movl 8(%ebp), %eax
+; SSE2_X86-NEXT: cvtsi2sdl (%eax), %xmm0
+; SSE2_X86-NEXT: movsd %xmm0, (%esp)
+; SSE2_X86-NEXT: fldl (%esp)
+; SSE2_X86-NEXT: movl %ebp, %esp
+; SSE2_X86-NEXT: popl %ebp
+; SSE2_X86-NEXT: .cfi_def_cfa %esp, 4
+; SSE2_X86-NEXT: retl
+;
+; AVX_X86-LABEL: int_to_double_rm_optsize:
+; AVX_X86: # BB#0: # %entry
+; AVX_X86-NEXT: pushl %ebp
+; AVX_X86-NEXT: .cfi_def_cfa_offset 8
+; AVX_X86-NEXT: .cfi_offset %ebp, -8
+; AVX_X86-NEXT: movl %esp, %ebp
+; AVX_X86-NEXT: .cfi_def_cfa_register %ebp
+; AVX_X86-NEXT: andl $-8, %esp
+; AVX_X86-NEXT: subl $8, %esp
+; AVX_X86-NEXT: movl 8(%ebp), %eax
+; AVX_X86-NEXT: vcvtsi2sdl (%eax), %xmm0, %xmm0
+; AVX_X86-NEXT: vmovsd %xmm0, (%esp)
+; AVX_X86-NEXT: fldl (%esp)
+; AVX_X86-NEXT: movl %ebp, %esp
+; AVX_X86-NEXT: popl %ebp
+; AVX_X86-NEXT: .cfi_def_cfa %esp, 4
+; AVX_X86-NEXT: retl
+entry:
+ %0 = load i32, i32* %a
+ %1 = sitofp i32 %0 to double
+ ret double %1
+}
+
define float @int_to_float_rr(i32 %a) {
; SSE2-LABEL: int_to_float_rr:
; SSE2: # BB#0: # %entry
@@ -148,7 +201,8 @@ entry:
define float @int_to_float_rm(i32* %a) {
; SSE2-LABEL: int_to_float_rm:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: cvtsi2ssl (%rdi), %xmm0
+; SSE2-NEXT: movl (%rdi), %eax
+; SSE2-NEXT: cvtsi2ssl %eax, %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: int_to_float_rm:
@@ -184,3 +238,43 @@ entry:
%1 = sitofp i32 %0 to float
ret float %1
}
+
+define float @int_to_float_rm_optsize(i32* %a) optsize {
+; SSE2-LABEL: int_to_float_rm_optsize:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: cvtsi2ssl (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: int_to_float_rm_optsize:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; SSE2_X86-LABEL: int_to_float_rm_optsize:
+; SSE2_X86: # BB#0: # %entry
+; SSE2_X86-NEXT: pushl %eax
+; SSE2_X86-NEXT: .cfi_def_cfa_offset 8
+; SSE2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE2_X86-NEXT: cvtsi2ssl (%eax), %xmm0
+; SSE2_X86-NEXT: movss %xmm0, (%esp)
+; SSE2_X86-NEXT: flds (%esp)
+; SSE2_X86-NEXT: popl %eax
+; SSE2_X86-NEXT: .cfi_def_cfa_offset 4
+; SSE2_X86-NEXT: retl
+;
+; AVX_X86-LABEL: int_to_float_rm_optsize:
+; AVX_X86: # BB#0: # %entry
+; AVX_X86-NEXT: pushl %eax
+; AVX_X86-NEXT: .cfi_def_cfa_offset 8
+; AVX_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX_X86-NEXT: vcvtsi2ssl (%eax), %xmm0, %xmm0
+; AVX_X86-NEXT: vmovss %xmm0, (%esp)
+; AVX_X86-NEXT: flds (%esp)
+; AVX_X86-NEXT: popl %eax
+; AVX_X86-NEXT: .cfi_def_cfa_offset 4
+; AVX_X86-NEXT: retl
+entry:
+ %0 = load i32, i32* %a
+ %1 = sitofp i32 %0 to float
+ ret float %1
+}
OpenPOWER on IntegriCloud