summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/X86/X86InstrSSE.td | 9
-rw-r--r-- llvm/test/CodeGen/X86/fastmath-float-half-conversion.ll | 4
-rw-r--r-- llvm/test/CodeGen/X86/half.ll | 4
3 files changed, 11 insertions, 6 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 0ec8106d95a..722ea0fe64a 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -8296,9 +8296,14 @@ let Predicates = [HasF16C] in {
// Patterns for matching conversions from float to half-float and vice versa.
let Predicates = [HasF16C] in {
+ // Use MXCSR.RC for rounding instead of explicitly specifying the default
+ // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
+ // configurations we support (the default). However, falling back to MXCSR is
+ // more consistent with other instructions, which are always controlled by it.
+ // It's encoded as 0b100.
def : Pat<(fp_to_f16 FR32:$src),
(i16 (EXTRACT_SUBREG (VMOVPDI2DIrr (VCVTPS2PHrr
- (COPY_TO_REGCLASS FR32:$src, VR128), 0)), sub_16bit))>;
+ (COPY_TO_REGCLASS FR32:$src, VR128), 4)), sub_16bit))>;
def : Pat<(f16_to_fp GR16:$src),
(f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
@@ -8306,7 +8311,7 @@ let Predicates = [HasF16C] in {
def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32:$src))),
(f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
- (VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 0)), FR32)) >;
+ (VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 4)), FR32)) >;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/X86/fastmath-float-half-conversion.ll b/llvm/test/CodeGen/X86/fastmath-float-half-conversion.ll
index 29308735cca..637fcc21595 100644
--- a/llvm/test/CodeGen/X86/fastmath-float-half-conversion.ll
+++ b/llvm/test/CodeGen/X86/fastmath-float-half-conversion.ll
@@ -5,7 +5,7 @@ define zeroext i16 @test1_fast(double %d) #0 {
; ALL-LABEL: test1_fast:
; F16C-NOT: callq {{_+}}truncdfhf2
; F16C: vcvtsd2ss %xmm0, %xmm0, %xmm0
-; F16C-NEXT: vcvtps2ph $0, %xmm0, %xmm0
+; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX: callq {{_+}}truncdfhf2
; ALL: ret
entry:
@@ -19,7 +19,7 @@ define zeroext i16 @test2_fast(x86_fp80 %d) #0 {
; F16C: fldt
; F16C-NEXT: fstps
; F16C-NEXT: vmovss
-; F16C-NEXT: vcvtps2ph $0, %xmm0, %xmm0
+; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX: callq {{_+}}truncxfhf2
; ALL: ret
entry:
diff --git a/llvm/test/CodeGen/X86/half.ll b/llvm/test/CodeGen/X86/half.ll
index 3b2518e28f5..531891f9cae 100644
--- a/llvm/test/CodeGen/X86/half.ll
+++ b/llvm/test/CodeGen/X86/half.ll
@@ -102,7 +102,7 @@ define void @test_sitofp_i64(i64 %a, half* %p) #0 {
; CHECK_LIBCALL-NEXT: retq
; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG0:%[a-z0-9]+]], [[REG0]]
-; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG0]], [[REG0]]
+; CHECK-F16C-NEXT: vcvtps2ph $4, [[REG0]], [[REG0]]
; CHECK-F16C-NEXT: vmovd [[REG0]], %eax
; CHECK-F16C-NEXT: movw %ax, (%rsi)
; CHECK-F16C-NEXT: retq
@@ -175,7 +175,7 @@ define void @test_uitofp_i64(i64 %a, half* %p) #0 {
; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]])
; CHECK-LIBCALL-NEXT: popq [[ADDR]]
-; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG1]], [[REG4:%[a-z0-9]+]]
+; CHECK-F16C-NEXT: vcvtps2ph $4, [[REG1]], [[REG4:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovd [[REG4]], %eax
; CHECK-F16C-NEXT: movw %ax, (%rsi)
; CHECK-NEXT: retq
OpenPOWER on IntegriCloud