summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAhmed Bougacha <ahmed.bougacha@gmail.com>2016-02-02 01:32:50 +0000
committerAhmed Bougacha <ahmed.bougacha@gmail.com>2016-02-02 01:32:50 +0000
commit55c6682ae22910fb73b0e03e25cba68c0f99faa9 (patch)
treec252a0f41a4bc33d408f0270e2acf1b981bb6896
parentcad7994c3b4f023767fbc5cf66ed4eeee940a149 (diff)
downloadbcm5719-llvm-55c6682ae22910fb73b0e03e25cba68c0f99faa9.tar.gz
bcm5719-llvm-55c6682ae22910fb73b0e03e25cba68c0f99faa9.zip
[X86] Don't force Nearest-Even rounding for VCVTPS2PH, use MXCSR.
Officially, we don't acknowledge non-default configurations of MXCSR, as getting there would require usage of the FENV_ACCESS pragma (at least insofar as rounding mode is concerned). We don't support the pragma, so we can assume that the default rounding mode - round to nearest, ties to even - is always used. However, it's inconsistent with the rest of the instruction set, where MXCSR is always effective (unless otherwise specified). Also, it's an unnecessary obstacle to the few brave souls that use fenv.h with LLVM. Avoid the hard-coded rounding mode for fp_to_f16; use MXCSR instead. llvm-svn: 259448
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td9
-rw-r--r--llvm/test/CodeGen/X86/fastmath-float-half-conversion.ll4
-rw-r--r--llvm/test/CodeGen/X86/half.ll4
3 files changed, 11 insertions, 6 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 0ec8106d95a..722ea0fe64a 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -8296,9 +8296,14 @@ let Predicates = [HasF16C] in {
// Patterns for matching conversions from float to half-float and vice versa.
let Predicates = [HasF16C] in {
+ // Use MXCSR.RC for rounding instead of explicitly specifying the default
+ // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
+ // configurations we support (the default). However, falling back to MXCSR is
+ // more consistent with other instructions, which are always controlled by it.
+ // It's encoded as 0b100.
def : Pat<(fp_to_f16 FR32:$src),
(i16 (EXTRACT_SUBREG (VMOVPDI2DIrr (VCVTPS2PHrr
- (COPY_TO_REGCLASS FR32:$src, VR128), 0)), sub_16bit))>;
+ (COPY_TO_REGCLASS FR32:$src, VR128), 4)), sub_16bit))>;
def : Pat<(f16_to_fp GR16:$src),
(f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
@@ -8306,7 +8311,7 @@ let Predicates = [HasF16C] in {
def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32:$src))),
(f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
- (VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 0)), FR32)) >;
+ (VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 4)), FR32)) >;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/X86/fastmath-float-half-conversion.ll b/llvm/test/CodeGen/X86/fastmath-float-half-conversion.ll
index 29308735cca..637fcc21595 100644
--- a/llvm/test/CodeGen/X86/fastmath-float-half-conversion.ll
+++ b/llvm/test/CodeGen/X86/fastmath-float-half-conversion.ll
@@ -5,7 +5,7 @@ define zeroext i16 @test1_fast(double %d) #0 {
; ALL-LABEL: test1_fast:
; F16C-NOT: callq {{_+}}truncdfhf2
; F16C: vcvtsd2ss %xmm0, %xmm0, %xmm0
-; F16C-NEXT: vcvtps2ph $0, %xmm0, %xmm0
+; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX: callq {{_+}}truncdfhf2
; ALL: ret
entry:
@@ -19,7 +19,7 @@ define zeroext i16 @test2_fast(x86_fp80 %d) #0 {
; F16C: fldt
; F16C-NEXT: fstps
; F16C-NEXT: vmovss
-; F16C-NEXT: vcvtps2ph $0, %xmm0, %xmm0
+; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX: callq {{_+}}truncxfhf2
; ALL: ret
entry:
diff --git a/llvm/test/CodeGen/X86/half.ll b/llvm/test/CodeGen/X86/half.ll
index 3b2518e28f5..531891f9cae 100644
--- a/llvm/test/CodeGen/X86/half.ll
+++ b/llvm/test/CodeGen/X86/half.ll
@@ -102,7 +102,7 @@ define void @test_sitofp_i64(i64 %a, half* %p) #0 {
; CHECK_LIBCALL-NEXT: retq
; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG0:%[a-z0-9]+]], [[REG0]]
-; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG0]], [[REG0]]
+; CHECK-F16C-NEXT: vcvtps2ph $4, [[REG0]], [[REG0]]
; CHECK-F16C-NEXT: vmovd [[REG0]], %eax
; CHECK-F16C-NEXT: movw %ax, (%rsi)
; CHECK-F16C-NEXT: retq
@@ -175,7 +175,7 @@ define void @test_uitofp_i64(i64 %a, half* %p) #0 {
; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]])
; CHECK-LIBCALL-NEXT: popq [[ADDR]]
-; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG1]], [[REG4:%[a-z0-9]+]]
+; CHECK-F16C-NEXT: vcvtps2ph $4, [[REG1]], [[REG4:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovd [[REG4]], %eax
; CHECK-F16C-NEXT: movw %ax, (%rsi)
; CHECK-NEXT: retq
OpenPOWER on IntegriCloud