-rw-r--r--   llvm/lib/Target/X86/X86InstrSSE.td                        9
-rw-r--r--   llvm/test/CodeGen/X86/fastmath-float-half-conversion.ll   4
-rw-r--r--   llvm/test/CodeGen/X86/half.ll                             4
3 files changed, 11 insertions, 6 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 0ec8106d95a..722ea0fe64a 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -8296,9 +8296,14 @@ let Predicates = [HasF16C] in {
 // Patterns for matching conversions from float to half-float and vice versa.
 let Predicates = [HasF16C] in {
+  // Use MXCSR.RC for rounding instead of explicitly specifying the default
+  // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
+  // configurations we support (the default). However, falling back to MXCSR is
+  // more consistent with other instructions, which are always controlled by it.
+  // It's encoded as 0b100.
   def : Pat<(fp_to_f16 FR32:$src),
             (i16 (EXTRACT_SUBREG (VMOVPDI2DIrr (VCVTPS2PHrr
-              (COPY_TO_REGCLASS FR32:$src, VR128), 0)), sub_16bit))>;
+              (COPY_TO_REGCLASS FR32:$src, VR128), 4)), sub_16bit))>;

   def : Pat<(f16_to_fp GR16:$src),
             (f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
@@ -8306,7 +8311,7 @@ let Predicates = [HasF16C] in {

   def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32:$src))),
             (f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
-             (VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 0)), FR32)) >;
+             (VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 4)), FR32)) >;
 }

 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/X86/fastmath-float-half-conversion.ll b/llvm/test/CodeGen/X86/fastmath-float-half-conversion.ll
index 29308735cca..637fcc21595 100644
--- a/llvm/test/CodeGen/X86/fastmath-float-half-conversion.ll
+++ b/llvm/test/CodeGen/X86/fastmath-float-half-conversion.ll
@@ -5,7 +5,7 @@ define zeroext i16 @test1_fast(double %d) #0 {
 ; ALL-LABEL: test1_fast:
 ; F16C-NOT: callq {{_+}}truncdfhf2
 ; F16C: vcvtsd2ss %xmm0, %xmm0, %xmm0
-; F16C-NEXT: vcvtps2ph $0, %xmm0, %xmm0
+; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
 ; AVX: callq {{_+}}truncdfhf2
 ; ALL: ret
 entry:
@@ -19,7 +19,7 @@ define zeroext i16 @test2_fast(x86_fp80 %d) #0 {
 ; F16C: fldt
 ; F16C-NEXT: fstps
 ; F16C-NEXT: vmovss
-; F16C-NEXT: vcvtps2ph $0, %xmm0, %xmm0
+; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
 ; AVX: callq {{_+}}truncxfhf2
 ; ALL: ret
 entry:
diff --git a/llvm/test/CodeGen/X86/half.ll b/llvm/test/CodeGen/X86/half.ll
index 3b2518e28f5..531891f9cae 100644
--- a/llvm/test/CodeGen/X86/half.ll
+++ b/llvm/test/CodeGen/X86/half.ll
@@ -102,7 +102,7 @@ define void @test_sitofp_i64(i64 %a, half* %p) #0 {
 ; CHECK_LIBCALL-NEXT: retq
 ; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG0:%[a-z0-9]+]], [[REG0]]
-; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG0]], [[REG0]]
+; CHECK-F16C-NEXT: vcvtps2ph $4, [[REG0]], [[REG0]]
 ; CHECK-F16C-NEXT: vmovd [[REG0]], %eax
 ; CHECK-F16C-NEXT: movw %ax, (%rsi)
 ; CHECK-F16C-NEXT: retq
@@ -175,7 +175,7 @@ define void @test_uitofp_i64(i64 %a, half* %p) #0 {
 ; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
 ; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]])
 ; CHECK-LIBCALL-NEXT: popq [[ADDR]]
-; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG1]], [[REG4:%[a-z0-9]+]]
+; CHECK-F16C-NEXT: vcvtps2ph $4, [[REG1]], [[REG4:%[a-z0-9]+]]
 ; CHECK-F16C-NEXT: vmovd [[REG4]], %eax
 ; CHECK-F16C-NEXT: movw %ax, (%rsi)
 ; CHECK-NEXT: retq
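For reference, the imm8 operand of VCVTPS2PH selects the rounding control: bits 1:0 give an explicit rounding mode (00 = round to nearest even) and bit 2 (0b100, i.e. 4) makes the instruction defer to MXCSR.RC. A minimal C sketch of the same choice through the F16C intrinsics follows; the helper name float_to_half is only for illustration and is not part of the patch, and the code assumes a compiler with -mf16c enabled.

    #include <immintrin.h>
    #include <stdint.h>

    /* Illustrative helper (not from the patch); build with -mf16c.
     * The second argument of _mm_cvtps_ph is the same imm8 the TableGen
     * patterns above emit:
     *   _MM_FROUND_TO_NEAREST_INT (0) - explicit round-to-nearest-even
     *   _MM_FROUND_CUR_DIRECTION  (4) - 0b100, use MXCSR.RC
     */
    static inline uint16_t float_to_half(float f) {
      __m128  v = _mm_set_ss(f);                              /* scalar float in lane 0 */
      __m128i h = _mm_cvtps_ph(v, _MM_FROUND_CUR_DIRECTION);  /* imm8 = 4 */
      return (uint16_t)_mm_extract_epi16(h, 0);               /* low 16 bits = half value */
    }

With the default MXCSR state both immediates round identically, which is why the test updates only change the printed immediate from $0 to $4.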