diff options
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 32 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/f16c-intrinsics.ll | 28 |
3 files changed, 32 insertions, 30 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index eb5a3ddc58c..955a40ee171 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -7698,11 +7698,11 @@ multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop> { T8PD, VEX, Sched<[WriteCvtF2FLd]>; } -multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> { +multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop> { def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst), (ins RC:$src1, i32u8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (Int RC:$src1, imm:$src2))]>, + [(set VR128:$dst, (X86cvtps2ph RC:$src1, imm:$src2))]>, TAPD, VEX, Sched<[WriteCvtF2F]>; let hasSideEffects = 0, mayStore = 1, SchedRW = [WriteCvtF2FLd, WriteRMW] in @@ -7715,6 +7715,8 @@ multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> { let Predicates = [HasF16C, NoVLX] in { defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem>; defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem>, VEX_L; + defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem>; + defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem>, VEX_L; // Pattern match vcvtph2ps of a scalar i64 load. def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))), @@ -7724,23 +7726,17 @@ let Predicates = [HasF16C, NoVLX] in { def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), (VCVTPH2PSrm addr:$src)>; -} - -let Predicates = [HasF16C] in { - defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>; - defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>, VEX_L; - def : Pat<(store (f64 (extractelt (bc_v2f64 (v8i16 - (int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))), - addr:$dst), - (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>; - def : Pat<(store (i64 (extractelt (bc_v2i64 (v8i16 - (int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))), - addr:$dst), - (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>; - def : Pat<(store (v8i16 (int_x86_vcvtps2ph_256 VR256:$src1, i32:$src2)), - addr:$dst), - (VCVTPS2PHYmr addr:$dst, VR256:$src1, imm:$src2)>; + def : Pat<(store (f64 (extractelt + (bc_v2f64 (v8i16 (X86cvtps2ph VR128:$src1, i32:$src2))), + (iPTR 0))), addr:$dst), + (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>; + def : Pat<(store (i64 (extractelt + (bc_v2i64 (v8i16 (X86cvtps2ph VR128:$src1, i32:$src2))), + (iPTR 0))), addr:$dst), + (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>; + def : Pat<(store (v8i16 (X86cvtps2ph VR256:$src1, i32:$src2)), addr:$dst), + (VCVTPS2PHYmr addr:$dst, VR256:$src1, imm:$src2)>; } // Patterns for matching conversions from float to half-float and vice versa. diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 0ed9d2fdee9..9edac22d5ba 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -1588,6 +1588,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0), X86_INTRINSIC_DATA(vcvtph2ps_128, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0), X86_INTRINSIC_DATA(vcvtph2ps_256, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0), + X86_INTRINSIC_DATA(vcvtps2ph_128, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0), + X86_INTRINSIC_DATA(vcvtps2ph_256, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0), X86_INTRINSIC_DATA(xop_vpcomb, INTR_TYPE_3OP, X86ISD::VPCOM, 0), X86_INTRINSIC_DATA(xop_vpcomd, INTR_TYPE_3OP, X86ISD::VPCOM, 0), X86_INTRINSIC_DATA(xop_vpcomq, INTR_TYPE_3OP, X86ISD::VPCOM, 0), diff --git a/llvm/test/CodeGen/X86/f16c-intrinsics.ll b/llvm/test/CodeGen/X86/f16c-intrinsics.ll index 18b9d8b0045..c0fa94cefaf 100644 --- a/llvm/test/CodeGen/X86/f16c-intrinsics.ll +++ b/llvm/test/CodeGen/X86/f16c-intrinsics.ll @@ -121,12 +121,12 @@ define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0) { ; ; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128: ; X32-AVX512VL: # BB#0: -; X32-AVX512VL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x00] +; X32-AVX512VL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x00] ; X32-AVX512VL-NEXT: retl # encoding: [0xc3] ; ; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128: ; X64-AVX512VL: # BB#0: -; X64-AVX512VL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x00] +; X64-AVX512VL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x00] ; X64-AVX512VL-NEXT: retq # encoding: [0xc3] %res = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -148,13 +148,13 @@ define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0) { ; ; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_256: ; X32-AVX512VL: # BB#0: -; X32-AVX512VL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x00] +; X32-AVX512VL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x00] ; X32-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X32-AVX512VL-NEXT: retl # encoding: [0xc3] ; ; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_256: ; X64-AVX512VL: # BB#0: -; X64-AVX512VL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x00] +; X64-AVX512VL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x00] ; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-AVX512VL-NEXT: retq # encoding: [0xc3] %res = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0) ; <<8 x i16>> [#uses=1] @@ -238,13 +238,13 @@ define void @test_x86_vcvtps2ph_256_m(<8 x i16>* nocapture %d, <8 x float> %a) n ; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_256_m: ; X32-AVX512VL: # BB#0: # %entry ; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X32-AVX512VL-NEXT: vcvtps2ph $3, %ymm0, (%eax) # encoding: [0xc4,0xe3,0x7d,0x1d,0x00,0x03] +; X32-AVX512VL-NEXT: vcvtps2ph $3, %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0x00,0x03] ; X32-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X32-AVX512VL-NEXT: retl # encoding: [0xc3] ; ; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_256_m: ; X64-AVX512VL: # BB#0: # %entry -; X64-AVX512VL-NEXT: vcvtps2ph $3, %ymm0, (%rdi) # encoding: [0xc4,0xe3,0x7d,0x1d,0x07,0x03] +; X64-AVX512VL-NEXT: vcvtps2ph $3, %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0x07,0x03] ; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-AVX512VL-NEXT: retq # encoding: [0xc3] entry: @@ -268,7 +268,7 @@ define void @test_x86_vcvtps2ph_128_m(<4 x i16>* nocapture %d, <4 x float> %a) n ; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m: ; X32-AVX512VL: # BB#0: # %entry ; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03] +; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03] ; X32-AVX512VL-NEXT: vpmovzxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xc0] ; X32-AVX512VL-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; X32-AVX512VL-NEXT: vpmovdw %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x33,0x00] @@ -276,7 +276,7 @@ define void @test_x86_vcvtps2ph_128_m(<4 x i16>* nocapture %d, <4 x float> %a) n ; ; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m: ; X64-AVX512VL: # BB#0: # %entry -; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03] +; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03] ; X64-AVX512VL-NEXT: vpmovzxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xc0] ; X64-AVX512VL-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; X64-AVX512VL-NEXT: vpmovdw %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x33,0x07] @@ -303,12 +303,14 @@ define void @test_x86_vcvtps2ph_128_m2(double* nocapture %hf4x16, <4 x float> %f ; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m2: ; X32-AVX512VL: # BB#0: # %entry ; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%eax) # encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03] +; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03] +; X32-AVX512VL-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00] ; X32-AVX512VL-NEXT: retl # encoding: [0xc3] ; ; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m2: ; X64-AVX512VL: # BB#0: # %entry -; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%rdi) # encoding: [0xc4,0xe3,0x79,0x1d,0x07,0x03] +; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03] +; X64-AVX512VL-NEXT: vmovlps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07] ; X64-AVX512VL-NEXT: retq # encoding: [0xc3] entry: %0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %f4x32, i32 3) @@ -333,12 +335,14 @@ define void @test_x86_vcvtps2ph_128_m3(i64* nocapture %hf4x16, <4 x float> %f4x3 ; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m3: ; X32-AVX512VL: # BB#0: # %entry ; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%eax) # encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03] +; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03] +; X32-AVX512VL-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00] ; X32-AVX512VL-NEXT: retl # encoding: [0xc3] ; ; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m3: ; X64-AVX512VL: # BB#0: # %entry -; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%rdi) # encoding: [0xc4,0xe3,0x79,0x1d,0x07,0x03] +; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03] +; X64-AVX512VL-NEXT: vmovlps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07] ; X64-AVX512VL-NEXT: retq # encoding: [0xc3] entry: %0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %f4x32, i32 3) |