| | | |
|---|---|---|
| author | Cameron McInally <cameron.mcinally@nyu.edu> | 2014-11-14 15:43:00 +0000 |
| committer | Cameron McInally <cameron.mcinally@nyu.edu> | 2014-11-14 15:43:00 +0000 |
| commit | 04400449c5242d5b21629e372ce636da53c622b0 (patch) | |
| tree | 9a754e21a465b0cd8ca4b67833cb7e4cdd97d20f /llvm | |
| parent | 1180c05db2583b069bee6374ea1fbb53ec2801f8 (diff) | |
| download | bcm5719-llvm-04400449c5242d5b21629e372ce636da53c622b0.tar.gz bcm5719-llvm-04400449c5242d5b21629e372ce636da53c622b0.zip | |
[AVX512] Add 512b masked integer shift by immediate patterns.
llvm-svn: 222002
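
For context, the patterns added here cover the already-declared `llvm.x86.avx512.mask.*` shift-by-immediate intrinsics when they are called with a real mask and a non-zero pass-through operand. A minimal IR sketch follows (the function name `@mask_pslli_d_example` is made up for illustration; the call and the expected instruction are taken from the new tests in `avx512-intrinsics.ll` below):

```llvm
; Shift every i32 lane of %a0 left by 7. Lanes whose bit in %mask is 0 keep the
; corresponding lane of the pass-through value %a1. With the patterns added in
; this commit, this should select one masked instruction:
;   vpslld $7, %zmm0, %zmm1 {%k1}
define <16 x i32> @mask_pslli_d_example(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
```

Passing `zeroinitializer` as the pass-through instead exercises the zero-masking form (`{%k1} {z}`), which the new `maskz` tests below check.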
Diffstat (limited to 'llvm')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 46 |
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrFormats.td | 4 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-intrinsics.ll | 117 |
3 files changed, 123 insertions, 44 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 596d2c3bb4b..a9ab0261a59 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -3130,29 +3130,17 @@ def : Pat <(i8 (int_x86_avx512_mask_ptestm_q_512 (v8i64 VR512:$src1),
 // AVX-512 Shift instructions
 //===----------------------------------------------------------------------===//
 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
-                            string OpcodeStr, SDNode OpNode, RegisterClass RC,
-                            ValueType vt, X86MemOperand x86memop, PatFrag mem_frag,
-                            RegisterClass KRC> {
-  def ri : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
-              (ins RC:$src1, i8imm:$src2),
-              !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-              [(set RC:$dst, (vt (OpNode RC:$src1, (i8 imm:$src2))))],
-              SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
-  def rik : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
-              (ins KRC:$mask, RC:$src1, i8imm:$src2),
-              !strconcat(OpcodeStr,
-                " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
-              [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
-  def mi: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
-              (ins x86memop:$src1, i8imm:$src2),
-              !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-              [(set RC:$dst, (OpNode (mem_frag addr:$src1),
-                (i8 imm:$src2)))], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
-  def mik: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
-              (ins KRC:$mask, x86memop:$src1, i8imm:$src2),
-              !strconcat(OpcodeStr,
-                " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
-              [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
+                            string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
+  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
+                   (ins _.RC:$src1, i8imm:$src2), OpcodeStr,
+                   "$src2, $src1", "$src1, $src2",
+                   (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))),
+                   " ", SSE_INTSHIFT_ITINS_P.rr>, AVX512BIi8Base, EVEX_4V;
+  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
+                   (ins _.MemOp:$src1, i8imm:$src2), OpcodeStr,
+                   "$src2, $src1", "$src1, $src2",
+                   (_.VT (OpNode (_.MemOpFrag addr:$src1), (i8 imm:$src2))),
+                   " ", SSE_INTSHIFT_ITINS_P.rm>, AVX512BIi8Base, EVEX_4V;
 }
 
 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -3183,42 +3171,42 @@ multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
 }
 
 defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
-                                VR512, v16i32, i512mem, memopv16i32, VK16WM>,
+                                v16i32_info>,
                                 EVEX_V512, EVEX_CD8<32, CD8VF>;
 defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl, VR512, v16i32, v4i32,
                                 bc_v4i32, VK16WM>, EVEX_V512,
                                 EVEX_CD8<32, CD8VQ>;
 
 defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
-                                VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
+                                v8i64_info>, EVEX_V512,
                                 EVEX_CD8<64, CD8VF>, VEX_W;
 defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl, VR512, v8i64, v2i64,
                                 bc_v2i64, VK8WM>, EVEX_V512,
                                 EVEX_CD8<64, CD8VQ>, VEX_W;
 
 defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
-                                VR512, v16i32, i512mem, memopv16i32, VK16WM>, EVEX_V512,
+                                v16i32_info>, EVEX_V512,
                                 EVEX_CD8<32, CD8VF>;
 defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl, VR512, v16i32, v4i32,
                                 bc_v4i32, VK16WM>, EVEX_V512,
                                 EVEX_CD8<32, CD8VQ>;
 
 defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
-                                VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
+                                v8i64_info>, EVEX_V512,
                                 EVEX_CD8<64, CD8VF>, VEX_W;
 defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl, VR512, v8i64, v2i64,
                                 bc_v2i64, VK8WM>, EVEX_V512,
                                 EVEX_CD8<64, CD8VQ>, VEX_W;
 
 defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
-                                VR512, v16i32, i512mem, memopv16i32, VK16WM>,
+                                v16i32_info>,
                                 EVEX_V512, EVEX_CD8<32, CD8VF>;
 defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra, VR512, v16i32, v4i32,
                                 bc_v4i32, VK16WM>, EVEX_V512,
                                 EVEX_CD8<32, CD8VQ>;
 
 defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
-                                VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
+                                v8i64_info>, EVEX_V512,
                                 EVEX_CD8<64, CD8VF>, VEX_W;
 defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra, VR512, v8i64, v2i64,
                                 bc_v2i64, VK8WM>, EVEX_V512,
diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td
index cbd135aa007..fe4ead1905c 100644
--- a/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/llvm/lib/Target/X86/X86InstrFormats.td
@@ -722,6 +722,10 @@ class AVX512BIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
                  list<dag> pattern, InstrItinClass itin = NoItinerary>
       : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, PD,
         Requires<[HasAVX512]>;
+class AVX512BIi8Base : PD {
+  Domain ExeDomain = SSEPackedInt;
+  ImmType ImmT = Imm8;
+}
 class AVX512AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
                  list<dag> pattern, InstrItinClass itin = NoItinerary>
       : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TAPD,
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index 11d12c09fea..691d1fbc1d4 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -951,53 +951,140 @@ define <4 x double> @test_vextractf64x4(<8 x double> %a) {
 
 declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i8, <4 x double>, i8)
 
-define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0) {
-  ; CHECK-LABEL: test_x86_avx512_mask_pslli_d
+define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
+  ; CHECK-LABEL: test_x86_avx512_pslli_d
   ; CHECK: vpslld
   %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
   ret <16 x i32> %res
 }
-declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
+define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_mask_pslli_d
+  ; CHECK: vpslld $7, %zmm0, %zmm1 {%k1}
+  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
+  ret <16 x i32> %res
+}
 
-define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0) {
-  ; CHECK-LABEL: test_x86_avx512_mask_pslli_q
+define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_maskz_pslli_d
+  ; CHECK: vpslld $7, %zmm0, %zmm0 {%k1} {z}
+  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
+  ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
+
+define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
+  ; CHECK-LABEL: test_x86_avx512_pslli_q
   ; CHECK: vpsllq
   %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
   ret <8 x i64> %res
 }
+
+define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_mask_pslli_q
+  ; CHECK: vpsllq $7, %zmm0, %zmm1 {%k1}
+  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_maskz_pslli_q
+  ; CHECK: vpsllq $7, %zmm0, %zmm0 {%k1} {z}
+  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
+  ret <8 x i64> %res
+}
+
 declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
 
-define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0) {
-  ; CHECK-LABEL: test_x86_avx512_mask_psrli_d
+define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
+  ; CHECK-LABEL: test_x86_avx512_psrli_d
   ; CHECK: vpsrld
   %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
   ret <16 x i32> %res
 }
-declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
+define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_mask_psrli_d
+  ; CHECK: vpsrld $7, %zmm0, %zmm1 {%k1}
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_maskz_psrli_d
+  ; CHECK: vpsrld $7, %zmm0, %zmm0 {%k1} {z}
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
+  ret <16 x i32> %res
+}
 
-define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0) {
-  ; CHECK-LABEL: test_x86_avx512_mask_psrli_q
+declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
+
+define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
+  ; CHECK-LABEL: test_x86_avx512_psrli_q
   ; CHECK: vpsrlq
   %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
   ret <8 x i64> %res
 }
+
+define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_mask_psrli_q
+  ; CHECK: vpsrlq $7, %zmm0, %zmm1 {%k1}
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_maskz_psrli_q
+  ; CHECK: vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
+  ret <8 x i64> %res
+}
+
 declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
 
-define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0) {
-  ; CHECK-LABEL: test_x86_avx512_mask_psrai_d
+define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
+  ; CHECK-LABEL: test_x86_avx512_psrai_d
   ; CHECK: vpsrad
   %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
   ret <16 x i32> %res
 }
-declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
+define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_mask_psrai_d
+  ; CHECK: vpsrad $7, %zmm0, %zmm1 {%k1}
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_maskz_psrai_d
+  ; CHECK: vpsrad $7, %zmm0, %zmm0 {%k1} {z}
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
+  ret <16 x i32> %res
+}
 
-define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0) {
-  ; CHECK-LABEL: test_x86_avx512_mask_psrai_q
+declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
+
+define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
+  ; CHECK-LABEL: test_x86_avx512_psrai_q
   ; CHECK: vpsraq
   %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
   ret <8 x i64> %res
 }
+
+define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_mask_psrai_q
+  ; CHECK: vpsraq $7, %zmm0, %zmm1 {%k1}
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_maskz_psrai_q
+  ; CHECK: vpsraq $7, %zmm0, %zmm0 {%k1} {z}
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
+  ret <8 x i64> %res
+}
+
 declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone

