diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-11-28 22:55:08 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-11-28 22:55:08 +0000 |
| commit | 8f62394751c4d907964424037e1f274502e1af26 (patch) | |
| tree | 58937248e6b43e1cdcb10d3eccb80e456533acb6 /llvm/lib | |
| parent | 24ca79c776340e9d03fc2f24e7acedba97634eb2 (diff) | |
| download | bcm5719-llvm-8f62394751c4d907964424037e1f274502e1af26.tar.gz bcm5719-llvm-8f62394751c4d907964424037e1f274502e1af26.zip | |
[X86][SSE] Add SSE_UNPCK/SSE_PUNPCK OpndItins
Update the unpack multiclasses to take the scheduling OpndItins as a parameter instead of hard-coding the itinerary and scheduling classes.
The new OpndItins definitions will be reused in the AVX512 equivalents.
llvm-svn: 319245
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 110 |
1 file changed, 61 insertions, 49 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index cb7e1974d95..01770af0090 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -2432,66 +2432,72 @@ let Constraints = "$src1 = $dst" in { // SSE 1 & 2 - Unpack FP Instructions //===----------------------------------------------------------------------===// +let Sched = WriteFShuffle in +def SSE_UNPCK : OpndItins< + IIC_SSE_UNPCK, IIC_SSE_UNPCK +>; + /// sse12_unpack_interleave - sse 1 & 2 fp unpack and interleave multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt, PatFrag mem_frag, RegisterClass RC, X86MemOperand x86memop, string asm, - Domain d, bit IsCommutable = 0> { + OpndItins itins, Domain d, bit IsCommutable = 0> { let isCommutable = IsCommutable in def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), asm, [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], - IIC_SSE_UNPCK, d>, Sched<[WriteFShuffle]>; + itins.rr, d>, Sched<[itins.Sched]>; def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), asm, [(set RC:$dst, (vt (OpNode RC:$src1, (mem_frag addr:$src2))))], - IIC_SSE_UNPCK, d>, - Sched<[WriteFShuffleLd, ReadAfterLd]>; + itins.rm, d>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } let Predicates = [HasAVX, NoVLX] in { defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, loadv4f32, VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, PS, VEX_4V, VEX_WIG; + SSE_UNPCK, SSEPackedSingle>, PS, VEX_4V, VEX_WIG; defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, loadv2f64, VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, PD, VEX_4V, VEX_WIG; + SSE_UNPCK, SSEPackedDouble>, PD, VEX_4V, VEX_WIG; defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, loadv4f32, VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, PS, VEX_4V, VEX_WIG; + 
SSE_UNPCK, SSEPackedSingle>, PS, VEX_4V, VEX_WIG; defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, loadv2f64, VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, PD, VEX_4V, VEX_WIG; + SSE_UNPCK, SSEPackedDouble>, PD, VEX_4V, VEX_WIG; defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, loadv8f32, VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG; + SSE_UNPCK, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG; defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, loadv4f64, VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG; + SSE_UNPCK, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG; defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, loadv8f32, VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG; + SSE_UNPCK, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG; defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, loadv4f64, VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG; + SSE_UNPCK, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG; }// Predicates = [HasAVX, NoVLX] + let Constraints = "$src1 = $dst" in { defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32, VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}", - SSEPackedSingle>, PS; + SSE_UNPCK, SSEPackedSingle>, PS; defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64, VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}", - SSEPackedDouble, 1>, PD; + SSE_UNPCK, SSEPackedDouble, 1>, PD; defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32, VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}", - SSEPackedSingle>, PS; + SSE_UNPCK, SSEPackedSingle>, PS; defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, 
v2f64, memopv2f64, VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}", - SSEPackedDouble>, PD; + SSE_UNPCK, SSEPackedDouble>, PD; } // Constraints = "$src1 = $dst" let Predicates = [HasAVX1Only] in { @@ -4062,16 +4068,22 @@ let Constraints = "$src1 = $dst" in { // SSE2 - Packed Integer Unpack Instructions //===---------------------------------------------------------------------===// +let Sched = WriteShuffle in +def SSE_PUNPCK : OpndItins< + IIC_SSE_UNPCK, IIC_SSE_UNPCK +>; + let ExeDomain = SSEPackedInt in { multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, - SDNode OpNode, PatFrag ld_frag, bit Is2Addr = 1> { + SDNode OpNode, OpndItins itins, PatFrag ld_frag, + bit Is2Addr = 1> { def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !if(Is2Addr, !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], - IIC_SSE_UNPCK>, Sched<[WriteShuffle]>; + itins.rr>, Sched<[itins.Sched]>; def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !if(Is2Addr, @@ -4079,85 +4091,85 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (vt (OpNode VR128:$src1, (bitconvert (ld_frag addr:$src2)))))], - IIC_SSE_UNPCK>, - Sched<[WriteShuffleLd, ReadAfterLd]>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt, - SDNode OpNode> { + SDNode OpNode, OpndItins itins> { def Yrr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>, - Sched<[WriteShuffle]>; + [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))], + itins.rr>, Sched<[itins.Sched]>; def Yrm : PDI<opc, 
MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt (OpNode VR256:$src1, - (bitconvert (loadv4i64 addr:$src2)))))]>, - Sched<[WriteShuffleLd, ReadAfterLd]>; + (bitconvert (loadv4i64 addr:$src2)))))], + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, - loadv2i64, 0>, VEX_4V, VEX_WIG; + SSE_PUNPCK, loadv2i64, 0>, VEX_4V, VEX_WIG; defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, - loadv2i64, 0>, VEX_4V, VEX_WIG; + SSE_PUNPCK, loadv2i64, 0>, VEX_4V, VEX_WIG; defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, - loadv2i64, 0>, VEX_4V, VEX_WIG; + SSE_PUNPCK, loadv2i64, 0>, VEX_4V, VEX_WIG; defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, - loadv2i64, 0>, VEX_4V, VEX_WIG; + SSE_PUNPCK, loadv2i64, 0>, VEX_4V, VEX_WIG; } let Predicates = [HasAVX, NoVLX] in { defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, - loadv2i64, 0>, VEX_4V, VEX_WIG; + SSE_PUNPCK, loadv2i64, 0>, VEX_4V, VEX_WIG; defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, - loadv2i64, 0>, VEX_4V, VEX_WIG; + SSE_PUNPCK, loadv2i64, 0>, VEX_4V, VEX_WIG; defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, - loadv2i64, 0>, VEX_4V, VEX_WIG; + SSE_PUNPCK, loadv2i64, 0>, VEX_4V, VEX_WIG; defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, - loadv2i64, 0>, VEX_4V, VEX_WIG; + SSE_PUNPCK, loadv2i64, 0>, VEX_4V, VEX_WIG; } let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { - defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl>, + defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl, SSE_PUNPCK>, VEX_4V, VEX_L, VEX_WIG; - defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl>, + defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", 
v16i16, X86Unpckl, SSE_PUNPCK>, VEX_4V, VEX_L, VEX_WIG; - defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh>, + defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh, SSE_PUNPCK>, VEX_4V, VEX_L, VEX_WIG; - defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh>, + defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh, SSE_PUNPCK>, VEX_4V, VEX_L, VEX_WIG; } let Predicates = [HasAVX2, NoVLX] in { - defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl>, + defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl, SSE_PUNPCK>, VEX_4V, VEX_L, VEX_WIG; - defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl>, + defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, SSE_PUNPCK>, VEX_4V, VEX_L, VEX_WIG; - defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh>, + defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh, SSE_PUNPCK>, VEX_4V, VEX_L, VEX_WIG; - defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh>, + defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, SSE_PUNPCK>, VEX_4V, VEX_L, VEX_WIG; } let Constraints = "$src1 = $dst" in { - defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl, + defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl, SSE_PUNPCK, memopv2i64>; - defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl, + defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl, SSE_PUNPCK, memopv2i64>; - defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl, + defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl, SSE_PUNPCK, memopv2i64>; - defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl, + defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl, SSE_PUNPCK, memopv2i64>; - defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh, + defm PUNPCKHBW : sse2_unpack<0x68, 
"punpckhbw", v16i8, X86Unpckh, SSE_PUNPCK, memopv2i64>; - defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh, + defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh, SSE_PUNPCK, memopv2i64>; - defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh, + defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh, SSE_PUNPCK, memopv2i64>; - defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh, + defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh, SSE_PUNPCK, memopv2i64>; } } // ExeDomain = SSEPackedInt |

