diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-11-28 19:39:47 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-11-28 19:39:47 +0000 |
commit | d49bd0cd872bf09df17b9a986821f5b057d4896d (patch) | |
tree | 88712dd064a8142a6cbd15d7fb5426aeb4352c7b | |
parent | dd4295626b9e4fc7cc97a35833a587dd4f240212 (diff) | |
download | bcm5719-llvm-d49bd0cd872bf09df17b9a986821f5b057d4896d.tar.gz bcm5719-llvm-d49bd0cd872bf09df17b9a986821f5b057d4896d.zip |
[X86][SSE] Add SSE_HADDSUB/SSE_PABS/SSE_PALIGN OpndItins
Update the multiclasses to take the scheduling OpndItins as a parameter instead of hard-coding the itinerary and scheduling classes.
These OpndItins will be reused by the AVX512 equivalents.
llvm-svn: 319209
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 104 |
1 file changed, 59 insertions, 45 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index ac465e3963e..59b00785af3 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -4831,77 +4831,82 @@ let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in { // SSE3 Instructions //===---------------------------------------------------------------------===// +let Sched = WriteFHAdd in +def SSE_HADDSUB : OpndItins< + IIC_SSE_HADDSUB_RR, IIC_SSE_HADDSUB_RM +>; + // Horizontal ops multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, - X86MemOperand x86memop, SDNode OpNode, PatFrag ld_frag, - bit Is2Addr = 1> { + X86MemOperand x86memop, SDNode OpNode, OpndItins itins, + PatFrag ld_frag, bit Is2Addr = 1> { def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, - Sched<[WriteFHAdd]>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr>, + Sched<[itins.Sched]>; def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))], - IIC_SSE_HADDSUB_RM>, Sched<[WriteFHAddLd, ReadAfterLd]>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, - X86MemOperand x86memop, SDNode OpNode, PatFrag ld_frag, - bit Is2Addr = 1> { + X86MemOperand x86memop, SDNode OpNode, OpndItins itins, + PatFrag ld_frag, bit Is2Addr = 1> { def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, 
$src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, - Sched<[WriteFHAdd]>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr>, + Sched<[itins.Sched]>; def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))], - IIC_SSE_HADDSUB_RM>, Sched<[WriteFHAddLd, ReadAfterLd]>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } let Predicates = [HasAVX] in { let ExeDomain = SSEPackedSingle in { defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem, - X86fhadd, loadv4f32, 0>, VEX_4V, VEX_WIG; + X86fhadd, SSE_HADDSUB, loadv4f32, 0>, VEX_4V, VEX_WIG; defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem, - X86fhsub, loadv4f32, 0>, VEX_4V, VEX_WIG; + X86fhsub, SSE_HADDSUB, loadv4f32, 0>, VEX_4V, VEX_WIG; defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem, - X86fhadd, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG; + X86fhadd, SSE_HADDSUB, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG; defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem, - X86fhsub, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG; + X86fhsub, SSE_HADDSUB, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG; } let ExeDomain = SSEPackedDouble in { defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem, - X86fhadd, loadv2f64, 0>, VEX_4V, VEX_WIG; + X86fhadd, SSE_HADDSUB, loadv2f64, 0>, VEX_4V, VEX_WIG; defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem, - X86fhsub, loadv2f64, 0>, VEX_4V, VEX_WIG; + X86fhsub, SSE_HADDSUB, loadv2f64, 0>, VEX_4V, VEX_WIG; defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem, - X86fhadd, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG; + X86fhadd, SSE_HADDSUB, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG; defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem, - X86fhsub, loadv4f64, 0>, VEX_4V, VEX_L, 
VEX_WIG; + X86fhsub, SSE_HADDSUB, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG; } } let Constraints = "$src1 = $dst" in { let ExeDomain = SSEPackedSingle in { defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd, - memopv4f32>; + SSE_HADDSUB, memopv4f32>; defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub, - memopv4f32>; + SSE_HADDSUB, memopv4f32>; } let ExeDomain = SSEPackedDouble in { defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd, - memopv2f64>; + SSE_HADDSUB, memopv2f64>; defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub, - memopv2f64>; + SSE_HADDSUB, memopv2f64>; } } @@ -4909,59 +4914,63 @@ let Constraints = "$src1 = $dst" in { // SSSE3 - Packed Absolute Instructions //===---------------------------------------------------------------------===// +let Sched = WriteVecALU in +def SSE_PABS : OpndItins< + IIC_SSE_PABS_RR, IIC_SSE_PABS_RM +>; /// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. multiclass SS3I_unop_rm<bits<8> opc, string OpcodeStr, ValueType vt, - SDNode OpNode, PatFrag ld_frag> { + SDNode OpNode, OpndItins itins, PatFrag ld_frag> { def rr : SS38I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (vt (OpNode VR128:$src)))], - IIC_SSE_PABS_RR>, Sched<[WriteVecALU]>; + itins.rr>, Sched<[itins.Sched]>; def rm : SS38I<opc, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (vt (OpNode (bitconvert (ld_frag addr:$src)))))], - IIC_SSE_PABS_RM>, Sched<[WriteVecALULd]>; + itins.rm>, Sched<[itins.Sched.Folded]>; } /// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. 
multiclass SS3I_unop_rm_y<bits<8> opc, string OpcodeStr, ValueType vt, - SDNode OpNode> { + SDNode OpNode, OpndItins itins> { def Yrr : SS38I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR256:$dst, (vt (OpNode VR256:$src)))]>, - Sched<[WriteVecALU]>; + [(set VR256:$dst, (vt (OpNode VR256:$src)))], itins.rr>, + Sched<[itins.Sched]>; def Yrm : SS38I<opc, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, - (vt (OpNode (bitconvert (loadv4i64 addr:$src)))))]>, - Sched<[WriteVecALULd]>; + (vt (OpNode (bitconvert (loadv4i64 addr:$src)))))], itins.rm>, + Sched<[itins.Sched.Folded]>; } let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { - defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, loadv2i64>, VEX, VEX_WIG; - defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, loadv2i64>, VEX, VEX_WIG; + defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, SSE_PABS, loadv2i64>, VEX, VEX_WIG; + defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, SSE_PABS, loadv2i64>, VEX, VEX_WIG; } let Predicates = [HasAVX, NoVLX] in { - defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, loadv2i64>, VEX, VEX_WIG; + defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, SSE_PABS, loadv2i64>, VEX, VEX_WIG; } let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { - defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs>, VEX, VEX_L, VEX_WIG; - defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs>, VEX, VEX_L, VEX_WIG; + defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs, SSE_PABS>, VEX, VEX_L, VEX_WIG; + defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs, SSE_PABS>, VEX, VEX_L, VEX_WIG; } let Predicates = [HasAVX2, NoVLX] in { - defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs>, VEX, VEX_L, VEX_WIG; + defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs, SSE_PABS>, VEX, VEX_L, VEX_WIG; } -defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, 
abs, memopv2i64>; -defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, memopv2i64>; -defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, memopv2i64>; +defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, SSE_PABS, memopv2i64>; +defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, SSE_PABS, memopv2i64>; +defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, SSE_PABS, memopv2i64>; //===---------------------------------------------------------------------===// // SSSE3 - Packed Binary Operator Instructions @@ -5181,9 +5190,14 @@ defm PMULHRSW : SS3I_binop_rm<0x0B, "pmulhrsw", X86mulhrs, v8i16, v8i16, // SSSE3 - Packed Align Instruction Patterns //===---------------------------------------------------------------------===// +let Sched = WriteShuffle in +def SSE_PALIGN : OpndItins< + IIC_SSE_PALIGNRR, IIC_SSE_PALIGNRM +>; + multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC, PatFrag memop_frag, X86MemOperand x86memop, - bit Is2Addr = 1> { + OpndItins itins, bit Is2Addr = 1> { let hasSideEffects = 0 in { def rri : SS3AI<0x0F, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$src3), @@ -5192,7 +5206,7 @@ multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC, !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set RC:$dst, (VT (X86PAlignr RC:$src1, RC:$src2, (i8 imm:$src3))))], - IIC_SSE_PALIGNRR>, Sched<[WriteShuffle]>; + itins.rr>, Sched<[itins.Sched]>; let mayLoad = 1 in def rmi : SS3AI<0x0F, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$src3), @@ -5203,19 +5217,19 @@ multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC, [(set RC:$dst, (VT (X86PAlignr RC:$src1, (bitconvert (memop_frag addr:$src2)), (i8 imm:$src3))))], - IIC_SSE_PALIGNRM>, Sched<[WriteShuffleLd, ReadAfterLd]>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } let Predicates = [HasAVX, NoVLX_Or_NoBWI] in defm VPALIGNR : ssse3_palignr<"vpalignr", v16i8, VR128, loadv2i64, - i128mem, 0>, VEX_4V, 
VEX_WIG; + i128mem, SSE_PALIGN, 0>, VEX_4V, VEX_WIG; let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in defm VPALIGNRY : ssse3_palignr<"vpalignr", v32i8, VR256, loadv4i64, - i256mem, 0>, VEX_4V, VEX_L, VEX_WIG; + i256mem, SSE_PALIGN, 0>, VEX_4V, VEX_L, VEX_WIG; let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in defm PALIGNR : ssse3_palignr<"palignr", v16i8, VR128, memopv2i64, - i128mem>; + i128mem, SSE_PALIGN>; //===---------------------------------------------------------------------===// // SSSE3 - Thread synchronization |