summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2017-11-28 22:55:08 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2017-11-28 22:55:08 +0000
commit8f62394751c4d907964424037e1f274502e1af26 (patch)
tree58937248e6b43e1cdcb10d3eccb80e456533acb6 /llvm/lib
parent24ca79c776340e9d03fc2f24e7acedba97634eb2 (diff)
downloadbcm5719-llvm-8f62394751c4d907964424037e1f274502e1af26.tar.gz
bcm5719-llvm-8f62394751c4d907964424037e1f274502e1af26.zip
[X86][SSE] Add SSE_UNPCK/SSE_PUNPCK OpndItins
Update multi-classes to take the scheduling OpndItins instead of hard coding it. Will be reused in the AVX512 equivalents. llvm-svn: 319245
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td110
1 files changed, 61 insertions, 49 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index cb7e1974d95..01770af0090 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -2432,66 +2432,72 @@ let Constraints = "$src1 = $dst" in {
// SSE 1 & 2 - Unpack FP Instructions
//===----------------------------------------------------------------------===//
+let Sched = WriteFShuffle in
+def SSE_UNPCK : OpndItins<
+ IIC_SSE_UNPCK, IIC_SSE_UNPCK
+>;
+
/// sse12_unpack_interleave - sse 1 & 2 fp unpack and interleave
multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
PatFrag mem_frag, RegisterClass RC,
X86MemOperand x86memop, string asm,
- Domain d, bit IsCommutable = 0> {
+ OpndItins itins, Domain d, bit IsCommutable = 0> {
let isCommutable = IsCommutable in
def rr : PI<opc, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2),
asm, [(set RC:$dst,
(vt (OpNode RC:$src1, RC:$src2)))],
- IIC_SSE_UNPCK, d>, Sched<[WriteFShuffle]>;
+ itins.rr, d>, Sched<[itins.Sched]>;
def rm : PI<opc, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2),
asm, [(set RC:$dst,
(vt (OpNode RC:$src1,
(mem_frag addr:$src2))))],
- IIC_SSE_UNPCK, d>,
- Sched<[WriteFShuffleLd, ReadAfterLd]>;
+ itins.rm, d>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
let Predicates = [HasAVX, NoVLX] in {
defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, loadv4f32,
VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
+ SSE_UNPCK, SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, loadv2f64,
VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, PD, VEX_4V, VEX_WIG;
+ SSE_UNPCK, SSEPackedDouble>, PD, VEX_4V, VEX_WIG;
defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, loadv4f32,
VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
+ SSE_UNPCK, SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, loadv2f64,
VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, PD, VEX_4V, VEX_WIG;
+ SSE_UNPCK, SSEPackedDouble>, PD, VEX_4V, VEX_WIG;
defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, loadv8f32,
VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
+ SSE_UNPCK, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, loadv4f64,
VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
+ SSE_UNPCK, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, loadv8f32,
VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
+ SSE_UNPCK, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, loadv4f64,
VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
+ SSE_UNPCK, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
}// Predicates = [HasAVX, NoVLX]
+
let Constraints = "$src1 = $dst" in {
defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
- SSEPackedSingle>, PS;
+ SSE_UNPCK, SSEPackedSingle>, PS;
defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64,
VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
- SSEPackedDouble, 1>, PD;
+ SSE_UNPCK, SSEPackedDouble, 1>, PD;
defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32,
VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
- SSEPackedSingle>, PS;
+ SSE_UNPCK, SSEPackedSingle>, PS;
defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64,
VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
- SSEPackedDouble>, PD;
+ SSE_UNPCK, SSEPackedDouble>, PD;
} // Constraints = "$src1 = $dst"
let Predicates = [HasAVX1Only] in {
@@ -4062,16 +4068,22 @@ let Constraints = "$src1 = $dst" in {
// SSE2 - Packed Integer Unpack Instructions
//===---------------------------------------------------------------------===//
+let Sched = WriteShuffle in
+def SSE_PUNPCK : OpndItins<
+ IIC_SSE_UNPCK, IIC_SSE_UNPCK
+>;
+
let ExeDomain = SSEPackedInt in {
multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
- SDNode OpNode, PatFrag ld_frag, bit Is2Addr = 1> {
+ SDNode OpNode, OpndItins itins, PatFrag ld_frag,
+ bit Is2Addr = 1> {
def rr : PDI<opc, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))],
- IIC_SSE_UNPCK>, Sched<[WriteShuffle]>;
+ itins.rr>, Sched<[itins.Sched]>;
def rm : PDI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
@@ -4079,85 +4091,85 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (vt (OpNode VR128:$src1,
(bitconvert (ld_frag addr:$src2)))))],
- IIC_SSE_UNPCK>,
- Sched<[WriteShuffleLd, ReadAfterLd]>;
+ itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
- SDNode OpNode> {
+ SDNode OpNode, OpndItins itins> {
def Yrr : PDI<opc, MRMSrcReg,
(outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>,
- Sched<[WriteShuffle]>;
+ [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))],
+ itins.rr>, Sched<[itins.Sched]>;
def Yrm : PDI<opc, MRMSrcMem,
(outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, (vt (OpNode VR256:$src1,
- (bitconvert (loadv4i64 addr:$src2)))))]>,
- Sched<[WriteShuffleLd, ReadAfterLd]>;
+ (bitconvert (loadv4i64 addr:$src2)))))],
+ itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl,
- loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SSE_PUNPCK, loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl,
- loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SSE_PUNPCK, loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh,
- loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SSE_PUNPCK, loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh,
- loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SSE_PUNPCK, loadv2i64, 0>, VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX, NoVLX] in {
defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl,
- loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SSE_PUNPCK, loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl,
- loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SSE_PUNPCK, loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh,
- loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SSE_PUNPCK, loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh,
- loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SSE_PUNPCK, loadv2i64, 0>, VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
- defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl>,
+ defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl, SSE_PUNPCK>,
VEX_4V, VEX_L, VEX_WIG;
- defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl>,
+ defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl, SSE_PUNPCK>,
VEX_4V, VEX_L, VEX_WIG;
- defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh>,
+ defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh, SSE_PUNPCK>,
VEX_4V, VEX_L, VEX_WIG;
- defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh>,
+ defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh, SSE_PUNPCK>,
VEX_4V, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX] in {
- defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl>,
+ defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl, SSE_PUNPCK>,
VEX_4V, VEX_L, VEX_WIG;
- defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl>,
+ defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, SSE_PUNPCK>,
VEX_4V, VEX_L, VEX_WIG;
- defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh>,
+ defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh, SSE_PUNPCK>,
VEX_4V, VEX_L, VEX_WIG;
- defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh>,
+ defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, SSE_PUNPCK>,
VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
- defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl,
+ defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl, SSE_PUNPCK,
memopv2i64>;
- defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl,
+ defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl, SSE_PUNPCK,
memopv2i64>;
- defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl,
+ defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl, SSE_PUNPCK,
memopv2i64>;
- defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl,
+ defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl, SSE_PUNPCK,
memopv2i64>;
- defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh,
+ defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh, SSE_PUNPCK,
memopv2i64>;
- defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh,
+ defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh, SSE_PUNPCK,
memopv2i64>;
- defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh,
+ defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh, SSE_PUNPCK,
memopv2i64>;
- defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh,
+ defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh, SSE_PUNPCK,
memopv2i64>;
}
} // ExeDomain = SSEPackedInt
OpenPOWER on IntegriCloud