summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-07-17 20:16:18 +0000
committerCraig Topper <craig.topper@intel.com>2018-07-17 20:16:18 +0000
commit9ef92865ecca5bbe3bf9ef10a3bb569c16af8bdd (patch)
tree1214882d5c749311a9f13d22498b621e004f6134 /llvm/lib
parentc0f2e306f2388a0ad1f10e4e9cc8157121d4f4e5 (diff)
downloadbcm5719-llvm-9ef92865ecca5bbe3bf9ef10a3bb569c16af8bdd.tar.gz
bcm5719-llvm-9ef92865ecca5bbe3bf9ef10a3bb569c16af8bdd.zip
[X86] Add patterns for folding full vector load into MOVHPS and MOVLPS with SSE1 only.
llvm-svn: 337320
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td4
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td37
2 files changed, 25 insertions, 16 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 3a1f840139e..2035e49720f 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -6452,7 +6452,9 @@ multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>, EVEX_4V;
}
-defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps,
+// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
+// SSE1. And the MOVLPS pattern is even more complex.
+defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 448be0eda0e..3797d91fb31 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -661,19 +661,16 @@ let Predicates = [UseSSE1] in {
// SSE 1 & 2 - Move Low packed FP Instructions
//===----------------------------------------------------------------------===//
-multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode,
+multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode pdnode,
string base_opc, string asm_opr> {
+ // No pattern as they need to be special cased between high and low.
let hasSideEffects = 0, mayLoad = 1 in
def PSrm : PI<opc, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
- !strconcat(base_opc, "s", asm_opr),
- [(set VR128:$dst,
- (psnode VR128:$src1,
- (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
- SSEPackedSingle>, PS,
- Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
+ !strconcat(base_opc, "s", asm_opr),
+ [], SSEPackedSingle>, PS,
+ Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
- let hasSideEffects = 0, mayLoad = 1 in
def PDrm : PI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
!strconcat(base_opc, "d", asm_opr),
@@ -683,19 +680,19 @@ multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode,
Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
}
-multiclass sse12_mov_hilo_packed<bits<8>opc, SDPatternOperator psnode,
- SDPatternOperator pdnode, string base_opc> {
+multiclass sse12_mov_hilo_packed<bits<8>opc, SDPatternOperator pdnode,
+ string base_opc> {
let Predicates = [UseAVX] in
- defm V#NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
+ defm V#NAME : sse12_mov_hilo_packed_base<opc, pdnode, base_opc,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
VEX_4V, VEX_WIG;
let Constraints = "$src1 = $dst" in
- defm NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
+ defm NAME : sse12_mov_hilo_packed_base<opc, pdnode, base_opc,
"\t{$src2, $dst|$dst, $src2}">;
}
-defm MOVL : sse12_mov_hilo_packed<0x12, null_frag, X86Movsd, "movlp">;
+defm MOVL : sse12_mov_hilo_packed<0x12, X86Movsd, "movlp">;
let SchedRW = [WriteFStore] in {
let Predicates = [UseAVX] in {
@@ -725,13 +722,18 @@ let Predicates = [UseSSE1] in {
def : Pat<(store (i64 (extractelt (bc_v2i64 (v4f32 VR128:$src2)),
(iPTR 0))), addr:$src1),
(MOVLPSmr addr:$src1, VR128:$src2)>;
+
+ // This pattern helps select MOVLPS on SSE1 only targets. With SSE2 we'll
+ // end up with a movsd or blend instead of shufp.
+ def : Pat<(X86Shufp (memopv4f32 addr:$src2), VR128:$src1, (i8 -28)),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
}
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Hi packed FP Instructions
//===----------------------------------------------------------------------===//
-defm MOVH : sse12_mov_hilo_packed<0x16, X86Movlhps, X86Unpckl, "movhp">;
+defm MOVH : sse12_mov_hilo_packed<0x16, X86Unpckl, "movhp">;
let SchedRW = [WriteFStore] in {
// v2f64 extract element 1 is always custom lowered to unpack high to low
@@ -796,6 +798,11 @@ let Predicates = [UseSSE1] in {
def : Pat<(X86Movlhps VR128:$src1,
(bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
(MOVHPSrm VR128:$src1, addr:$src2)>;
+
+ // This pattern helps select MOVHPS on SSE1 only targets. With SSE2 we'll
+ // end up with a movsd or blend instead of shufp.
+ def : Pat<(X86Movlhps VR128:$src1, (memopv4f32 addr:$src2)),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
}
let Predicates = [UseSSE2] in {
OpenPOWER on IntegriCloud