summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2017-02-21 16:41:44 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2017-02-21 16:41:44 +0000
commit791955819c85b956c9f509ed0f274fbe59781d4f (patch)
tree24eb7c0e87ffed8568b1e493b016d8db956914d0 /llvm/lib/Target
parenta6e95e1652bf42239562f86a665898530becd83f (diff)
downloadbcm5719-llvm-791955819c85b956c9f509ed0f274fbe59781d4f.tar.gz
bcm5719-llvm-791955819c85b956c9f509ed0f274fbe59781d4f.zip
[X86][AVX2] Fix VPBROADCASTQ folding on 32-bit targets.
As i64 isn't a value type on 32-bit targets, we need to fold the VZEXT_LOAD into VPBROADCASTQ. llvm-svn: 295733
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td11
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td5
2 files changed, 16 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 5c0be19f0cc..527e86d94b9 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -1030,7 +1030,18 @@ multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
AVX5128IBase, EVEX;
}
+let Predicates = [HasAVX512] in {
+ // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
+ def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
+ (VPBROADCASTQZm addr:$src)>;
+}
+
let Predicates = [HasVLX, HasBWI] in {
+ // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
+ def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
+ (VPBROADCASTQZ128m addr:$src)>;
+ def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
+ (VPBROADCASTQZ256m addr:$src)>;
// loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
// This means we'll encounter truncated i32 loads; match that here.
def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 7c7fa496e0b..2417cdc1904 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -8265,6 +8265,11 @@ defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
v2i64, v4i64, NoVLX>;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
+ // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
+ def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
+ (VPBROADCASTQrm addr:$src)>;
+ def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
+ (VPBROADCASTQYrm addr:$src)>;
// loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
// This means we'll encounter truncated i32 loads; match that here.
def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
OpenPOWER on IntegriCloud