summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@gmail.com>2016-10-01 06:01:23 +0000
committerCraig Topper <craig.topper@gmail.com>2016-10-01 06:01:23 +0000
commitbe351eea0cc06e3efa0b8c45f6dfeb32ae7dfabc (patch)
tree249b47432da8e5de6e8d4ed2f05280badc8045fc
parent8aca90507f0a67653b7f08d0b9ed7a32de3a3d87 (diff)
downloadbcm5719-llvm-be351eea0cc06e3efa0b8c45f6dfeb32ae7dfabc.tar.gz
bcm5719-llvm-be351eea0cc06e3efa0b8c45f6dfeb32ae7dfabc.zip
[AVX-512] Add EVEX versions of VPBROADCASTW patterns with truncated i32 loads.
llvm-svn: 283015
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td15
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td4
2 files changed, 18 insertions, 1 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index fc28e5df947..950fbc5e402 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -1009,6 +1009,21 @@ multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
AVX5128IBase, EVEX;
}
+let Predicates = [HasVLX, HasBWI] in {
+ // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
+ // This means we'll encounter truncated i32 loads; match that here.
+ def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
+ (VPBROADCASTWZ128m addr:$src)>;
+ def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
+ (VPBROADCASTWZ256m addr:$src)>;
+ def : Pat<(v8i16 (X86VBroadcast
+ (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
+ (VPBROADCASTWZ128m addr:$src)>;
+ def : Pat<(v16i16 (X86VBroadcast
+ (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
+ (VPBROADCASTWZ256m addr:$src)>;
+}
+
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 0f3232bb1ed..98d5dac9e8e 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -8318,7 +8318,7 @@ defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32,
defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
v2i64, v4i64, NoVLX>;
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
// loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
// This means we'll encounter truncated i32 loads; match that here.
def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
@@ -8331,7 +8331,9 @@ let Predicates = [HasAVX2] in {
def : Pat<(v16i16 (X86VBroadcast
(i16 (trunc (i32 (zextloadi16 addr:$src)))))),
(VPBROADCASTWYrm addr:$src)>;
+}
+let Predicates = [HasAVX2] in {
// Provide aliases for broadcast from the same register class that
// automatically does the extract.
def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256:$src))),
OpenPOWER on IntegriCloud