diff options
author | Craig Topper <craig.topper@gmail.com> | 2016-10-01 06:01:23 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2016-10-01 06:01:23 +0000 |
commit | be351eea0cc06e3efa0b8c45f6dfeb32ae7dfabc (patch) | |
tree | 249b47432da8e5de6e8d4ed2f05280badc8045fc | |
parent | 8aca90507f0a67653b7f08d0b9ed7a32de3a3d87 (diff) | |
download | bcm5719-llvm-be351eea0cc06e3efa0b8c45f6dfeb32ae7dfabc.tar.gz bcm5719-llvm-be351eea0cc06e3efa0b8c45f6dfeb32ae7dfabc.zip |
[AVX-512] Add EVEX versions of VPBROADCASTW patterns with truncated i32 loads.
llvm-svn: 283015
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 15 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 4 |
2 files changed, 18 insertions, 1 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index fc28e5df947..950fbc5e402 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -1009,6 +1009,21 @@ multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr, AVX5128IBase, EVEX; } +let Predicates = [HasVLX, HasBWI] in { + // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. + // This means we'll encounter truncated i32 loads; match that here. + def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))), + (VPBROADCASTWZ128m addr:$src)>; + def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))), + (VPBROADCASTWZ256m addr:$src)>; + def : Pat<(v8i16 (X86VBroadcast + (i16 (trunc (i32 (zextloadi16 addr:$src)))))), + (VPBROADCASTWZ128m addr:$src)>; + def : Pat<(v16i16 (X86VBroadcast + (i16 (trunc (i32 (zextloadi16 addr:$src)))))), + (VPBROADCASTWZ256m addr:$src)>; +} + //===----------------------------------------------------------------------===// // AVX-512 BROADCAST SUBVECTORS // diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 0f3232bb1ed..98d5dac9e8e 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -8318,7 +8318,7 @@ defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32, defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64, v2i64, v4i64, NoVLX>; -let Predicates = [HasAVX2] in { +let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. // This means we'll encounter truncated i32 loads; match that here. def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))), @@ -8331,7 +8331,9 @@ let Predicates = [HasAVX2] in { def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (zextloadi16 addr:$src)))))), (VPBROADCASTWYrm addr:$src)>; +} +let Predicates = [HasAVX2] in { // Provide aliases for broadcast from the same register class that // automatically does the extract. def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256:$src))), |