summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@intel.com> 2017-09-03 22:25:50 +0000
committer: Craig Topper <craig.topper@intel.com> 2017-09-03 22:25:50 +0000
commit: fcf6bc550344794cedfd282df45f384b12c88650 (patch)
tree: 3304201261e87b8131c50d0bd2f9549c79782c65 /llvm/lib
parent: 788fbe08db9c557f8a445540a197e1e9d9c31493 (diff)
download: bcm5719-llvm-fcf6bc550344794cedfd282df45f384b12c88650.tar.gz
download: bcm5719-llvm-fcf6bc550344794cedfd282df45f384b12c88650.zip
[X86] Add more patterns to use moves to zero the upper portions of a vector register that I missed in r312450.
llvm-svn: 312459
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/X86/X86InstrAVX512.td | 46
1 file changed, 46 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index b1778329fba..e4e3be33932 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4044,6 +4044,51 @@ let Predicates = [HasAVX512, NoVLX] in {
// will zero the upper bits.
// TODO: Is there a safe way to detect whether the producing instruction
// already zeroed the upper bits?
+
+ // 128->512 register form: inserting a VR128 $src at index 0 of an all-zeros 512-bit vector selects a 128-bit register move (VMOVAPD/VMOVAPS for FP, VMOVDQA for integer types) wrapped in SUBREG_TO_REG as sub_xmm.
+ def : Pat<(v8f64 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+ (v2f64 VR128:$src), (iPTR 0))),
+ (SUBREG_TO_REG (i64 0), (VMOVAPDrr VR128:$src), sub_xmm)>;
+ def : Pat<(v16f32 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+ (v4f32 VR128:$src), (iPTR 0))),
+ (SUBREG_TO_REG (i64 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
+ def : Pat<(v8i64 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+ (v2i64 VR128:$src), (iPTR 0))),
+ (SUBREG_TO_REG (i64 0), (VMOVDQArr VR128:$src), sub_xmm)>;
+ def : Pat<(v16i32 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+ (v4i32 VR128:$src), (iPTR 0))),
+ (SUBREG_TO_REG (i64 0), (VMOVDQArr VR128:$src), sub_xmm)>;
+ def : Pat<(v32i16 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+ (v8i16 VR128:$src), (iPTR 0))),
+ (SUBREG_TO_REG (i64 0), (VMOVDQArr VR128:$src), sub_xmm)>;
+ def : Pat<(v64i8 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+ (v16i8 VR128:$src), (iPTR 0))),
+ (SUBREG_TO_REG (i64 0), (VMOVDQArr VR128:$src), sub_xmm)>;
+
+ // 128->512 memory form: the 128-bit operand is a load from addr:$src (integer element types go through bc_* of loadv2i64), selected as the rm move in sub_xmm. NOTE(review): VMOVAP*/VMOVDQA look like aligned-move opcodes while the loadv* fragments may match any load - TODO confirm alignment is guaranteed.
+ def : Pat<(v8f64 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+ (loadv2f64 addr:$src), (iPTR 0))),
+ (SUBREG_TO_REG (i64 0), (VMOVAPDrm addr:$src), sub_xmm)>;
+ def : Pat<(v16f32 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+ (loadv4f32 addr:$src), (iPTR 0))),
+ (SUBREG_TO_REG (i64 0), (VMOVAPSrm addr:$src), sub_xmm)>;
+ def : Pat<(v8i64 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+ (loadv2i64 addr:$src), (iPTR 0))),
+ (SUBREG_TO_REG (i64 0), (VMOVDQArm addr:$src), sub_xmm)>;
+ def : Pat<(v16i32 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+ (bc_v4i32 (loadv2i64 addr:$src)),
+ (iPTR 0))),
+ (SUBREG_TO_REG (i64 0), (VMOVDQArm addr:$src), sub_xmm)>;
+ def : Pat<(v32i16 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+ (bc_v8i16 (loadv2i64 addr:$src)),
+ (iPTR 0))),
+ (SUBREG_TO_REG (i64 0), (VMOVDQArm addr:$src), sub_xmm)>;
+ def : Pat<(v64i8 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
+ (bc_v16i8 (loadv2i64 addr:$src)),
+ (iPTR 0))),
+ (SUBREG_TO_REG (i64 0), (VMOVDQArm addr:$src), sub_xmm)>;
+
+ // 256->512 register form: inserting a VR256 $src at index 0 of an all-zeros 512-bit vector selects a 256-bit (Y-suffixed) register move wrapped in SUBREG_TO_REG as sub_ymm.
def : Pat<(v8f64 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
(v4f64 VR256:$src), (iPTR 0))),
(SUBREG_TO_REG (i64 0), (VMOVAPDYrr VR256:$src), sub_ymm)>;
@@ -4063,6 +4108,7 @@ let Predicates = [HasAVX512, NoVLX] in {
(v32i8 VR256:$src), (iPTR 0))),
(SUBREG_TO_REG (i64 0), (VMOVDQAYrr VR256:$src), sub_ymm)>;
+ // 256->512 memory form: the 256-bit operand is a load from addr:$src, selected as the Y-suffixed rm move in sub_ymm. NOTE(review): VMOVAPDYrm looks like an aligned-move opcode while loadv4f64 may match any load - TODO confirm alignment is guaranteed.
def : Pat<(v8f64 (insert_subvector (bitconvert (v16i32 immAllZerosV)),
(loadv4f64 addr:$src), (iPTR 0))),
(SUBREG_TO_REG (i64 0), (VMOVAPDYrm addr:$src), sub_ymm)>;
OpenPOWER on IntegriCloud