summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorQuentin Colombet <qcolombet@apple.com>2014-03-24 17:54:19 +0000
committerQuentin Colombet <qcolombet@apple.com>2014-03-24 17:54:19 +0000
commit2d5c156b963e6a2c9dcdb1d4e7f84f0576d5bbe8 (patch)
treea3fefac604258e0a1f49929e6c197a89e6eee2fa /llvm/lib
parentad41d7b5318ee6f6bb9e2805c679fc441289ffc6 (diff)
downloadbcm5719-llvm-2d5c156b963e6a2c9dcdb1d4e7f84f0576d5bbe8.tar.gz
bcm5719-llvm-2d5c156b963e6a2c9dcdb1d4e7f84f0576d5bbe8.zip
[X86][ISelDAG] Add missing fallback patterns for avx2 broadcast instructions.
Those patterns are used when the load cannot be folded into the related broadcast during the select phase. This happens when the load gets additional uses that were not anticipated during the previous lowering phases (constant vector to constant load, then constant load reused) or when selection DAG is not able to prove that folding the load will not create a cycle in the DAG. <rdar://problem/16074331> llvm-svn: 204631
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td25
1 files changed, 25 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 8da6bde6c9e..f7aac458be8 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -8431,6 +8431,31 @@ let Predicates = [HasAVX2] in {
(VBROADCASTSSYrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
(VBROADCASTSDYrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
+
+ def : Pat<(v16i8 (X86VBroadcast GR8:$src)),
+ (VPBROADCASTBrr (COPY_TO_REGCLASS
+ (i32 (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
+ VR128))>;
+ def : Pat<(v32i8 (X86VBroadcast GR8:$src)),
+ (VPBROADCASTBYrr (COPY_TO_REGCLASS
+ (i32 (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
+ VR128))>;
+
+ def : Pat<(v8i16 (X86VBroadcast GR16:$src)),
+ (VPBROADCASTWrr (COPY_TO_REGCLASS
+ (i32 (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit)),
+ VR128))>;
+ def : Pat<(v16i16 (X86VBroadcast GR16:$src)),
+ (VPBROADCASTWYrr (COPY_TO_REGCLASS
+ (i32 (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit)),
+ VR128))>;
+
+ // The patterns for VPBROADCASTD are not needed because they would match
+ // the exact same thing as VBROADCASTSS patterns.
+
+ def : Pat<(v2i64 (X86VBroadcast GR64:$src)),
+ (VPBROADCASTQrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
+ // The v4i64 pattern is not needed because VBROADCASTSDYrr already match.
}
}
OpenPOWER on IntegriCloud