summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@gmail.com>2017-05-09 16:32:11 +0000
committerCraig Topper <craig.topper@gmail.com>2017-05-09 16:32:11 +0000
commitf893d49f0ceddb804af589de33f20e010402ffbd (patch)
treebe587d6efe1a4470ff576a8659a46cbdfe2b396f /llvm/lib
parent6844e21f593467c640f0e14e2113bf0dfb32b1a8 (diff)
downloadbcm5719-llvm-f893d49f0ceddb804af589de33f20e010402ffbd.tar.gz
bcm5719-llvm-f893d49f0ceddb804af589de33f20e010402ffbd.zip
[X86] Add more patterns for BZHI isel
This patch adds more patterns that a reasonable person might write that can be compiled to BZHI. This adds support for (~0U >> (32 - b)) & a; and a << (32 - b) >> (32 - b); This was inspired by the code in APInt::clearUnusedBits. This can pass an index of 32 to the bzhi instruction which a quick test of Haswell hardware shows will not mask any bits. Though the description text in the Intel manual says the "index is saturated to OperandSize-1". The pseudocode in the same manual indicates no bits will be zeroed for this case. I think this is still missing cases where the subtract portion is an 8-bit operation. Differential Revision: https://reviews.llvm.org/D32616 llvm-svn: 302549
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.td32
1 files changed, 32 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index b46b1ee9c8e..4d7d8ece92d 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -2352,6 +2352,38 @@ let Predicates = [HasBMI2] in {
def : Pat<(and (loadi64 addr:$src), (add (shl 1, GR8:$lz), -1)),
(BZHI64rm addr:$src,
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
+
+ // x & (-1 >> (32 - y))
+ def : Pat<(and GR32:$src, (srl -1, (i8 (trunc (sub 32, GR32:$lz))))),
+ (BZHI32rr GR32:$src, GR32:$lz)>;
+ def : Pat<(and (loadi32 addr:$src), (srl -1, (i8 (trunc (sub 32, GR32:$lz))))),
+ (BZHI32rm addr:$src, GR32:$lz)>;
+
+ // x & (-1 >> (64 - y))
+ def : Pat<(and GR64:$src, (srl -1, (i8 (trunc (sub 64, GR32:$lz))))),
+ (BZHI64rr GR64:$src,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;
+ def : Pat<(and (loadi64 addr:$src), (srl -1, (i8 (trunc (sub 64, GR32:$lz))))),
+ (BZHI64rm addr:$src,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;
+
+ // x << (32 - y) >> (32 - y)
+ def : Pat<(srl (shl GR32:$src, (i8 (trunc (sub 32, GR32:$lz)))),
+ (i8 (trunc (sub 32, GR32:$lz)))),
+ (BZHI32rr GR32:$src, GR32:$lz)>;
+ def : Pat<(srl (shl (loadi32 addr:$src), (i8 (trunc (sub 32, GR32:$lz)))),
+ (i8 (trunc (sub 32, GR32:$lz)))),
+ (BZHI32rm addr:$src, GR32:$lz)>;
+
+ // x << (64 - y) >> (64 - y)
+ def : Pat<(srl (shl GR64:$src, (i8 (trunc (sub 64, GR32:$lz)))),
+ (i8 (trunc (sub 64, GR32:$lz)))),
+ (BZHI64rr GR64:$src,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;
+ def : Pat<(srl (shl (loadi64 addr:$src), (i8 (trunc (sub 64, GR32:$lz)))),
+ (i8 (trunc (sub 64, GR32:$lz)))),
+ (BZHI64rm addr:$src,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;
} // HasBMI2
let Predicates = [HasBMI] in {
OpenPOWER on IntegriCloud