summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorRoman Lebedev <lebedev.ri@gmail.com>2018-06-06 19:38:16 +0000
committerRoman Lebedev <lebedev.ri@gmail.com>2018-06-06 19:38:16 +0000
commit488d28d4e5d083caf8353e6df726897bf23fa1ad (patch)
treef7acb6a1e0e297687cb994a13a8ec32a986e76d4 /llvm/lib/Target
parentcb56f7a5502ca5029557e5f8a90c4a149021c33f (diff)
downloadbcm5719-llvm-488d28d4e5d083caf8353e6df726897bf23fa1ad.tar.gz
bcm5719-llvm-488d28d4e5d083caf8353e6df726897bf23fa1ad.zip
[X86] Emit BZHI when mask is ~(-1 << nbits))
Summary: In D47428, i propose to choose the `~(-(1 << nbits))` as the canonical form of low-bit-mask formation. As it is seen from these tests, there is a reason for that. AArch64 currently better handles `~(-(1 << nbits))`, but not the more traditional `(1 << nbits) - 1` (sic!). The other way around for X86. It would be much better to canonicalize. This patch is completely monkey-typing. I don't really understand how this works :) I have based it on `// x & (-1 >> (32 - y))` pattern. Also, when we only have `BMI`, i wonder if we could use `BEXTR` with `start=0` ? Related links: https://bugs.llvm.org/show_bug.cgi?id=36419 https://bugs.llvm.org/show_bug.cgi?id=37603 https://bugs.llvm.org/show_bug.cgi?id=37610 https://rise4fun.com/Alive/idM Reviewers: craig.topper, spatel, RKSimon, javed.absar Reviewed By: craig.topper Subscribers: kristof.beyls, llvm-commits Differential Revision: https://reviews.llvm.org/D47453 llvm-svn: 334125
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.td54
1 files changed, 41 insertions, 13 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 0d2a80f9ace..4815aba5efa 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -2448,21 +2448,49 @@ let Predicates = [HasBMI2, NoTBM] in {
}
let Predicates = [HasBMI2] in {
- def : Pat<(and GR32:$src, (add (shl 1, GR8:$lz), -1)),
- (BZHI32rr GR32:$src,
- (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
-
- def : Pat<(and (loadi32 addr:$src), (add (shl 1, GR8:$lz), -1)),
- (BZHI32rm addr:$src,
- (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
+ multiclass _bmi_bzhi_pattern<dag regpattern, dag mempattern, RegisterClass RC,
+ ValueType VT, Instruction DstInst,
+ Instruction DstMemInst> {
+ def : Pat<regpattern,
+ (DstInst RC:$src,
+ (INSERT_SUBREG (VT (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
+ def : Pat<mempattern,
+ (DstMemInst addr:$src,
+ (INSERT_SUBREG (VT (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
+ }
- def : Pat<(and GR64:$src, (add (shl 1, GR8:$lz), -1)),
- (BZHI64rr GR64:$src,
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
+ multiclass bmi_bzhi_patterns<RegisterClass RC, int bitwidth, ValueType VT,
+ Instruction DstInst, X86MemOperand x86memop,
+ Instruction DstMemInst> {
+ // x & ((1 << y) - 1)
+ defm : _bmi_bzhi_pattern<(and RC:$src, (add (shl 1, GR8:$lz), -1)),
+ (and (x86memop addr:$src),
+ (add (shl 1, GR8:$lz), -1)),
+ RC, VT, DstInst, DstMemInst>;
+
+ // x & ~(-1 << y)
+ defm : _bmi_bzhi_pattern<(and RC:$src, (xor (shl -1, GR8:$lz), -1)),
+ (and (x86memop addr:$src),
+ (xor (shl -1, GR8:$lz), -1)),
+ RC, VT, DstInst, DstMemInst>;
+
+ // x & (-1 >> (bitwidth - y))
+ defm : _bmi_bzhi_pattern<(and RC:$src, (srl -1, (sub bitwidth, GR8:$lz))),
+ (and (x86memop addr:$src),
+ (srl -1, (sub bitwidth, GR8:$lz))),
+ RC, VT, DstInst, DstMemInst>;
+
+ // x << (bitwidth - y) >> (bitwidth - y)
+ defm : _bmi_bzhi_pattern<(srl (shl RC:$src, (sub bitwidth, GR8:$lz)),
+ (sub bitwidth, GR8:$lz)),
+ (srl (shl (x86memop addr:$src),
+ (sub bitwidth, GR8:$lz)),
+ (sub bitwidth, GR8:$lz)),
+ RC, VT, DstInst, DstMemInst>;
+ }
- def : Pat<(and (loadi64 addr:$src), (add (shl 1, GR8:$lz), -1)),
- (BZHI64rm addr:$src,
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
+ defm : bmi_bzhi_patterns<GR32, 32, i32, BZHI32rr, loadi32, BZHI32rm>;
+ defm : bmi_bzhi_patterns<GR64, 64, i64, BZHI64rr, loadi64, BZHI64rm>;
// x & (-1 >> (32 - y))
def : Pat<(and GR32:$src, (srl -1, (i8 (trunc (sub 32, GR32:$lz))))),
OpenPOWER on IntegriCloud