diff options
| author | Roman Lebedev <lebedev.ri@gmail.com> | 2018-06-06 19:38:16 +0000 |
|---|---|---|
| committer | Roman Lebedev <lebedev.ri@gmail.com> | 2018-06-06 19:38:16 +0000 |
| commit | 488d28d4e5d083caf8353e6df726897bf23fa1ad (patch) | |
| tree | f7acb6a1e0e297687cb994a13a8ec32a986e76d4 /llvm/lib/Target | |
| parent | cb56f7a5502ca5029557e5f8a90c4a149021c33f (diff) | |
| download | bcm5719-llvm-488d28d4e5d083caf8353e6df726897bf23fa1ad.tar.gz bcm5719-llvm-488d28d4e5d083caf8353e6df726897bf23fa1ad.zip | |
[X86] Emit BZHI when mask is ~(-1 << nbits)
Summary:
In D47428, I propose choosing `~(-(1 << nbits))` as the canonical form of low-bit-mask formation.
As can be seen from these tests, there is a reason for that.
AArch64 currently better handles `~(-(1 << nbits))`, but not the more traditional `(1 << nbits) - 1` (sic!).
The other way around for X86.
It would be much better to canonicalize.
This patch is completely monkey-typing.
I don't really understand how this works :)
I have based it on `// x & (-1 >> (32 - y))` pattern.
Also, when we only have `BMI`, I wonder if we could use `BEXTR` with `start=0`?
Related links:
https://bugs.llvm.org/show_bug.cgi?id=36419
https://bugs.llvm.org/show_bug.cgi?id=37603
https://bugs.llvm.org/show_bug.cgi?id=37610
https://rise4fun.com/Alive/idM
Reviewers: craig.topper, spatel, RKSimon, javed.absar
Reviewed By: craig.topper
Subscribers: kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D47453
llvm-svn: 334125
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.td | 54 |
1 file changed, 41 insertions, 13 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 0d2a80f9ace..4815aba5efa 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -2448,21 +2448,49 @@ let Predicates = [HasBMI2, NoTBM] in {
 }
 
 let Predicates = [HasBMI2] in {
-  def : Pat<(and GR32:$src, (add (shl 1, GR8:$lz), -1)),
-            (BZHI32rr GR32:$src,
-              (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
-
-  def : Pat<(and (loadi32 addr:$src), (add (shl 1, GR8:$lz), -1)),
-            (BZHI32rm addr:$src,
-              (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
+  multiclass _bmi_bzhi_pattern<dag regpattern, dag mempattern, RegisterClass RC,
+                               ValueType VT, Instruction DstInst,
+                               Instruction DstMemInst> {
+    def : Pat<regpattern,
+              (DstInst RC:$src,
+                (INSERT_SUBREG (VT (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
+    def : Pat<mempattern,
+              (DstMemInst addr:$src,
+                (INSERT_SUBREG (VT (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
+  }
 
-  def : Pat<(and GR64:$src, (add (shl 1, GR8:$lz), -1)),
-            (BZHI64rr GR64:$src,
-              (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
+  multiclass bmi_bzhi_patterns<RegisterClass RC, int bitwidth, ValueType VT,
+                               Instruction DstInst, X86MemOperand x86memop,
+                               Instruction DstMemInst> {
+    // x & ((1 << y) - 1)
+    defm : _bmi_bzhi_pattern<(and RC:$src, (add (shl 1, GR8:$lz), -1)),
+                             (and (x86memop addr:$src),
+                                  (add (shl 1, GR8:$lz), -1)),
+                             RC, VT, DstInst, DstMemInst>;
+
+    // x & ~(-1 << y)
+    defm : _bmi_bzhi_pattern<(and RC:$src, (xor (shl -1, GR8:$lz), -1)),
+                             (and (x86memop addr:$src),
+                                  (xor (shl -1, GR8:$lz), -1)),
+                             RC, VT, DstInst, DstMemInst>;
+
+    // x & (-1 >> (bitwidth - y))
+    defm : _bmi_bzhi_pattern<(and RC:$src, (srl -1, (sub bitwidth, GR8:$lz))),
+                             (and (x86memop addr:$src),
+                                  (srl -1, (sub bitwidth, GR8:$lz))),
+                             RC, VT, DstInst, DstMemInst>;
+
+    // x << (bitwidth - y) >> (bitwidth - y)
+    defm : _bmi_bzhi_pattern<(srl (shl RC:$src, (sub bitwidth, GR8:$lz)),
+                                  (sub bitwidth, GR8:$lz)),
+                             (srl (shl (x86memop addr:$src),
+                                       (sub bitwidth, GR8:$lz)),
+                                  (sub bitwidth, GR8:$lz)),
+                             RC, VT, DstInst, DstMemInst>;
+  }
 
-  def : Pat<(and (loadi64 addr:$src), (add (shl 1, GR8:$lz), -1)),
-            (BZHI64rm addr:$src,
-              (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
+  defm : bmi_bzhi_patterns<GR32, 32, i32, BZHI32rr, loadi32, BZHI32rm>;
+  defm : bmi_bzhi_patterns<GR64, 64, i64, BZHI64rr, loadi64, BZHI64rm>;
 
   // x & (-1 >> (32 - y))
   def : Pat<(and GR32:$src, (srl -1, (i8 (trunc (sub 32, GR32:$lz))))),
```

