| author | Roman Lebedev <lebedev.ri@gmail.com> | 2018-05-21 21:41:02 +0000 |
|---|---|---|
| committer | Roman Lebedev <lebedev.ri@gmail.com> | 2018-05-21 21:41:02 +0000 |
| commit | 7772de25d07c977e41f8faa3bbf327033cd81c20 | |
| tree | 7b728f8308187d0334a44d96cff110090adba71e /llvm/lib | |
| parent | fd79bc3aa25fa19eafa6871c7a0a40dd680b4775 | |
| download | bcm5719-llvm-7772de25d07c977e41f8faa3bbf327033cd81c20.tar.gz bcm5719-llvm-7772de25d07c977e41f8faa3bbf327033cd81c20.zip | |
[DAGCombine][X86][AArch64] Masked merge unfolding: vector edition.
Summary:
This **appears** to be the last missing piece for the masked merge pattern handling in the backend.
This is [[ https://bugs.llvm.org/show_bug.cgi?id=37104 | PR37104 ]].
[[ https://bugs.llvm.org/show_bug.cgi?id=6773 | PR6773 ]] will introduce an IR canonicalization that is likely bad for the end assembly.
Previously, `andps`+`andnps` / `bsl` would be generated (see `@out`).
After that canonicalization they would no longer be generated (see `@in`), so we need to make sure that they still are.
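For context, the masked merge in question is the bit-select idiom `(x & m) | (y & ~m)`; the IR canonicalization referenced above rewrites it into the xor form `((x ^ y) & m) ^ y`, which hides the and-not from the backend until it is unfolded again. The sketch below is a minimal standalone C++ illustration of that equivalence; the function and variable names are illustrative only and are not taken from the `@out`/`@in` test functions.

```cpp
#include <cassert>
#include <cstdint>

// Masked merge, "unfolded" form: take bits of x where m is set and bits of y
// where m is clear. This is the shape that maps onto andps+andnps / bsl.
static uint32_t merge_unfolded(uint32_t x, uint32_t y, uint32_t m) {
  return (x & m) | (y & ~m);
}

// The xor-based form that the IR canonicalization prefers; it saves the
// explicit 'not', but obscures the and-not pattern from the backend.
static uint32_t merge_folded(uint32_t x, uint32_t y, uint32_t m) {
  return ((x ^ y) & m) ^ y;
}

int main() {
  const uint32_t vals[] = {0u, 1u, 0xDEADBEEFu, 0xFFFFFFFFu, 0x0F0F0F0Fu};
  for (uint32_t x : vals)
    for (uint32_t y : vals)
      for (uint32_t m : vals)
        assert(merge_unfolded(x, y, m) == merge_folded(x, y, m));
  return 0;
}
```

The unfolded form is the one that maps directly onto `andps`+`andnps` / `bsl`, which is why the backend wants to recover it when the target has a native and-not.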
Differential Revision: https://reviews.llvm.org/D46528
llvm-svn: 332904
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 |
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.h | 15 |
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 25 |
3 files changed, 31 insertions, 13 deletions
```diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index fa4c1dd4aae..fc16a06bd99 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5391,10 +5391,6 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
   EVT VT = N->getValueType(0);
 
-  // FIXME
-  if (VT.isVector())
-    return SDValue();
-
   // There are 3 commutable operators in the pattern,
   // so we have to deal with 8 possible variants of the basic pattern.
   SDValue X, Y, M;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 7d300d628ba..461ebcca244 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -443,9 +443,18 @@ public:
   bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
 
-  bool hasAndNotCompare(SDValue) const override {
-    // 'bics'
-    return true;
+  bool hasAndNotCompare(SDValue V) const override {
+    // We can use bics for any scalar.
+    return V.getValueType().isScalarInteger();
+  }
+
+  bool hasAndNot(SDValue Y) const override {
+    EVT VT = Y.getValueType();
+
+    if (!VT.isVector())
+      return hasAndNotCompare(Y);
+
+    return VT.getSizeInBits() >= 64; // vector 'bic'
   }
 
   bool hasBitPreservingFPLogic(EVT VT) const override {
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 594dbed93fe..cfb4074300e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4751,26 +4751,39 @@ bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
 }
 
 bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
-  // A mask and compare against constant is ok for an 'andn' too
-  // even though the BMI instruction doesn't have an immediate form.
+  EVT VT = Y.getValueType();
+
+  if (VT.isVector())
+    return false;
 
   if (!Subtarget.hasBMI())
     return false;
 
   // There are only 32-bit and 64-bit forms for 'andn'.
-  EVT VT = Y.getValueType();
   if (VT != MVT::i32 && VT != MVT::i64)
     return false;
 
+  // A mask and compare against constant is ok for an 'andn' too
+  // even though the BMI instruction doesn't have an immediate form.
+
   return true;
 }
 
 bool X86TargetLowering::hasAndNot(SDValue Y) const {
-  // x86 can't form 'andn' with an immediate.
-  if (isa<ConstantSDNode>(Y))
+  EVT VT = Y.getValueType();
+
+  if (!VT.isVector()) // x86 can't form 'andn' with an immediate.
+    return !isa<ConstantSDNode>(Y) && hasAndNotCompare(Y);
+
+  // Vector.
+
+  if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128)
     return false;
 
-  return hasAndNotCompare(Y);
+  if (VT == MVT::v4i32)
+    return true;
+
+  return Subtarget.hasSSE2();
 }
 
 MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
```
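As a side note on the X86 half of the change: the sketch below is a standalone mirror of the vector case the new `X86TargetLowering::hasAndNot` override allows, written as plain C++ so it can be compiled on its own. `X86Features`, `vectorHasAndNot`, and their parameters are hypothetical names for illustration; the real hook queries the `EVT`/`MVT` and the `Subtarget`, and the v4i32 special case presumably corresponds to `andps`/`andnps` being usable with SSE1 alone.

```cpp
#include <cstdio>

// Hypothetical stand-in for the SSE feature bits the real hook reads off the
// X86 Subtarget.
struct X86Features {
  bool hasSSE1;
  bool hasSSE2;
};

// bits    - total vector width in bits
// isV4I32 - the v4i32 special case, allowed with SSE1 alone
//           (presumably because andps/andnps already cover it)
constexpr bool vectorHasAndNot(X86Features f, unsigned bits, bool isV4I32) {
  if (!f.hasSSE1 || bits < 128)
    return false;     // no vector and-not below 128 bits or without SSE1
  if (isV4I32)
    return true;      // SSE1 suffices for the v4i32 case
  return f.hasSSE2;   // other vectors rely on SSE2 integer ops (pand/pandn)
}

int main() {
  constexpr X86Features sse1only{true, false};
  constexpr X86Features sse2{true, true};
  static_assert(vectorHasAndNot(sse1only, 128, /*isV4I32=*/true), "");
  static_assert(!vectorHasAndNot(sse1only, 128, /*isV4I32=*/false), "");
  static_assert(vectorHasAndNot(sse2, 128, /*isV4I32=*/false), "");
  static_assert(!vectorHasAndNot(sse2, 64, /*isV4I32=*/false), "");
  std::puts("all checks passed");
  return 0;
}
```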

