summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author Roman Lebedev <lebedev.ri@gmail.com> 2018-05-21 21:41:02 +0000
committer Roman Lebedev <lebedev.ri@gmail.com> 2018-05-21 21:41:02 +0000
commit 7772de25d07c977e41f8faa3bbf327033cd81c20 (patch)
tree 7b728f8308187d0334a44d96cff110090adba71e /llvm/lib
parent fd79bc3aa25fa19eafa6871c7a0a40dd680b4775 (diff)
download bcm5719-llvm-7772de25d07c977e41f8faa3bbf327033cd81c20.tar.gz
download bcm5719-llvm-7772de25d07c977e41f8faa3bbf327033cd81c20.zip
[DAGCombine][X86][AArch64] Masked merge unfolding: vector edition.
Summary:
This **appears** to be the last missing piece for the masked merge pattern handling in the backend.

This is [[ https://bugs.llvm.org/show_bug.cgi?id=37104 | PR37104 ]].

[[ https://bugs.llvm.org/show_bug.cgi?id=6773 | PR6773 ]] will introduce an IR canonicalization that is likely bad for the end assembly.
Previously, `andps`+`andnps` / `bsl` would be generated. (see `@out`)
Now, they would no longer be generated (see `@in`), and we need to make sure that they are generated.

Differential Revision: https://reviews.llvm.org/D46528

llvm-svn: 332904
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 4
-rw-r--r-- llvm/lib/Target/AArch64/AArch64ISelLowering.h 15
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 25
3 files changed, 31 insertions, 13 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index fa4c1dd4aae..fc16a06bd99 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5391,10 +5391,6 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
EVT VT = N->getValueType(0);
- // FIXME
- if (VT.isVector())
- return SDValue();
-
// There are 3 commutable operators in the pattern,
// so we have to deal with 8 possible variants of the basic pattern.
SDValue X, Y, M;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 7d300d628ba..461ebcca244 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -443,9 +443,18 @@ public:
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
- bool hasAndNotCompare(SDValue) const override {
- // 'bics'
- return true;
+ bool hasAndNotCompare(SDValue V) const override {
+ // We can use bics for any scalar.
+ return V.getValueType().isScalarInteger();
+ }
+
+ bool hasAndNot(SDValue Y) const override {
+ EVT VT = Y.getValueType();
+
+ if (!VT.isVector())
+ return hasAndNotCompare(Y);
+
+ return VT.getSizeInBits() >= 64; // vector 'bic'
}
bool hasBitPreservingFPLogic(EVT VT) const override {
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 594dbed93fe..cfb4074300e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4751,26 +4751,39 @@ bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
}
bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
- // A mask and compare against constant is ok for an 'andn' too
- // even though the BMI instruction doesn't have an immediate form.
+ EVT VT = Y.getValueType();
+
+ if (VT.isVector())
+ return false;
if (!Subtarget.hasBMI())
return false;
// There are only 32-bit and 64-bit forms for 'andn'.
- EVT VT = Y.getValueType();
if (VT != MVT::i32 && VT != MVT::i64)
return false;
+ // A mask and compare against constant is ok for an 'andn' too
+ // even though the BMI instruction doesn't have an immediate form.
+
return true;
}
bool X86TargetLowering::hasAndNot(SDValue Y) const {
- // x86 can't form 'andn' with an immediate.
- if (isa<ConstantSDNode>(Y))
+ EVT VT = Y.getValueType();
+
+ if (!VT.isVector()) // x86 can't form 'andn' with an immediate.
+ return !isa<ConstantSDNode>(Y) && hasAndNotCompare(Y);
+
+ // Vector.
+
+ if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128)
return false;
- return hasAndNotCompare(Y);
+ if (VT == MVT::v4i32)
+ return true;
+
+ return Subtarget.hasSSE2();
}
MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
OpenPOWER on IntegriCloud