summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 22
-rw-r--r-- llvm/test/CodeGen/X86/avx512-mask-op.ll 6
2 files changed, 13 insertions, 15 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8c1501fc93a..c36be8f0b73 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9210,17 +9210,17 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
}
// If the input is a constant, let getNode fold it.
- if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
- // If we can't allow illegal operations, we need to check that this is just
- // a fp -> int or int -> conversion and that the resulting operation will
- // be legal.
- if (!LegalOperations ||
- (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
- TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
- (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
- TLI.isOperationLegal(ISD::Constant, VT)))
- return DAG.getBitcast(VT, N0);
- }
+ // We always need to check that this is just a fp -> int or int -> fp
+ // conversion; otherwise we will get back N, which will confuse the caller
+ // into thinking we used CombineTo. This can block target combines from
+ // running. If we can't allow illegal operations, we also need to ensure the
+ // resulting operation will be legal.
+ // TODO: Maybe we should check that the return value isn't N explicitly?
+ if ((isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::ConstantFP, VT))) ||
+ (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::Constant, VT))))
+ return DAG.getBitcast(VT, N0);
// (conv (conv x, t1), t2) -> (conv x, t2)
if (N0.getOpcode() == ISD::BITCAST)
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 6c25ef9df04..3d7545313ef 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -3451,8 +3451,7 @@ entry:
define void @mask_not_cast(i8*, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>) {
; CHECK-LABEL: mask_not_cast:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpleud %zmm3, %zmm2, %k0
-; CHECK-NEXT: knotw %k0, %k1
+; CHECK-NEXT: vpcmpnleud %zmm3, %zmm2, %k1
; CHECK-NEXT: vptestmd %zmm0, %zmm1, %k1 {%k1}
; CHECK-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
; CHECK-NEXT: vzeroupper
@@ -3461,8 +3460,7 @@ define void @mask_not_cast(i8*, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>) {
; X86-LABEL: mask_not_cast:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: vpcmpleud %zmm3, %zmm2, %k0
-; X86-NEXT: knotw %k0, %k1
+; X86-NEXT: vpcmpnleud %zmm3, %zmm2, %k1
; X86-NEXT: vptestmd %zmm0, %zmm1, %k1 {%k1}
; X86-NEXT: vmovdqu32 %zmm0, (%eax) {%k1}
; X86-NEXT: vzeroupper
OpenPOWER on IntegriCloud