diff options
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 12 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 21 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/dag-combine-mul-shl.ll | 117 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/shift-pcmp.ll | 6 |
4 files changed, 134 insertions, 22 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 4890441e37c..b3813ba82ce 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2157,11 +2157,11 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // (mul (shl X, c1), c2) -> (mul X, c2 << c1) if (N0.getOpcode() == ISD::SHL && - isConstantOrConstantVector(N1) && - isConstantOrConstantVector(N0.getOperand(1))) { + isConstantOrConstantVector(N1, /* NoOpaques */ true) && + isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) { SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1)); - AddToWorklist(C3.getNode()); - return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3); + if (isConstantOrConstantVector(C3)) + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3); } // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one @@ -4714,8 +4714,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { isConstantOrConstantVector(N1, /* No Opaques */ true) && isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) { SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); - AddToWorklist(Shl.getNode()); - return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl); + if (isConstantOrConstantVector(Shl)) + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl); } if (N1C && !N1C->isOpaque()) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index f2bb3795a2d..71c83ae5c0f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3498,25 +3498,22 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT SVT = VT.getScalarType(); SmallVector<SDValue, 4> Outputs; for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) { - ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I)); - ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I)); - if (!V1 || !V2) // Not a constant, bail. - return SDValue(); - - if (V1->isOpaque() || V2->isOpaque()) - return SDValue(); + SDValue V1 = BV1->getOperand(I); + SDValue V2 = BV2->getOperand(I); // Avoid BUILD_VECTOR nodes that perform implicit truncation. - // FIXME: This is valid and could be handled by truncating the APInts. + // FIXME: This is valid and could be handled by truncation. if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) return SDValue(); // Fold one vector element. - std::pair<APInt, bool> Folded = FoldValue(Opcode, V1->getAPIntValue(), - V2->getAPIntValue()); - if (!Folded.second) + SDValue ScalarResult = getNode(Opcode, DL, SVT, V1, V2); + + // Scalar folding only succeeded if the result is a constant or UNDEF. + if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant && + ScalarResult.getOpcode() != ISD::ConstantFP) return SDValue(); - Outputs.push_back(getConstant(Folded.first, DL, SVT)); + Outputs.push_back(ScalarResult); } assert(VT.getVectorNumElements() == Outputs.size() && diff --git a/llvm/test/CodeGen/AArch64/dag-combine-mul-shl.ll b/llvm/test/CodeGen/AArch64/dag-combine-mul-shl.ll new file mode 100644 index 00000000000..00c50059406 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/dag-combine-mul-shl.ll @@ -0,0 +1,117 @@ +; RUN: llc -mtriple=aarch64 < %s | FileCheck %s + +; CHECK-LABEL: fn1_vector: +; CHECK: adrp x[[BASE:[0-9]+]], .LCP +; CHECK-NEXT: ldr q[[NUM:[0-9]+]], [x[[BASE]], +; CHECK-NEXT: mul v0.16b, v0.16b, v[[NUM]].16b +; CHECK-NEXT: ret +define <16 x i8> @fn1_vector(<16 x i8> %arg) { +entry: + %shl = shl <16 x i8> %arg, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> + %mul = mul <16 x i8> %shl, <i8 0, i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> + ret <16 x i8> %mul +} + +; CHECK-LABEL: fn2_vector: +; CHECK: adrp x[[BASE:[0-9]+]], .LCP +; CHECK-NEXT: ldr q[[NUM:[0-9]+]], [x[[BASE]], +; CHECK-NEXT: mul v0.16b, v0.16b, v[[NUM]].16b +; CHECK-NEXT: ret +define <16 x i8> @fn2_vector(<16 x i8> %arg) { +entry: + %mul = mul <16 x i8> %arg, <i8 0, i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> + %shl = shl <16 x i8> %mul, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> + ret <16 x i8> %shl +} + +; CHECK-LABEL: fn1_vector_undef: +; CHECK: adrp x[[BASE:[0-9]+]], .LCP +; CHECK-NEXT: ldr q[[NUM:[0-9]+]], [x[[BASE]], +; CHECK-NEXT: mul v0.16b, v0.16b, v[[NUM]].16b +; CHECK-NEXT: ret +define <16 x i8> @fn1_vector_undef(<16 x i8> %arg) { +entry: + %shl = shl <16 x i8> %arg, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> + %mul = mul <16 x i8> %shl, <i8 undef, i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> + ret <16 x i8> %mul +} + +; CHECK-LABEL: fn2_vector_undef: +; CHECK: adrp x[[BASE:[0-9]+]], .LCP +; CHECK-NEXT: ldr q[[NUM:[0-9]+]], [x[[BASE]], +; CHECK-NEXT: mul v0.16b, v0.16b, v[[NUM]].16b +; CHECK-NEXT: ret +define <16 x i8> @fn2_vector_undef(<16 x i8> %arg) { +entry: + %mul = mul <16 x i8> %arg, <i8 undef, i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> + %shl = shl <16 x i8> %mul, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> + ret <16 x i8> %shl +} + +; CHECK-LABEL: fn1_scalar: +; CHECK: mov w[[REG:[0-9]+]], #1664 +; CHECK-NEXT: mul w0, w0, w[[REG]] +; CHECK-NEXT: ret +define i32 @fn1_scalar(i32 %arg) { +entry: + %shl = shl i32 %arg, 7 + %mul = mul i32 %shl, 13 + ret i32 %mul +} + +; CHECK-LABEL: fn2_scalar: +; CHECK: mov w[[REG:[0-9]+]], #1664 +; CHECK-NEXT: mul w0, w0, w[[REG]] +; CHECK-NEXT: ret +define i32 @fn2_scalar(i32 %arg) { +entry: + %mul = mul i32 %arg, 13 + %shl = shl i32 %mul, 7 + ret i32 %shl +} + +; CHECK-LABEL: fn1_scalar_undef: +; CHECK: mov w0 +; CHECK-NEXT: ret +define i32 @fn1_scalar_undef(i32 %arg) { +entry: + %shl = shl i32 %arg, 7 + %mul = mul i32 %shl, undef + ret i32 %mul +} + +; CHECK-LABEL: fn2_scalar_undef: +; CHECK: mov w0 +; CHECK-NEXT: ret +define i32 @fn2_scalar_undef(i32 %arg) { +entry: + %mul = mul i32 %arg, undef + %shl = shl i32 %mul, 7 + ret i32 %shl +} + +; CHECK-LABEL: fn1_scalar_opaque: +; CHECK: mov w[[REG:[0-9]+]], #13 +; CHECK-NEXT: mul w[[REG]], w0, w[[REG]] +; CHECK-NEXT: lsl w0, w[[REG]], #7 +; CHECK-NEXT: ret +define i32 @fn1_scalar_opaque(i32 %arg) { +entry: + %bitcast = bitcast i32 13 to i32 + %shl = shl i32 %arg, 7 + %mul = mul i32 %shl, %bitcast + ret i32 %mul +} + +; CHECK-LABEL: fn2_scalar_opaque: +; CHECK: mov w[[REG:[0-9]+]], #13 +; CHECK-NEXT: mul w[[REG]], w0, w[[REG]] +; CHECK-NEXT: lsl w0, w[[REG]], #7 +; CHECK-NEXT: ret +define i32 @fn2_scalar_opaque(i32 %arg) { +entry: + %bitcast = bitcast i32 13 to i32 + %mul = mul i32 %arg, %bitcast + %shl = shl i32 %mul, 7 + ret i32 %shl +} diff --git a/llvm/test/CodeGen/X86/shift-pcmp.ll b/llvm/test/CodeGen/X86/shift-pcmp.ll index 4945d6115db..adfd2f143d1 100644 --- a/llvm/test/CodeGen/X86/shift-pcmp.ll +++ b/llvm/test/CodeGen/X86/shift-pcmp.ll @@ -26,15 +26,13 @@ define <8 x i16> @bar(<8 x i16> %a, <8 x i16> %b) { ; SSE-LABEL: bar: ; SSE: # BB#0: ; SSE-NEXT: pcmpeqw %xmm1, %xmm0 -; SSE-NEXT: psrlw $15, %xmm0 -; SSE-NEXT: psllw $5, %xmm0 +; SSE-NEXT: pand {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: bar: ; AVX: # BB#0: ; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpsrlw $15, %xmm0, %xmm0 -; AVX-NEXT: vpsllw $5, %xmm0, %xmm0 +; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq ; %icmp = icmp eq <8 x i16> %a, %b |