-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 27
-rw-r--r--  llvm/test/Transforms/InstCombine/X86/blend_x86.ll    | 21
2 files changed, 30 insertions, 18 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index bff46597266..efe41b49274 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2949,14 +2949,27 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     // Convert to a vector select if we can bypass casts and find a boolean
     // vector condition value.
     Value *BoolVec;
-    if (match(peekThroughBitcast(Mask), m_SExt(m_Value(BoolVec)))) {
-      auto *VTy = dyn_cast<VectorType>(BoolVec->getType());
-      if (VTy && VTy->getScalarSizeInBits() == 1 &&
-          VTy->getVectorNumElements() == II->getType()->getVectorNumElements())
+    Mask = peekThroughBitcast(Mask);
+    if (match(Mask, m_SExt(m_Value(BoolVec))) &&
+        BoolVec->getType()->isVectorTy() &&
+        BoolVec->getType()->getScalarSizeInBits() == 1) {
+      assert(Mask->getType()->getPrimitiveSizeInBits() ==
+                 II->getType()->getPrimitiveSizeInBits() &&
+             "Not expecting mask and operands with different sizes");
+
+      unsigned NumMaskElts = Mask->getType()->getVectorNumElements();
+      unsigned NumOperandElts = II->getType()->getVectorNumElements();
+      if (NumMaskElts == NumOperandElts)
         return SelectInst::Create(BoolVec, Op1, Op0);
-      // TODO: If we can find a boolean vector condition with less elements,
-      // then we can form a vector select by bitcasting Op0/Op1 to a
-      // vector type with wider elements and bitcasting the result.
+
+      // If the mask has less elements than the operands, each mask bit maps to
+      // multiple elements of the operands. Bitcast back and forth.
+      if (NumMaskElts < NumOperandElts) {
+        Value *CastOp0 = Builder.CreateBitCast(Op0, Mask->getType());
+        Value *CastOp1 = Builder.CreateBitCast(Op1, Mask->getType());
+        Value *Sel = Builder.CreateSelect(BoolVec, CastOp1, CastOp0);
+        return new BitCastInst(Sel, II->getType());
+      }
     }
 
     break;
diff --git a/llvm/test/Transforms/InstCombine/X86/blend_x86.ll b/llvm/test/Transforms/InstCombine/X86/blend_x86.ll
index 7b0ca2a92ef..676d8ba9e7d 100644
--- a/llvm/test/Transforms/InstCombine/X86/blend_x86.ll
+++ b/llvm/test/Transforms/InstCombine/X86/blend_x86.ll
@@ -177,13 +177,14 @@ define <2 x double> @sel_v2f64(<2 x double> %x, <2 x double> %y, <2 x i1> %cond)
   ret <2 x double> %r
 }
 
-; TODO: We can bitcast X, Y, and the select and remove the intrinsic.
+; Bitcast X, Y, and the select and remove the intrinsic.
 
 define <16 x i8> @sel_v4i32(<16 x i8> %x, <16 x i8> %y, <4 x i1> %cond) {
 ; CHECK-LABEL: @sel_v4i32(
-; CHECK-NEXT:    [[S:%.*]] = sext <4 x i1> [[COND:%.*]] to <4 x i32>
-; CHECK-NEXT:    [[B:%.*]] = bitcast <4 x i32> [[S]] to <16 x i8>
-; CHECK-NEXT:    [[R:%.*]] = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[X:%.*]], <16 x i8> [[Y:%.*]], <16 x i8> [[B]])
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[X:%.*]] to <4 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[Y:%.*]] to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[COND:%.*]], <4 x i32> [[TMP2]], <4 x i32> [[TMP1]]
+; CHECK-NEXT:    [[R:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
 ; CHECK-NEXT:    ret <16 x i8> [[R]]
 ;
   %s = sext <4 x i1> %cond to <4 x i32>
@@ -238,19 +239,17 @@ define <2 x double> @sel_v2f64_sse_reality(<2 x double>* nocapture readonly %x,
   ret <2 x double> %r
 }
 
-; TODO: We can bitcast the inputs to the select and the result and remove the intrinsic.
+; Bitcast the inputs and the result and remove the intrinsic.
 
 define <2 x i64> @sel_v4i32_sse_reality(<2 x i64>* nocapture readonly %x, <2 x i64> %y, <2 x i64> %z) {
 ; CHECK-LABEL: @sel_v4i32_sse_reality(
-; CHECK-NEXT:    [[XCAST:%.*]] = bitcast <2 x i64>* [[X:%.*]] to <16 x i8>*
-; CHECK-NEXT:    [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[XCAST]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64>* [[X:%.*]] to <4 x i32>*
+; CHECK-NEXT:    [[LD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16
 ; CHECK-NEXT:    [[YCAST:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <4 x i32>
 ; CHECK-NEXT:    [[ZCAST:%.*]] = bitcast <2 x i64> [[Z:%.*]] to <4 x i32>
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <4 x i32> [[YCAST]], [[ZCAST]]
-; CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
-; CHECK-NEXT:    [[COND:%.*]] = bitcast <4 x i32> [[SEXT]] to <16 x i8>
-; CHECK-NEXT:    [[R:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[LD]], <16 x i8> zeroinitializer, <16 x i8> [[COND]])
-; CHECK-NEXT:    [[RCAST:%.*]] = bitcast <16 x i8> [[R]] to <2 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> [[LD1]]
+; CHECK-NEXT:    [[RCAST:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[RCAST]]
 ;
   %xcast = bitcast <2 x i64>* %x to <16 x i8>*
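
For reference, below is a minimal standalone LLVM IR sketch of the fold added by this change, adapted from the @sel_v4i32 test above. The function names @blend_before and @blend_after and the value names are illustrative only, not part of the commit: when the blendv mask is a sign-extended boolean vector with fewer elements than the intrinsic's operands, the operands are bitcast to the mask's element count, selected on the boolean vector, and the result is bitcast back.

; Input pattern: the pblendvb mask is a <4 x i1> condition sign-extended
; and bitcast to <16 x i8>, so there are 4 mask elements for 16 operand
; elements.
define <16 x i8> @blend_before(<16 x i8> %x, <16 x i8> %y, <4 x i1> %cond) {
  %s = sext <4 x i1> %cond to <4 x i32>
  %b = bitcast <4 x i32> %s to <16 x i8>
  %r = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %x, <16 x i8> %y, <16 x i8> %b)
  ret <16 x i8> %r
}

; Expected shape after -instcombine: operands bitcast to <4 x i32>, a plain
; select on the boolean vector (second operand chosen where the mask is
; true, matching blendv semantics), then a bitcast back to <16 x i8>.
define <16 x i8> @blend_after(<16 x i8> %x, <16 x i8> %y, <4 x i1> %cond) {
  %xcast = bitcast <16 x i8> %x to <4 x i32>
  %ycast = bitcast <16 x i8> %y to <4 x i32>
  %sel = select <4 x i1> %cond, <4 x i32> %ycast, <4 x i32> %xcast
  %r = bitcast <4 x i32> %sel to <16 x i8>
  ret <16 x i8> %r
}

declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)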