summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp  | 22
-rw-r--r--  llvm/test/Transforms/InstCombine/X86/blend_x86.ll     | 24
2 files changed, 23 insertions, 23 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 4755e8509b3..bff46597266 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2929,16 +2929,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx_blendv_ps_256:
case Intrinsic::x86_avx_blendv_pd_256:
case Intrinsic::x86_avx2_pblendvb: {
- // Convert blendv* to vector selects if the mask is constant.
- // This optimization is convoluted because the intrinsic is defined as
- // getting a vector of floats or doubles for the ps and pd versions.
- // FIXME: That should be changed.
-
+ // fold (blend A, A, Mask) -> A
Value *Op0 = II->getArgOperand(0);
Value *Op1 = II->getArgOperand(1);
Value *Mask = II->getArgOperand(2);
-
- // fold (blend A, A, Mask) -> A
if (Op0 == Op1)
return replaceInstUsesWith(CI, Op0);
@@ -2951,6 +2945,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Constant *NewSelector = getNegativeIsTrueBoolVec(ConstantMask);
return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
}
+
+ // Convert to a vector select if we can bypass casts and find a boolean
+ // vector condition value.
+ Value *BoolVec;
+ if (match(peekThroughBitcast(Mask), m_SExt(m_Value(BoolVec)))) {
+ auto *VTy = dyn_cast<VectorType>(BoolVec->getType());
+ if (VTy && VTy->getScalarSizeInBits() == 1 &&
+ VTy->getVectorNumElements() == II->getType()->getVectorNumElements())
+ return SelectInst::Create(BoolVec, Op1, Op0);
+ // TODO: If we can find a boolean vector condition with less elements,
+ // then we can form a vector select by bitcasting Op0/Op1 to a
+ // vector type with wider elements and bitcasting the result.
+ }
+
break;
}
diff --git a/llvm/test/Transforms/InstCombine/X86/blend_x86.ll b/llvm/test/Transforms/InstCombine/X86/blend_x86.ll
index e760c646c16..7b0ca2a92ef 100644
--- a/llvm/test/Transforms/InstCombine/X86/blend_x86.ll
+++ b/llvm/test/Transforms/InstCombine/X86/blend_x86.ll
@@ -157,9 +157,7 @@ define <32 x i8> @constant_pblendvb_avx2_dup(<32 x i8> %xyzw, <32 x i8> %sel) {
define <4 x float> @sel_v4f32(<4 x float> %x, <4 x float> %y, <4 x i1> %cond) {
; CHECK-LABEL: @sel_v4f32(
-; CHECK-NEXT: [[S:%.*]] = sext <4 x i1> [[COND:%.*]] to <4 x i32>
-; CHECK-NEXT: [[B:%.*]] = bitcast <4 x i32> [[S]] to <4 x float>
-; CHECK-NEXT: [[R:%.*]] = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[B]])
+; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[COND:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[X:%.*]]
; CHECK-NEXT: ret <4 x float> [[R]]
;
%s = sext <4 x i1> %cond to <4 x i32>
@@ -170,9 +168,7 @@ define <4 x float> @sel_v4f32(<4 x float> %x, <4 x float> %y, <4 x i1> %cond) {
define <2 x double> @sel_v2f64(<2 x double> %x, <2 x double> %y, <2 x i1> %cond) {
; CHECK-LABEL: @sel_v2f64(
-; CHECK-NEXT: [[S:%.*]] = sext <2 x i1> [[COND:%.*]] to <2 x i64>
-; CHECK-NEXT: [[B:%.*]] = bitcast <2 x i64> [[S]] to <2 x double>
-; CHECK-NEXT: [[R:%.*]] = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[B]])
+; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[COND:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[X:%.*]]
; CHECK-NEXT: ret <2 x double> [[R]]
;
%s = sext <2 x i1> %cond to <2 x i64>
@@ -198,8 +194,7 @@ define <16 x i8> @sel_v4i32(<16 x i8> %x, <16 x i8> %y, <4 x i1> %cond) {
define <16 x i8> @sel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i1> %cond) {
; CHECK-LABEL: @sel_v16i8(
-; CHECK-NEXT: [[S:%.*]] = sext <16 x i1> [[COND:%.*]] to <16 x i8>
-; CHECK-NEXT: [[R:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[X:%.*]], <16 x i8> [[Y:%.*]], <16 x i8> [[S]])
+; CHECK-NEXT: [[R:%.*]] = select <16 x i1> [[COND:%.*]], <16 x i8> [[Y:%.*]], <16 x i8> [[X:%.*]]
; CHECK-NEXT: ret <16 x i8> [[R]]
;
%s = sext <16 x i1> %cond to <16 x i8>
@@ -217,9 +212,7 @@ define <4 x float> @sel_v4f32_sse_reality(<4 x float>* %x, <4 x float> %y, <4 x
; CHECK-LABEL: @sel_v4f32_sse_reality(
; CHECK-NEXT: [[LD:%.*]] = load <4 x float>, <4 x float>* [[X:%.*]], align 16
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x float> [[Z:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
-; CHECK-NEXT: [[COND:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
-; CHECK-NEXT: [[R:%.*]] = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> [[LD]], <4 x float> zeroinitializer, <4 x float> [[COND]])
+; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[CMP]], <4 x float> zeroinitializer, <4 x float> [[LD]]
; CHECK-NEXT: ret <4 x float> [[R]]
;
%ld = load <4 x float>, <4 x float>* %x, align 16
@@ -234,9 +227,7 @@ define <2 x double> @sel_v2f64_sse_reality(<2 x double>* nocapture readonly %x,
; CHECK-LABEL: @sel_v2f64_sse_reality(
; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, <2 x double>* [[X:%.*]], align 16
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <2 x double> [[Z:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
-; CHECK-NEXT: [[COND:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
-; CHECK-NEXT: [[R:%.*]] = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> [[LD]], <2 x double> zeroinitializer, <2 x double> [[COND]])
+; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP]], <2 x double> zeroinitializer, <2 x double> [[LD]]
; CHECK-NEXT: ret <2 x double> [[R]]
;
%ld = load <2 x double>, <2 x double>* %x, align 16
@@ -247,6 +238,8 @@ define <2 x double> @sel_v2f64_sse_reality(<2 x double>* nocapture readonly %x,
ret <2 x double> %r
}
+; TODO: We can bitcast the inputs to the select and the result and remove the intrinsic.
+
define <2 x i64> @sel_v4i32_sse_reality(<2 x i64>* nocapture readonly %x, <2 x i64> %y, <2 x i64> %z) {
; CHECK-LABEL: @sel_v4i32_sse_reality(
; CHECK-NEXT: [[XCAST:%.*]] = bitcast <2 x i64>* [[X:%.*]] to <16 x i8>*
@@ -279,8 +272,7 @@ define <2 x i64> @sel_v16i8_sse_reality(<2 x i64>* nocapture readonly %x, <2 x i
; CHECK-NEXT: [[YCAST:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <16 x i8>
; CHECK-NEXT: [[ZCAST:%.*]] = bitcast <2 x i64> [[Z:%.*]] to <16 x i8>
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <16 x i8> [[YCAST]], [[ZCAST]]
-; CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i8>
-; CHECK-NEXT: [[R:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[LD]], <16 x i8> zeroinitializer, <16 x i8> [[SEXT]])
+; CHECK-NEXT: [[R:%.*]] = select <16 x i1> [[CMP]], <16 x i8> zeroinitializer, <16 x i8> [[LD]]
; CHECK-NEXT: [[RCAST:%.*]] = bitcast <16 x i8> [[R]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[RCAST]]
;
OpenPOWER on IntegriCloud