[InstCombine] remove extract-of-select vector transform

The transform to convert an extract-of-a-select-of-vectors was added at: rL194013. A question about the validity of this transform was raised in the review (https://reviews.llvm.org/D1539), but not answered AFAICT. Most of the motivating cases in that patch are now handled by other combines. These are the tests that were added with the original commit, but they are not regressing even after we remove the transform in this patch. The diffs we see after removing this transform cause us to avoid increasing the instruction count, so we don't want to do those transforms as canonicalizations. The motivation for not turning a vector-select-of-vectors into a scalar operation is shown in PR33301 (https://bugs.llvm.org/show_bug.cgi?id=33301): in those cases, we'll get vector ops with this patch rather than the vector/scalar mix that we currently see. Differential Revision: https://reviews.llvm.org/D38006 llvm-svn: 314117
author: Sanjay Patel <spatel@rotateright.com> 2017-09-25 16:41:34 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2017-09-25 16:41:34 +0000
commit: 9639897d7786d176f4c0e42ab4a9a7029ea50b78 (patch)
tree: 3b2adc0164bf2b5e7ce86764631d7c91789a68a2
parent: 0a62b2d88767bd2836bcf3182e6d58c89cc84cdf (diff)
download: bcm5719-llvm-9639897d7786d176f4c0e42ab4a9a7029ea50b78.tar.gz
bcm5719-llvm-9639897d7786d176f4c0e42ab4a9a7029ea50b78.zip
3 files changed, 52 insertions, 94 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 0d327795acf..d760101281a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -255,39 +255,6 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
         Worklist.AddValue(EE);
         return CastInst::Create(CI->getOpcode(), EE, EI.getType());
       }
-    } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
-      if (SI->hasOneUse()) {
-        // TODO: For a select on vectors, it might be useful to do this if it
-        // has multiple extractelement uses. For vector select, that seems to
-        // fight the vectorizer.
-
-        // If we are extracting an element from a vector select or a select on
-        // vectors, create a select on the scalars extracted from the vector
-        // arguments.
-        Value *TrueVal = SI->getTrueValue();
-        Value *FalseVal = SI->getFalseValue();
-
-        Value *Cond = SI->getCondition();
-        if (Cond->getType()->isVectorTy()) {
-          Cond = Builder.CreateExtractElement(Cond,
-                                              EI.getIndexOperand(),
-                                              Cond->getName() + ".elt");
-        }
-
-        Value *V1Elem
-          = Builder.CreateExtractElement(TrueVal,
-                                         EI.getIndexOperand(),
-                                         TrueVal->getName() + ".elt");
-
-        Value *V2Elem
-          = Builder.CreateExtractElement(FalseVal,
-                                         EI.getIndexOperand(),
-                                         FalseVal->getName() + ".elt");
-        return SelectInst::Create(Cond,
-                                  V1Elem,
-                                  V2Elem,
-                                  SI->getName() + ".elt");
-      }
     }
   }
   return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/select-extractelement.ll b/llvm/test/Transforms/InstCombine/select-extractelement.ll
index 10ce220346b..79d0b47f97d 100644
--- a/llvm/test/Transforms/InstCombine/select-extractelement.ll
+++ b/llvm/test/Transforms/InstCombine/select-extractelement.ll
@@ -5,9 +5,8 @@ declare void @v4float_user(<4 x float>) #0
 define float @extract_one_select(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
 ; CHECK-LABEL: @extract_one_select(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 %c, 0
-; CHECK-NEXT:    [[B_ELT:%.*]] = extractelement <4 x float> %b, i32 2
-; CHECK-NEXT:    [[A_ELT:%.*]] = extractelement <4 x float> %a, i32 2
-; CHECK-NEXT:    [[EXTRACT:%.*]] = select i1 [[CMP]], float [[B_ELT]], float [[A_ELT]]
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], <4 x float> %b, <4 x float> %a
+; CHECK-NEXT:    [[EXTRACT:%.*]] = extractelement <4 x float> [[SEL]], i32 2
 ; CHECK-NEXT:    ret float [[EXTRACT]]
 ;
   %cmp = icmp ne i32 %c, 0
@@ -64,14 +63,14 @@ define float @extract_one_vselect_user(<4 x float> %a, <4 x float> %b, <4 x i32>
   ret float %extract
 }
 
-; Extract from a vector select
+; Do not convert the vector select into a scalar select. That would increase 
+; the instruction count and potentially obfuscate a vector min/max idiom.
+
 define float @extract_one_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
 ; CHECK-LABEL: @extract_one_vselect(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <4 x i32> %c, zeroinitializer
-; CHECK-NEXT:    [[CMP_ELT:%.*]] = extractelement <4 x i1> [[CMP]], i32 0
-; CHECK-NEXT:    [[B_ELT:%.*]] = extractelement <4 x float> %b, i32 0
-; CHECK-NEXT:    [[A_ELT:%.*]] = extractelement <4 x float> %a, i32 0
-; CHECK-NEXT:    [[EXTRACT:%.*]] = select i1 [[CMP_ELT]], float [[B_ELT]], float [[A_ELT]]
+; CHECK-NEXT:    [[SELECT:%.*]] = select <4 x i1> [[CMP]], <4 x float> %b, <4 x float> %a
+; CHECK-NEXT:    [[EXTRACT:%.*]] = extractelement <4 x float> [[SELECT]], i32 0
 ; CHECK-NEXT:    ret float [[EXTRACT]]
 ;
   %cmp = icmp ne <4 x i32> %c, zeroinitializer
@@ -97,36 +96,28 @@ define <2 x float> @extract_two_vselect(<4 x float> %a, <4 x float> %b, <4 x i32
   ret <2 x float> %build2
 }
 
-; All the vector selects should be decomposed into scalar selects
+; The vector selects are not decomposed into scalar selects because that would increase
+; the instruction count. Extract+insert is converted to non-lane-crossing shuffles.
 ; Test multiple extractelements
 define <4 x float> @simple_vector_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
 ; CHECK-LABEL: @simple_vector_select(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x i32> %c, i32 0
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TMP0]], 0
-; CHECK-NEXT:    [[B_ELT:%.*]] = extractelement <4 x float> %b, i32 0
-; CHECK-NEXT:    [[A_ELT:%.*]] = extractelement <4 x float> %a, i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[TOBOOL]], float [[B_ELT]], float [[A_ELT]]
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> %c, i32 1
-; CHECK-NEXT:    [[TOBOOL1:%.*]] = icmp eq i32 [[TMP3]], 0
-; CHECK-NEXT:    [[B_ELT1:%.*]] = extractelement <4 x float> %b, i32 1
-; CHECK-NEXT:    [[A_ELT2:%.*]] = extractelement <4 x float> %a, i32 1
-; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TOBOOL1]], float [[B_ELT1]], float [[A_ELT2]]
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP4]], i32 1
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> %c, i32 2
-; CHECK-NEXT:    [[TOBOOL6:%.*]] = icmp eq i32 [[TMP6]], 0
-; CHECK-NEXT:    [[B_ELT3:%.*]] = extractelement <4 x float> %b, i32 2
-; CHECK-NEXT:    [[A_ELT4:%.*]] = extractelement <4 x float> %a, i32 2
-; CHECK-NEXT:    [[TMP7:%.*]] = select i1 [[TOBOOL6]], float [[B_ELT3]], float [[A_ELT4]]
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP7]], i32 2
-; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> %c, i32 3
-; CHECK-NEXT:    [[TOBOOL11:%.*]] = icmp eq i32 [[TMP9]], 0
-; CHECK-NEXT:    [[B_ELT5:%.*]] = extractelement <4 x float> %b, i32 3
-; CHECK-NEXT:    [[A_ELT6:%.*]] = extractelement <4 x float> %a, i32 3
-; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TOBOOL11]], float [[B_ELT5]], float [[A_ELT6]]
-; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP10]], i32 3
-; CHECK-NEXT:    ret <4 x float> [[TMP11]]
+; CHECK-NEXT:    [[A_SINK:%.*]] = select i1 [[TOBOOL]], <4 x float> %b, <4 x float> %a
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> %c, i32 1
+; CHECK-NEXT:    [[TOBOOL1:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT:    [[A_SINK1:%.*]] = select i1 [[TOBOOL1]], <4 x float> %b, <4 x float> %a
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[A_SINK]], <4 x float> [[A_SINK1]], <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> %c, i32 2
+; CHECK-NEXT:    [[TOBOOL6:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT:    [[A_SINK2:%.*]] = select i1 [[TOBOOL6]], <4 x float> %b, <4 x float> %a
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[A_SINK2]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> %c, i32 3
+; CHECK-NEXT:    [[TOBOOL11:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT:    [[A_SINK3:%.*]] = select i1 [[TOBOOL11]], <4 x float> %b, <4 x float> %a
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[A_SINK3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT:    ret <4 x float> [[TMP6]]
 ;
 entry:
   %0 = extractelement <4 x i32> %c, i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
index fd5ad7c38b0..188700d66d8 100644
--- a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
@@ -17,7 +17,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp sgt <2 x i32>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define i32 @max_red(i32 %max) {
 entry:
@@ -46,7 +46,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp sgt <2 x i32>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define i32 @max_red_inverse_select(i32 %max) {
 entry:
@@ -74,7 +74,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp slt <2 x i32>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define i32 @min_red(i32 %max) {
 entry:
@@ -103,7 +103,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp slt <2 x i32>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define i32 @min_red_inverse_select(i32 %max) {
 entry:
@@ -133,7 +133,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp ugt <2 x i32>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define i32 @umax_red(i32 %max) {
 entry:
@@ -162,7 +162,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp ugt <2 x i32>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define i32 @umax_red_inverse_select(i32 %max) {
 entry:
@@ -190,7 +190,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp ult <2 x i32>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define i32 @umin_red(i32 %max) {
 entry:
@@ -219,7 +219,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp ult <2 x i32>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define i32 @umin_red_inverse_select(i32 %max) {
 entry:
@@ -248,7 +248,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp slt <2 x i32>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define i32 @sge_min_red(i32 %max) {
 entry:
@@ -277,7 +277,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp sgt <2 x i32>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define i32 @sle_min_red(i32 %max) {
 entry:
@@ -306,7 +306,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp ult <2 x i32>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define i32 @uge_min_red(i32 %max) {
 entry:
@@ -335,7 +335,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp ugt <2 x i32>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define i32 @ule_min_red(i32 %max) {
 entry:
@@ -416,7 +416,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp fast ogt <2 x float>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define float @max_red_float(float %max) #0 {
 entry:
@@ -442,7 +442,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp fast ogt <2 x float>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define float @max_red_float_ge(float %max) #0 {
 entry:
@@ -468,7 +468,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp fast ogt <2 x float>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define float @inverted_max_red_float(float %max) #0 {
 entry:
@@ -494,7 +494,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp fast ogt <2 x float>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define float @inverted_max_red_float_le(float %max) #0 {
 entry:
@@ -520,7 +520,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp fast ogt <2 x float>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define float @unordered_max_red_float(float %max) #0 {
 entry:
@@ -546,7 +546,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp fast ogt <2 x float>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define float @unordered_max_red_float_ge(float %max) #0 {
 entry:
@@ -572,7 +572,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp fast ogt <2 x float>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define float @inverted_unordered_max_red_float(float %max) #0 {
 entry:
@@ -598,7 +598,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp fast ogt <2 x float>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define float @inverted_unordered_max_red_float_le(float %max) #0 {
 entry:
@@ -627,7 +627,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp fast olt <2 x float>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define float @min_red_float(float %min) #0 {
 entry:
@@ -653,7 +653,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp fast olt <2 x float>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define float @min_red_float_le(float %min) #0 {
 entry:
@@ -679,7 +679,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp fast olt <2 x float>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define float @inverted_min_red_float(float %min) #0 {
 entry:
@@ -705,7 +705,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp fast olt <2 x float>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define float @inverted_min_red_float_ge(float %min) #0 {
 entry:
@@ -731,7 +731,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp fast olt <2 x float>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define float @unordered_min_red_float(float %min) #0 {
 entry:
@@ -757,7 +757,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp fast olt <2 x float>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define float @unordered_min_red_float_le(float %min) #0 {
 entry:
@@ -783,7 +783,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp fast olt <2 x float>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define float @inverted_unordered_min_red_float(float %min) #0 {
 entry:
@@ -809,7 +809,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp fast olt <2 x float>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define float @inverted_unordered_min_red_float_ge(float %min) #0 {
 entry:
@@ -836,7 +836,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp fast olt <2 x double>
-; CHECK: select i1
+; CHECK: select <2 x i1>
 
 define double @min_red_double(double %min) #0 {
 entry:
author	Sanjay Patel <spatel@rotateright.com>	2017-09-25 16:41:34 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2017-09-25 16:41:34 +0000
commit	9639897d7786d176f4c0e42ab4a9a7029ea50b78 (patch)
tree	3b2adc0164bf2b5e7ce86764631d7c91789a68a2
parent	0a62b2d88767bd2836bcf3182e6d58c89cc84cdf (diff)
download	bcm5719-llvm-9639897d7786d176f4c0e42ab4a9a7029ea50b78.tar.gz bcm5719-llvm-9639897d7786d176f4c0e42ab4a9a7029ea50b78.zip