2 files changed, 26 insertions, 8 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 28d5f187c3d..e76f41b4794 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -867,6 +867,20 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
   if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
     replaceInstUsesWith(IE, VecOp);
 
+  // If the vector is bitcast from a vector whose element type matches the type
+  // the scalar is bitcast from, insert in that source type and then bitcast.
+  Value *VecSrc, *ScalarSrc;
+  if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
+      match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
+      (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
+      VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
+      VecSrc->getType()->getVectorElementType() == ScalarSrc->getType()) {
+    // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
+    //   bitcast (inselt VecSrc, ScalarSrc, IdxOp)
+    Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
+    return new BitCastInst(NewInsElt, IE.getType());
+  }
+
   // If the inserted element was extracted from some other vector and both
   // indexes are constant, try to turn this into a shuffle.
   uint64_t InsertedIdx, ExtractedIdx;
diff --git a/llvm/test/Transforms/InstCombine/insertelement-bitcast.ll b/llvm/test/Transforms/InstCombine/insertelement-bitcast.ll
index 92d2aeeff18..5f698d015d7 100644
--- a/llvm/test/Transforms/InstCombine/insertelement-bitcast.ll
+++ b/llvm/test/Transforms/InstCombine/insertelement-bitcast.ll
@@ -6,9 +6,8 @@ declare void @use_v4f32(<4 x float>)
 
 define <4 x float> @bitcast_inselt(i32 %x, <4 x i32> %v) {
 ; CHECK-LABEL: @bitcast_inselt(
-; CHECK-NEXT:    [[XF:%.*]] = bitcast i32 [[X:%.*]] to float
-; CHECK-NEXT:    [[VF:%.*]] = bitcast <4 x i32> [[V:%.*]] to <4 x float>
-; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x float> [[VF]], float [[XF]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> [[V:%.*]], i32 [[X:%.*]], i32 0
+; CHECK-NEXT:    [[R:%.*]] = bitcast <4 x i32> [[TMP1]] to <4 x float>
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %xf = bitcast i32 %x to float
@@ -21,8 +20,8 @@ define <4 x float> @bitcast_inselt_use1(i32 %x, <4 x i32> %v) {
 ; CHECK-LABEL: @bitcast_inselt_use1(
 ; CHECK-NEXT:    [[XF:%.*]] = bitcast i32 [[X:%.*]] to float
 ; CHECK-NEXT:    call void @use_f32(float [[XF]])
-; CHECK-NEXT:    [[VF:%.*]] = bitcast <4 x i32> [[V:%.*]] to <4 x float>
-; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x float> [[VF]], float [[XF]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> [[V:%.*]], i32 [[X]], i32 1
+; CHECK-NEXT:    [[R:%.*]] = bitcast <4 x i32> [[TMP1]] to <4 x float>
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %xf = bitcast i32 %x to float
@@ -34,10 +33,10 @@ define <4 x float> @bitcast_inselt_use1(i32 %x, <4 x i32> %v) {
 
 define <4 x float> @bitcast_inselt_use2(i32 %x, <4 x i32> %v, i32 %index) {
 ; CHECK-LABEL: @bitcast_inselt_use2(
-; CHECK-NEXT:    [[XF:%.*]] = bitcast i32 [[X:%.*]] to float
 ; CHECK-NEXT:    [[VF:%.*]] = bitcast <4 x i32> [[V:%.*]] to <4 x float>
 ; CHECK-NEXT:    call void @use_v4f32(<4 x float> [[VF]])
-; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x float> [[VF]], float [[XF]], i32 [[INDEX:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> [[V]], i32 [[X:%.*]], i32 [[INDEX:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = bitcast <4 x i32> [[TMP1]] to <4 x float>
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %xf = bitcast i32 %x to float
@@ -47,6 +46,8 @@ define <4 x float> @bitcast_inselt_use2(i32 %x, <4 x i32> %v, i32 %index) {
   ret <4 x float> %r
 }
 
+; Negative test - both bitcasts have extra uses, so the fold would add an instruction.
+
 define <4 x float> @bitcast_inselt_use3(i32 %x, <4 x i32> %v) {
 ; CHECK-LABEL: @bitcast_inselt_use3(
 ; CHECK-NEXT:    [[XF:%.*]] = bitcast i32 [[X:%.*]] to float
@@ -64,6 +65,8 @@ define <4 x float> @bitcast_inselt_use3(i32 %x, <4 x i32> %v) {
   ret <4 x float> %r
 }
 
+; Negative test - wrong source type for vector.
+
 define <2 x float> @bitcast_inselt_wrong_bitcast1(i32 %x, i64 %notv) {
 ; CHECK-LABEL: @bitcast_inselt_wrong_bitcast1(
 ; CHECK-NEXT:    [[XF:%.*]] = bitcast i32 [[X:%.*]] to float
@@ -77,6 +80,8 @@ define <2 x float> @bitcast_inselt_wrong_bitcast1(i32 %x, i64 %notv) {
   ret <2 x float> %r
 }
 
+; Negative test - wrong source type for scalar.
+
 define <2 x float> @bitcast_inselt_wrong_bitcast2(<2 x i16> %notscalar, <2 x i32> %v) {
 ; CHECK-LABEL: @bitcast_inselt_wrong_bitcast2(
 ; CHECK-NEXT:    [[XF:%.*]] = bitcast <2 x i16> [[NOTSCALAR:%.*]] to float
@@ -89,4 +94,3 @@ define <2 x float> @bitcast_inselt_wrong_bitcast2(<2 x i16> %notscalar, <2 x i32
   %r = insertelement <2 x float> %vf, float %xf, i32 0
   ret <2 x float> %r
 }
-