diff options
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/sse41.ll | 11 |
2 files changed, 13 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 5cabd14064e..bc4168b20a7 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7805,12 +7805,13 @@ static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl, std::find_if(Mask.begin(), Mask.end(), FromV2Predicate) - Mask.begin(); } + unsigned SrcIndex = Mask[DestIndex] % 4; if (MayFoldLoad(From)) { // Trivial case, when From comes from a load and is only used by the // shuffle. Make it use insertps from the vector that we need from that // load. SDValue NewLoad = - NarrowVectorLoadToElement(cast<LoadSDNode>(From), DestIndex, DAG); + NarrowVectorLoadToElement(cast<LoadSDNode>(From), SrcIndex, DAG); if (!NewLoad.getNode()) return SDValue(); @@ -7831,7 +7832,6 @@ static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl, } // Vector-element-to-vector - unsigned SrcIndex = Mask[DestIndex] % 4; SDValue InsertpsMask = DAG.getIntPtrConstant(DestIndex << 4 | SrcIndex << 6); return DAG.getNode(X86ISD::INSERTPS, dl, VT, To, From, InsertpsMask); } diff --git a/llvm/test/CodeGen/X86/sse41.ll b/llvm/test/CodeGen/X86/sse41.ll index a77ede228df..6726a3ea9ff 100644 --- a/llvm/test/CodeGen/X86/sse41.ll +++ b/llvm/test/CodeGen/X86/sse41.ll @@ -703,3 +703,14 @@ define <4 x float> @insertps_with_undefs(<4 x float> %a, float* %b) { %result = shufflevector <4 x float> %a, <4 x float> %2, <4 x i32> <i32 4, i32 undef, i32 0, i32 7> ret <4 x float> %result } + +; Test for a bug in X86ISelLowering.cpp:getINSERTPS where we were using +; the destination index to change the load, instead of the source index. +define <4 x float> @pr20087(<4 x float> %a, <4 x float> *%ptr) { +; CHECK-LABEL: pr20087: +; CHECK: insertps $48 +; CHECK: ret + %load = load <4 x float> *%ptr + %ret = shufflevector <4 x float> %load, <4 x float> %a, <4 x i32> <i32 4, i32 undef, i32 6, i32 2> + ret <4 x float> %ret +} |