diff options
| author | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2016-09-07 12:47:53 +0000 |
|---|---|---|
| committer | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2016-09-07 12:47:53 +0000 |
| commit | f3fd3162238be28e742daf24b1309075908ceefb (patch) | |
| tree | 6cc38c7248676080ce416788814cc4abb09b9784 | |
| parent | c9113e4863e3bcdf44aa7e3ed49232e9f40a26de (diff) | |
| download | bcm5719-llvm-f3fd3162238be28e742daf24b1309075908ceefb.tar.gz bcm5719-llvm-f3fd3162238be28e742daf24b1309075908ceefb.zip | |
[InstCombine][SSE4a] Fix assertion failure in the insertq/insertqi combining logic.
This fixes a similar issue to the one already fixed by r280804
(reviewed in D24256). Revision 280804 fixed the problem with unsafe dyn_casts
in the extrq/extrqi combining logic. However, it turns out that the
insertq/insertqi logic was also affected by the same problem.
llvm-svn: 280807
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 6 | ||||
| -rw-r--r-- | llvm/test/Transforms/InstCombine/x86-sse4a.ll | 18 |
2 files changed, 21 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index eb334442988..ebeba7ee2d5 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -722,10 +722,10 @@ static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1, Constant *C0 = dyn_cast<Constant>(Op0); Constant *C1 = dyn_cast<Constant>(Op1); ConstantInt *CI00 = - C0 ? dyn_cast<ConstantInt>(C0->getAggregateElement((unsigned)0)) + C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0)) : nullptr; ConstantInt *CI10 = - C1 ? dyn_cast<ConstantInt>(C1->getAggregateElement((unsigned)0)) + C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0)) : nullptr; // Constant Fold - insert bottom Length bits starting at the Index'th bit. @@ -1919,7 +1919,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // See if we're dealing with constant values. Constant *C1 = dyn_cast<Constant>(Op1); ConstantInt *CI11 = - C1 ? dyn_cast<ConstantInt>(C1->getAggregateElement((unsigned)1)) + C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1)) : nullptr; // Attempt to simplify to a constant, shuffle vector or INSERTQI call. 
diff --git a/llvm/test/Transforms/InstCombine/x86-sse4a.ll b/llvm/test/Transforms/InstCombine/x86-sse4a.ll index e135c38d246..d2714e0c630 100644 --- a/llvm/test/Transforms/InstCombine/x86-sse4a.ll +++ b/llvm/test/Transforms/InstCombine/x86-sse4a.ll @@ -177,6 +177,15 @@ define <2 x i64> @test_insertq_constant_undef(<2 x i64> %x, <2 x i64> %y) { ret <2 x i64> %1 } +define <2 x i64> @test_insertq_call_constexpr(<2 x i64> %x) { +; CHECK-LABEL: @test_insertq_call_constexpr( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>)) +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>)) + ret <2 x i64> %1 +} + ; ; INSERTQI ; @@ -213,6 +222,15 @@ define <2 x i64> @test_insertqi_constant(<2 x i64> %v, <2 x i64> %i) { ret <2 x i64> %1 } +define <2 x i64> @test_insertqi_call_constexpr(<2 x i64> %x) { +; CHECK-LABEL: @test_insertqi_call_constexpr( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 48, i8 3) +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 48, i8 3) + ret <2 x i64> %1 +} + ; The result of this insert is the second arg, since the top 64 bits of ; the result are undefined, and we copy the bottom 64 bits from the ; second arg |

