diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-06-07 10:27:15 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-06-07 10:27:15 +0000 |
| commit | db9893fb90c29783d2e1fc6195dc8e9a6a149b2a (patch) | |
| tree | b421ef594c1952e91a793140e01109ebc500c0c8 /llvm/test | |
| parent | d9e1a61d3149ef54d6ce84e0268ae63f32bba181 (diff) | |
| download | bcm5719-llvm-db9893fb90c29783d2e1fc6195dc8e9a6a149b2a.tar.gz bcm5719-llvm-db9893fb90c29783d2e1fc6195dc8e9a6a149b2a.zip | |
[InstCombine][AVX2] Add support for simplifying AVX2 per-element shifts to native shifts
Unlike native shifts, the AVX2 per-element shift instructions VPSRAV/VPSRLV/VPSLLV handle out of range shift values (logical shifts set the result to zero, arithmetic shifts splat the sign bit).
If the shift amount is constant we can sometimes convert these instructions to native shifts:
1 - if all shift amounts are in range then the conversion is trivial.
2 - out of range arithmetic shifts can be clamped to (bitwidth - 1) (a legal shift amount) before conversion.
3 - logical shifts just return zero if all elements have out of range shift amounts.
In addition, UNDEF shift amounts are handled - either as an UNDEF shift amount in a native shift or as an UNDEF in the logical 'all out of range' zero constant special case for logical shifts.
Differential Revision: http://reviews.llvm.org/D19675
llvm-svn: 271996
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/Transforms/InstCombine/x86-vector-shifts.ll | 98 |
1 files changed, 40 insertions, 58 deletions
diff --git a/llvm/test/Transforms/InstCombine/x86-vector-shifts.ll b/llvm/test/Transforms/InstCombine/x86-vector-shifts.ll index 55b64b5afe9..012a826a765 100644 --- a/llvm/test/Transforms/InstCombine/x86-vector-shifts.ll +++ b/llvm/test/Transforms/InstCombine/x86-vector-shifts.ll @@ -940,8 +940,7 @@ define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) { define <4 x i32> @avx2_psrav_d_128_0(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psrav_d_128_0( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> zeroinitializer) -; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; CHECK-NEXT: ret <4 x i32> %v ; %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> zeroinitializer) ret <4 x i32> %1 @@ -949,8 +948,7 @@ define <4 x i32> @avx2_psrav_d_128_0(<4 x i32> %v) { define <8 x i32> @avx2_psrav_d_256_0(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrav_d_256_0( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> zeroinitializer) -; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; CHECK-NEXT: ret <8 x i32> %v ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> zeroinitializer) ret <8 x i32> %1 @@ -958,7 +956,7 @@ define <8 x i32> @avx2_psrav_d_256_0(<8 x i32> %v) { define <4 x i32> @avx2_psrav_d_128_var(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psrav_d_128_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>) +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 0, i32 8, i32 16, i32 31> ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>) @@ -967,7 +965,7 @@ define <4 x i32> @avx2_psrav_d_128_var(<4 x i32> %v) { define <8 x i32> @avx2_psrav_d_256_var(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrav_d_256_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 
16, i32 24, i32 32, i32 24, i32 8, i32 0>) +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0> ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>) @@ -976,7 +974,7 @@ define <8 x i32> @avx2_psrav_d_256_var(<8 x i32> %v) { define <4 x i32> @avx2_psrav_d_128_allbig(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psrav_d_128_allbig( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>) +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 undef> ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>) @@ -985,7 +983,7 @@ define <4 x i32> @avx2_psrav_d_128_allbig(<4 x i32> %v) { define <8 x i32> @avx2_psrav_d_256_allbig(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrav_d_256_allbig( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>) +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>) @@ -994,7 +992,7 @@ define <8 x i32> @avx2_psrav_d_256_allbig(<8 x i32> %v) { define <4 x i32> @avx2_psrav_d_128_undef(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psrav_d_128_undef( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 undef, i32 8, i32 16, i32 64>) +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 undef, i32 8, i32 16, i32 31> ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = 
insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 64>, i32 undef, i32 0 @@ -1004,7 +1002,7 @@ define <4 x i32> @avx2_psrav_d_128_undef(<4 x i32> %v) { define <8 x i32> @avx2_psrav_d_256_undef(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrav_d_256_undef( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 undef, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>) +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 0, i32 undef, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0> ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>, i32 undef, i32 1 @@ -1018,8 +1016,7 @@ define <8 x i32> @avx2_psrav_d_256_undef(<8 x i32> %v) { define <4 x i32> @avx2_psrlv_d_128_0(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_128_0( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> zeroinitializer) -; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; CHECK-NEXT: ret <4 x i32> %v ; %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> zeroinitializer) ret <4 x i32> %1 @@ -1027,8 +1024,7 @@ define <4 x i32> @avx2_psrlv_d_128_0(<4 x i32> %v) { define <8 x i32> @avx2_psrlv_d_256_0(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_256_0( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer) -; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; CHECK-NEXT: ret <8 x i32> %v ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer) ret <8 x i32> %1 @@ -1036,7 +1032,7 @@ define <8 x i32> @avx2_psrlv_d_256_0(<8 x i32> %v) { define <4 x i32> @avx2_psrlv_d_128_var(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_128_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 31>) +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 0, i32 8, i32 16, i32 31> ; CHECK-NEXT: ret <4 
x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 31>) @@ -1045,7 +1041,7 @@ define <4 x i32> @avx2_psrlv_d_128_var(<4 x i32> %v) { define <8 x i32> @avx2_psrlv_d_256_var(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_256_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>) +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0> ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>) @@ -1072,8 +1068,7 @@ define <8 x i32> @avx2_psrlv_d_256_big(<8 x i32> %v) { define <4 x i32> @avx2_psrlv_d_128_allbig(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_128_allbig( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>) -; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; CHECK-NEXT: ret <4 x i32> <i32 0, i32 0, i32 0, i32 undef> ; %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>) ret <4 x i32> %1 @@ -1081,8 +1076,7 @@ define <4 x i32> @avx2_psrlv_d_128_allbig(<4 x i32> %v) { define <8 x i32> @avx2_psrlv_d_256_allbig(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_256_allbig( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>) -; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; CHECK-NEXT: ret <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>) ret <8 x i32> %1 @@ -1090,7 +1084,7 @@ define <8 x i32> 
@avx2_psrlv_d_256_allbig(<8 x i32> %v) { define <4 x i32> @avx2_psrlv_d_128_undef(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_128_undef( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 undef, i32 8, i32 16, i32 31>) +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 undef, i32 8, i32 16, i32 31> ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 31>, i32 undef, i32 0 @@ -1100,7 +1094,7 @@ define <4 x i32> @avx2_psrlv_d_128_undef(<4 x i32> %v) { define <8 x i32> @avx2_psrlv_d_256_undef(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_256_undef( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>) +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0> ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1 @@ -1110,8 +1104,7 @@ define <8 x i32> @avx2_psrlv_d_256_undef(<8 x i32> %v) { define <2 x i64> @avx2_psrlv_q_128_0(<2 x i64> %v) { ; CHECK-LABEL: @avx2_psrlv_q_128_0( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> zeroinitializer) -; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; CHECK-NEXT: ret <2 x i64> %v ; %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> zeroinitializer) ret <2 x i64> %1 @@ -1119,8 +1112,7 @@ define <2 x i64> @avx2_psrlv_q_128_0(<2 x i64> %v) { define <4 x i64> @avx2_psrlv_q_256_0(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psrlv_q_256_0( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer) -; CHECK-NEXT: ret <4 x i64> [[TMP1]] +; CHECK-NEXT: ret <4 x i64> %v ; %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer) ret <4 x i64> %1 
@@ -1128,7 +1120,7 @@ define <4 x i64> @avx2_psrlv_q_256_0(<4 x i64> %v) { define <2 x i64> @avx2_psrlv_q_128_var(<2 x i64> %v) { ; CHECK-LABEL: @avx2_psrlv_q_128_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 8>) +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 0, i64 8> ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 8>) @@ -1137,7 +1129,7 @@ define <2 x i64> @avx2_psrlv_q_128_var(<2 x i64> %v) { define <4 x i64> @avx2_psrlv_q_256_var(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psrlv_q_256_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>) +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 0, i64 8, i64 16, i64 31> ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>) @@ -1164,8 +1156,7 @@ define <4 x i64> @avx2_psrlv_q_256_big(<4 x i64> %v) { define <2 x i64> @avx2_psrlv_q_128_allbig(<2 x i64> %v) { ; CHECK-LABEL: @avx2_psrlv_q_128_allbig( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 128, i64 -64>) -; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; CHECK-NEXT: ret <2 x i64> zeroinitializer ; %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 128, i64 -64>) ret <2 x i64> %1 @@ -1173,8 +1164,7 @@ define <2 x i64> @avx2_psrlv_q_128_allbig(<2 x i64> %v) { define <4 x i64> @avx2_psrlv_q_256_allbig(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psrlv_q_256_allbig( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>) -; CHECK-NEXT: ret <4 x i64> [[TMP1]] +; CHECK-NEXT: ret <4 x i64> <i64 0, i64 undef, i64 0, i64 0> ; %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 
undef, i64 -128, i64 -60>) ret <4 x i64> %1 @@ -1182,7 +1172,7 @@ define <4 x i64> @avx2_psrlv_q_256_allbig(<4 x i64> %v) { define <2 x i64> @avx2_psrlv_q_128_undef(<2 x i64> %v) { ; CHECK-LABEL: @avx2_psrlv_q_128_undef( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 undef>) +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 0, i64 undef> ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 undef, i64 1 @@ -1192,7 +1182,7 @@ define <2 x i64> @avx2_psrlv_q_128_undef(<2 x i64> %v) { define <4 x i64> @avx2_psrlv_q_256_undef(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psrlv_q_256_undef( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 undef, i64 8, i64 16, i64 31>) +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 undef, i64 8, i64 16, i64 31> ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0 @@ -1206,8 +1196,7 @@ define <4 x i64> @avx2_psrlv_q_256_undef(<4 x i64> %v) { define <4 x i32> @avx2_psllv_d_128_0(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psllv_d_128_0( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> zeroinitializer) -; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; CHECK-NEXT: ret <4 x i32> %v ; %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> zeroinitializer) ret <4 x i32> %1 @@ -1215,8 +1204,7 @@ define <4 x i32> @avx2_psllv_d_128_0(<4 x i32> %v) { define <8 x i32> @avx2_psllv_d_256_0(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psllv_d_256_0( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer) -; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; CHECK-NEXT: ret <8 x i32> %v ; %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer) ret <8 x i32> %1 @@ -1224,7 +1212,7 @@ define <8 x i32> 
@avx2_psllv_d_256_0(<8 x i32> %v) { define <4 x i32> @avx2_psllv_d_128_var(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psllv_d_128_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 31>) +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> %v, <i32 0, i32 8, i32 16, i32 31> ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 31>) @@ -1233,7 +1221,7 @@ define <4 x i32> @avx2_psllv_d_128_var(<4 x i32> %v) { define <8 x i32> @avx2_psllv_d_256_var(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psllv_d_256_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>) +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0> ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>) @@ -1260,8 +1248,7 @@ define <8 x i32> @avx2_psllv_d_256_big(<8 x i32> %v) { define <4 x i32> @avx2_psllv_d_128_allbig(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psllv_d_128_allbig( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>) -; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; CHECK-NEXT: ret <4 x i32> <i32 0, i32 0, i32 0, i32 undef> ; %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>) ret <4 x i32> %1 @@ -1269,8 +1256,7 @@ define <4 x i32> @avx2_psllv_d_128_allbig(<4 x i32> %v) { define <8 x i32> @avx2_psllv_d_256_allbig(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psllv_d_256_allbig( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 
-55555>) -; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; CHECK-NEXT: ret <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> ; %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>) ret <8 x i32> %1 @@ -1278,7 +1264,7 @@ define <8 x i32> @avx2_psllv_d_256_allbig(<8 x i32> %v) { define <4 x i32> @avx2_psllv_d_128_undef(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psllv_d_128_undef( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 undef, i32 8, i32 16, i32 31>) +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> %v, <i32 undef, i32 8, i32 16, i32 31> ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 31>, i32 undef, i32 0 @@ -1288,7 +1274,7 @@ define <4 x i32> @avx2_psllv_d_128_undef(<4 x i32> %v) { define <8 x i32> @avx2_psllv_d_256_undef(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psllv_d_256_undef( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>) +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> %v, <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0> ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1 @@ -1298,8 +1284,7 @@ define <8 x i32> @avx2_psllv_d_256_undef(<8 x i32> %v) { define <2 x i64> @avx2_psllv_q_128_0(<2 x i64> %v) { ; CHECK-LABEL: @avx2_psllv_q_128_0( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> zeroinitializer) -; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; CHECK-NEXT: ret <2 x i64> %v ; %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> zeroinitializer) ret <2 x i64> %1 @@ -1307,8 +1292,7 @@ define <2 x i64> @avx2_psllv_q_128_0(<2 x i64> %v) { define <4 x i64> 
@avx2_psllv_q_256_0(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psllv_q_256_0( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer) -; CHECK-NEXT: ret <4 x i64> [[TMP1]] +; CHECK-NEXT: ret <4 x i64> %v ; %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer) ret <4 x i64> %1 @@ -1316,7 +1300,7 @@ define <4 x i64> @avx2_psllv_q_256_0(<4 x i64> %v) { define <2 x i64> @avx2_psllv_q_128_var(<2 x i64> %v) { ; CHECK-LABEL: @avx2_psllv_q_128_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 8>) +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> %v, <i64 0, i64 8> ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 8>) @@ -1325,7 +1309,7 @@ define <2 x i64> @avx2_psllv_q_128_var(<2 x i64> %v) { define <4 x i64> @avx2_psllv_q_256_var(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psllv_q_256_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>) +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> %v, <i64 0, i64 8, i64 16, i64 31> ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>) @@ -1352,8 +1336,7 @@ define <4 x i64> @avx2_psllv_q_256_big(<4 x i64> %v) { define <2 x i64> @avx2_psllv_q_128_allbig(<2 x i64> %v) { ; CHECK-LABEL: @avx2_psllv_q_128_allbig( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 128, i64 -64>) -; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; CHECK-NEXT: ret <2 x i64> zeroinitializer ; %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 128, i64 -64>) ret <2 x i64> %1 @@ -1361,8 +1344,7 @@ define <2 x i64> @avx2_psllv_q_128_allbig(<2 x i64> %v) { define <4 x i64> @avx2_psllv_q_256_allbig(<4 x i64> %v) { ; 
CHECK-LABEL: @avx2_psllv_q_256_allbig( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>) -; CHECK-NEXT: ret <4 x i64> [[TMP1]] +; CHECK-NEXT: ret <4 x i64> <i64 0, i64 undef, i64 0, i64 0> ; %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>) ret <4 x i64> %1 @@ -1370,7 +1352,7 @@ define <4 x i64> @avx2_psllv_q_256_allbig(<4 x i64> %v) { define <2 x i64> @avx2_psllv_q_128_undef(<2 x i64> %v) { ; CHECK-LABEL: @avx2_psllv_q_128_undef( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 undef>) +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> %v, <i64 0, i64 undef> ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 undef, i64 1 @@ -1380,7 +1362,7 @@ define <2 x i64> @avx2_psllv_q_128_undef(<2 x i64> %v) { define <4 x i64> @avx2_psllv_q_256_undef(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psllv_q_256_undef( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 undef, i64 8, i64 16, i64 31>) +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> %v, <i64 undef, i64 8, i64 16, i64 31> ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0 |

