From db9893fb90c29783d2e1fc6195dc8e9a6a149b2a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 7 Jun 2016 10:27:15 +0000 Subject: [InstCombine][AVX2] Add support for simplifying AVX2 per-element shifts to native shifts Unlike native shifts, the AVX2 per-element shift instructions VPSRAV/VPSRLV/VPSLLV handle out of range shift values (logical shifts set the result to zero, arithmetic shifts splat the sign bit). If the shift amount is constant we can sometimes convert these instructions to native shifts: 1 - if all shift amounts are in range then the conversion is trivial. 2 - out of range arithmetic shifts can be clamped to the (bitwidth - 1) (a legal shift amount) before conversion. 3 - logical shifts just return zero if all elements have out of range shift amounts. In addition, UNDEF shift amounts are handled - either as an UNDEF shift amount in a native shift or as an UNDEF in the logical 'all out of range' zero constant special case for logical shifts. Differential Revision: http://reviews.llvm.org/D19675 llvm-svn: 271996 --- .../Transforms/InstCombine/x86-vector-shifts.ll | 98 +++++++++------------- 1 file changed, 40 insertions(+), 58 deletions(-) (limited to 'llvm/test') diff --git a/llvm/test/Transforms/InstCombine/x86-vector-shifts.ll b/llvm/test/Transforms/InstCombine/x86-vector-shifts.ll index 55b64b5afe9..012a826a765 100644 --- a/llvm/test/Transforms/InstCombine/x86-vector-shifts.ll +++ b/llvm/test/Transforms/InstCombine/x86-vector-shifts.ll @@ -940,8 +940,7 @@ define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) { define <4 x i32> @avx2_psrav_d_128_0(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psrav_d_128_0( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> zeroinitializer) -; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; CHECK-NEXT: ret <4 x i32> %v ; %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> zeroinitializer) ret <4 x i32> %1 @@ -949,8 +948,7 @@ define <4 x i32> @avx2_psrav_d_128_0(<4 x i32> %v) { define <8 x i32> @avx2_psrav_d_256_0(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrav_d_256_0( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> zeroinitializer) -; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; CHECK-NEXT: ret <8 x i32> %v ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> zeroinitializer) ret <8 x i32> %1 @@ -958,7 +956,7 @@ define <8 x i32> @avx2_psrav_d_256_0(<8 x i32> %v) { define <4 x i32> @avx2_psrav_d_128_var(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psrav_d_128_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> ) @@ -967,7 +965,7 @@ define <4 x i32> @avx2_psrav_d_128_var(<4 x i32> %v) { define <8 x i32> @avx2_psrav_d_256_var(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrav_d_256_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> ) @@ -976,7 +974,7 @@ define <8 x i32> @avx2_psrav_d_256_var(<8 x i32> %v) { define <4 x i32> @avx2_psrav_d_128_allbig(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psrav_d_128_allbig( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> ) @@ -985,7 +983,7 @@ define <4 x i32> @avx2_psrav_d_128_allbig(<4 x i32> %v) { define <8 x i32> @avx2_psrav_d_256_allbig(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrav_d_256_allbig( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> ) @@ -994,7 +992,7 @@ define <8 x i32> @avx2_psrav_d_256_allbig(<8 x i32> %v) { define <4 x i32> @avx2_psrav_d_128_undef(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psrav_d_128_undef( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = insertelement <4 x i32> , i32 undef, i32 0 @@ -1004,7 +1002,7 @@ define <4 x i32> @avx2_psrav_d_128_undef(<4 x i32> %v) { define <8 x i32> @avx2_psrav_d_256_undef(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrav_d_256_undef( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = insertelement <8 x i32> , i32 undef, i32 1 @@ -1018,8 +1016,7 @@ define <8 x i32> @avx2_psrav_d_256_undef(<8 x i32> %v) { define <4 x i32> @avx2_psrlv_d_128_0(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_128_0( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> zeroinitializer) -; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; CHECK-NEXT: ret <4 x i32> %v ; %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> zeroinitializer) ret <4 x i32> %1 @@ -1027,8 +1024,7 @@ define <4 x i32> @avx2_psrlv_d_128_0(<4 x i32> %v) { define <8 x i32> @avx2_psrlv_d_256_0(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_256_0( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer) -; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; CHECK-NEXT: ret <8 x i32> %v ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer) ret <8 x i32> %1 @@ -1036,7 +1032,7 @@ define <8 x i32> @avx2_psrlv_d_256_0(<8 x i32> %v) { define <4 x i32> @avx2_psrlv_d_128_var(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_128_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> %v, ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> ) @@ -1045,7 +1041,7 @@ define <4 x i32> @avx2_psrlv_d_128_var(<4 x i32> %v) { define <8 x i32> @avx2_psrlv_d_256_var(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_256_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> %v, ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> ) @@ -1072,8 +1068,7 @@ define <8 x i32> @avx2_psrlv_d_256_big(<8 x i32> %v) { define <4 x i32> @avx2_psrlv_d_128_allbig(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_128_allbig( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> ) -; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; CHECK-NEXT: ret <4 x i32> ; %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> ) ret <4 x i32> %1 @@ -1081,8 +1076,7 @@ define <4 x i32> @avx2_psrlv_d_128_allbig(<4 x i32> %v) { define <8 x i32> @avx2_psrlv_d_256_allbig(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_256_allbig( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> ) -; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; CHECK-NEXT: ret <8 x i32> ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> ) ret <8 x i32> %1 @@ -1090,7 +1084,7 @@ define <8 x i32> @avx2_psrlv_d_256_allbig(<8 x i32> %v) { define <4 x i32> @avx2_psrlv_d_128_undef(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_128_undef( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> %v, ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = insertelement <4 x i32> , i32 undef, i32 0 @@ -1100,7 +1094,7 @@ define <4 x i32> @avx2_psrlv_d_128_undef(<4 x i32> %v) { define <8 x i32> @avx2_psrlv_d_256_undef(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_256_undef( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> %v, ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = insertelement <8 x i32> , i32 undef, i32 1 @@ -1110,8 +1104,7 @@ define <8 x i32> @avx2_psrlv_d_256_undef(<8 x i32> %v) { define <2 x i64> @avx2_psrlv_q_128_0(<2 x i64> %v) { ; CHECK-LABEL: @avx2_psrlv_q_128_0( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> zeroinitializer) -; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; CHECK-NEXT: ret <2 x i64> %v ; %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> zeroinitializer) ret <2 x i64> %1 @@ -1119,8 +1112,7 @@ define <2 x i64> @avx2_psrlv_q_128_0(<2 x i64> %v) { define <4 x i64> @avx2_psrlv_q_256_0(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psrlv_q_256_0( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer) -; CHECK-NEXT: ret <4 x i64> [[TMP1]] +; CHECK-NEXT: ret <4 x i64> %v ; %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer) ret <4 x i64> %1 @@ -1128,7 +1120,7 @@ define <4 x i64> @avx2_psrlv_q_256_0(<4 x i64> %v) { define <2 x i64> @avx2_psrlv_q_128_var(<2 x i64> %v) { ; CHECK-LABEL: @avx2_psrlv_q_128_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> ) +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> %v, ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> ) @@ -1137,7 +1129,7 @@ define <2 x i64> @avx2_psrlv_q_128_var(<2 x i64> %v) { define <4 x i64> @avx2_psrlv_q_256_var(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psrlv_q_256_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> ) +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> %v, ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> ) @@ -1164,8 +1156,7 @@ define <4 x i64> @avx2_psrlv_q_256_big(<4 x i64> %v) { define <2 x i64> @avx2_psrlv_q_128_allbig(<2 x i64> %v) { ; CHECK-LABEL: @avx2_psrlv_q_128_allbig( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> ) -; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; CHECK-NEXT: ret <2 x i64> zeroinitializer ; %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> ) ret <2 x i64> %1 @@ -1173,8 +1164,7 @@ define <2 x i64> @avx2_psrlv_q_128_allbig(<2 x i64> %v) { define <4 x i64> @avx2_psrlv_q_256_allbig(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psrlv_q_256_allbig( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> ) -; CHECK-NEXT: ret <4 x i64> [[TMP1]] +; CHECK-NEXT: ret <4 x i64> ; %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> ) ret <4 x i64> %1 @@ -1182,7 +1172,7 @@ define <4 x i64> @avx2_psrlv_q_256_allbig(<4 x i64> %v) { define <2 x i64> @avx2_psrlv_q_128_undef(<2 x i64> %v) { ; CHECK-LABEL: @avx2_psrlv_q_128_undef( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> ) +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> %v, ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = insertelement <2 x i64> , i64 undef, i64 1 @@ -1192,7 +1182,7 @@ define <2 x i64> @avx2_psrlv_q_128_undef(<2 x i64> %v) { define <4 x i64> @avx2_psrlv_q_256_undef(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psrlv_q_256_undef( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> ) +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> %v, ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = insertelement <4 x i64> , i64 undef, i64 0 @@ -1206,8 +1196,7 @@ define <4 x i64> @avx2_psrlv_q_256_undef(<4 x i64> %v) { define <4 x i32> @avx2_psllv_d_128_0(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psllv_d_128_0( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> zeroinitializer) -; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; CHECK-NEXT: ret <4 x i32> %v ; %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> zeroinitializer) ret <4 x i32> %1 @@ -1215,8 +1204,7 @@ define <4 x i32> @avx2_psllv_d_128_0(<4 x i32> %v) { define <8 x i32> @avx2_psllv_d_256_0(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psllv_d_256_0( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer) -; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; CHECK-NEXT: ret <8 x i32> %v ; %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer) ret <8 x i32> %1 @@ -1224,7 +1212,7 @@ define <8 x i32> @avx2_psllv_d_256_0(<8 x i32> %v) { define <4 x i32> @avx2_psllv_d_128_var(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psllv_d_128_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> %v, ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> ) @@ -1233,7 +1221,7 @@ define <4 x i32> @avx2_psllv_d_128_var(<4 x i32> %v) { define <8 x i32> @avx2_psllv_d_256_var(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psllv_d_256_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> %v, ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> ) @@ -1260,8 +1248,7 @@ define <8 x i32> @avx2_psllv_d_256_big(<8 x i32> %v) { define <4 x i32> @avx2_psllv_d_128_allbig(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psllv_d_128_allbig( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> ) -; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; CHECK-NEXT: ret <4 x i32> ; %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> ) ret <4 x i32> %1 @@ -1269,8 +1256,7 @@ define <4 x i32> @avx2_psllv_d_128_allbig(<4 x i32> %v) { define <8 x i32> @avx2_psllv_d_256_allbig(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psllv_d_256_allbig( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> ) -; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; CHECK-NEXT: ret <8 x i32> ; %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> ) ret <8 x i32> %1 @@ -1278,7 +1264,7 @@ define <8 x i32> @avx2_psllv_d_256_allbig(<8 x i32> %v) { define <4 x i32> @avx2_psllv_d_128_undef(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psllv_d_128_undef( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> %v, ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = insertelement <4 x i32> , i32 undef, i32 0 @@ -1288,7 +1274,7 @@ define <4 x i32> @avx2_psllv_d_128_undef(<4 x i32> %v) { define <8 x i32> @avx2_psllv_d_256_undef(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psllv_d_256_undef( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> %v, ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = insertelement <8 x i32> , i32 undef, i32 1 @@ -1298,8 +1284,7 @@ define <8 x i32> @avx2_psllv_d_256_undef(<8 x i32> %v) { define <2 x i64> @avx2_psllv_q_128_0(<2 x i64> %v) { ; CHECK-LABEL: @avx2_psllv_q_128_0( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> zeroinitializer) -; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; CHECK-NEXT: ret <2 x i64> %v ; %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> zeroinitializer) ret <2 x i64> %1 @@ -1307,8 +1292,7 @@ define <2 x i64> @avx2_psllv_q_128_0(<2 x i64> %v) { define <4 x i64> @avx2_psllv_q_256_0(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psllv_q_256_0( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer) -; CHECK-NEXT: ret <4 x i64> [[TMP1]] +; CHECK-NEXT: ret <4 x i64> %v ; %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer) ret <4 x i64> %1 @@ -1316,7 +1300,7 @@ define <4 x i64> @avx2_psllv_q_256_0(<4 x i64> %v) { define <2 x i64> @avx2_psllv_q_128_var(<2 x i64> %v) { ; CHECK-LABEL: @avx2_psllv_q_128_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> ) +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> %v, ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> ) @@ -1325,7 +1309,7 @@ define <2 x i64> @avx2_psllv_q_128_var(<2 x i64> %v) { define <4 x i64> @avx2_psllv_q_256_var(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psllv_q_256_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> ) +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> %v, ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> ) @@ -1352,8 +1336,7 @@ define <4 x i64> @avx2_psllv_q_256_big(<4 x i64> %v) { define <2 x i64> @avx2_psllv_q_128_allbig(<2 x i64> %v) { ; CHECK-LABEL: @avx2_psllv_q_128_allbig( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> ) -; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; CHECK-NEXT: ret <2 x i64> zeroinitializer ; %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> ) ret <2 x i64> %1 @@ -1361,8 +1344,7 @@ define <2 x i64> @avx2_psllv_q_128_allbig(<2 x i64> %v) { define <4 x i64> @avx2_psllv_q_256_allbig(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psllv_q_256_allbig( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> ) -; CHECK-NEXT: ret <4 x i64> [[TMP1]] +; CHECK-NEXT: ret <4 x i64> ; %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> ) ret <4 x i64> %1 @@ -1370,7 +1352,7 @@ define <4 x i64> @avx2_psllv_q_256_allbig(<4 x i64> %v) { define <2 x i64> @avx2_psllv_q_128_undef(<2 x i64> %v) { ; CHECK-LABEL: @avx2_psllv_q_128_undef( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> ) +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> %v, ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = insertelement <2 x i64> , i64 undef, i64 1 @@ -1380,7 +1362,7 @@ define <2 x i64> @avx2_psllv_q_128_undef(<2 x i64> %v) { define <4 x i64> @avx2_psllv_q_256_undef(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psllv_q_256_undef( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> ) +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> %v, ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = insertelement <4 x i64> , i64 undef, i64 0 -- cgit v1.2.3