diff options
| author | Craig Topper <craig.topper@intel.com> | 2017-09-16 07:36:14 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2017-09-16 07:36:14 +0000 |
| commit | f264fcc704fcf1d8bd7ba2c5fc5d8131dac03c57 (patch) | |
| tree | f1e1eba2f892e895aee1c28456953c59b4544cc1 /llvm/test/Transforms | |
| parent | 6c196978ebb73231441936df6244766586b39249 (diff) | |
| download | bcm5719-llvm-f264fcc704fcf1d8bd7ba2c5fc5d8131dac03c57.tar.gz bcm5719-llvm-f264fcc704fcf1d8bd7ba2c5fc5d8131dac03c57.zip | |
[X86] Remove VPERM2F128/VPERM2I128 intrinsics and autoupgrade to native shuffles.
I've moved the test cases from the InstCombine optimizations to the backend to keep the coverage we had there. They covered every possible immediate, so I've preserved the resulting shuffle mask for each of those immediates.
llvm-svn: 313450
Diffstat (limited to 'llvm/test/Transforms')
| -rw-r--r-- | llvm/test/Transforms/InstCombine/X86/x86-vperm2.ll | 313 |
1 file changed, 0 insertions, 313 deletions
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vperm2.ll b/llvm/test/Transforms/InstCombine/X86/x86-vperm2.ll deleted file mode 100644 index 84f69aa25d2..00000000000 --- a/llvm/test/Transforms/InstCombine/X86/x86-vperm2.ll +++ /dev/null @@ -1,313 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -instcombine -S | FileCheck %s - -; This should never happen, but make sure we don't crash handling a non-constant immediate byte. - -define <4 x double> @perm2pd_non_const_imm(<4 x double> %a0, <4 x double> %a1, i8 %b) { -; CHECK-LABEL: @perm2pd_non_const_imm( -; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %b) -; CHECK-NEXT: ret <4 x double> [[RES]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %b) - ret <4 x double> %res - -} - - -; In the following 4 tests, both zero mask bits of the immediate are set. - -define <4 x double> @perm2pd_0x88(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x88( -; CHECK-NEXT: ret <4 x double> zeroinitializer -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 136) - ret <4 x double> %res - -} - -define <8 x float> @perm2ps_0x88(<8 x float> %a0, <8 x float> %a1) { -; CHECK-LABEL: @perm2ps_0x88( -; CHECK-NEXT: ret <8 x float> zeroinitializer -; - %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 136) - ret <8 x float> %res - -} - -define <8 x i32> @perm2si_0x88(<8 x i32> %a0, <8 x i32> %a1) { -; CHECK-LABEL: @perm2si_0x88( -; CHECK-NEXT: ret <8 x i32> zeroinitializer -; - %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 136) - ret <8 x i32> %res - -} - -define <4 x i64> @perm2i_0x88(<4 x i64> %a0, <4 x i64> %a1) { -; CHECK-LABEL: @perm2i_0x88( -; CHECK-NEXT: ret <4 x i64> zeroinitializer -; - %res = call <4 x i64> 
@llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 136) - ret <4 x i64> %res - -} - - -; The other control bits are ignored when zero mask bits of the immediate are set. - -define <4 x double> @perm2pd_0xff(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0xff( -; CHECK-NEXT: ret <4 x double> zeroinitializer -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 255) - ret <4 x double> %res - -} - - -; The following 16 tests are simple shuffles, except for 2 cases where we can just return one of the -; source vectors. Verify that we generate the right shuffle masks and undef source operand where possible.. - -define <4 x double> @perm2pd_0x00(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x00( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 0) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x01(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x01( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 1) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x02(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x02( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 2) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x03(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x03( -; 
CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 4, i32 5> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 3) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x10(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x10( -; CHECK-NEXT: ret <4 x double> %a0 -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 16) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x11(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x11( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 17) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x12(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x12( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 6, i32 7> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 18) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x13(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x13( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 6, i32 7> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 19) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x20(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x20( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5> -; 
CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 32) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x21(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x21( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 33) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x22(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x22( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 34) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x23(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x23( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 35) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x30(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x30( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 6, i32 7> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 48) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x31(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x31( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 6, i32 
7> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 49) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x32(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x32( -; CHECK-NEXT: ret <4 x double> %a1 -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 50) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x33(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x33( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 51) - ret <4 x double> %res - -} - -; Confirm that a mask for 32-bit elements is also correct. - -define <8 x float> @perm2ps_0x31(<8 x float> %a0, <8 x float> %a1) { -; CHECK-LABEL: @perm2ps_0x31( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> -; CHECK-NEXT: ret <8 x float> [[TMP1]] -; - %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 49) - ret <8 x float> %res - -} - - -; Confirm that the AVX2 version works the same. - -define <4 x i64> @perm2i_0x33(<4 x i64> %a0, <4 x i64> %a1) { -; CHECK-LABEL: @perm2i_0x33( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3> -; CHECK-NEXT: ret <4 x i64> [[TMP1]] -; - %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 51) - ret <4 x i64> %res - -} - - -; Confirm that when a single zero mask bit is set, we replace a source vector with zeros. 
- -define <4 x double> @perm2pd_0x81(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x81( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> <double 0.000000e+00, double 0.000000e+00, double undef, double undef>, <4 x i32> <i32 2, i32 3, i32 4, i32 5> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 129) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x83(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x83( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a1, <4 x double> <double 0.000000e+00, double 0.000000e+00, double undef, double undef>, <4 x i32> <i32 2, i32 3, i32 4, i32 5> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 131) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x28(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x28( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> <double 0.000000e+00, double 0.000000e+00, double undef, double undef>, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 40) - ret <4 x double> %res - -} - -define <4 x double> @perm2pd_0x08(<4 x double> %a0, <4 x double> %a1) { -; CHECK-LABEL: @perm2pd_0x08( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> <double 0.000000e+00, double 0.000000e+00, double undef, double undef>, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 8) - ret <4 x double> %res - -} - -; Check one more with the AVX2 version. 
- -define <4 x i64> @perm2i_0x28(<4 x i64> %a0, <4 x i64> %a1) { -; CHECK-LABEL: @perm2i_0x28( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> <i64 0, i64 0, i64 undef, i64 undef>, <4 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5> -; CHECK-NEXT: ret <4 x i64> [[TMP1]] -; - %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 40) - ret <4 x i64> %res - -} - -declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone -declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone -declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone -declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readnone - |

