author     Craig Topper <craig.topper@intel.com>    2019-02-20 20:18:20 +0000
committer  Craig Topper <craig.topper@intel.com>    2019-02-20 20:18:20 +0000
commit     31823fba2e9245c7cfdd8cb210a5de05c6d60075
tree       5c4384a54694574425fd64d7c5f40e975c883948
parent     9ad714f7d1a926c00b389a41a6d79969089da678
[X86] Add more load folding patterns for blend instructions as a follow up to r354363.
This avoids depending on the peephole pass to do load folding.
Also adds load folding for some insert_subvector patterns that use blend.
All of this was found by temporarily adding TB_NO_FORWARD to the blend immediate entries in the load folding tables.
I've added -disable-peephole to some of the affected tests from that experiment to ensure we're testing isel patterns.
llvm-svn: 354511
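As a hypothetical illustration (not part of the patch): for IR like the following, the new isel patterns fold the load into the blend directly, so the folded form survives even under -disable-peephole. The function name and exact output are assumptions.

; Take elements 1 and 3 from the loaded vector; blend immediate 0b1010.
define <4 x i32> @fold_blend_load(<4 x i32> %a, <4 x i32>* %b) {
  %ld = load <4 x i32>, <4 x i32>* %b
  %r = shufflevector <4 x i32> %a, <4 x i32> %ld, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %r
}
; Expected with -mattr=+avx (assumption, per the VBLENDPSrmi pattern below):
;   vblendps $10, (%rdi), %xmm0, %xmm0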
llvm/lib/Target/X86/X86InstrSSE.td                        | 72
llvm/test/CodeGen/X86/avx-cvt-3.ll                        |  4
llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll  | 20
llvm/test/CodeGen/X86/commute-blend-sse41.ll              |  2
llvm/test/CodeGen/X86/insert-into-constant-vector.ll      | 20
llvm/test/CodeGen/X86/masked_load.ll                      | 12
llvm/test/CodeGen/X86/merge-consecutive-loads-256.ll      |  6
7 files changed, 98 insertions(+), 38 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index d661560461a..d5a6273088f 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -6510,17 +6510,35 @@ let Predicates = [HasAVX2] in {
 // Emulate vXi32/vXi64 blends with vXf32/vXf64.
 // ExecutionDomainFixPass will cleanup domains later on.
 let Predicates = [HasAVX] in {
-def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), (iPTR imm:$src3)),
+def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), imm:$src3),
           (VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$src3)>;
-def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), (iPTR imm:$src3)),
+def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), imm:$src3),
+          (VBLENDPDYrmi VR256:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, imm:$src3),
+          (VBLENDPDYrmi VR256:$src1, addr:$src2, (BlendCommuteImm4 imm:$src3))>;
+
+def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
           (VBLENDPDrri VR128:$src1, VR128:$src2, imm:$src3)>;
+def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), imm:$src3),
+          (VBLENDPDrmi VR128:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, imm:$src3),
+          (VBLENDPDrmi VR128:$src1, addr:$src2, (BlendCommuteImm2 imm:$src3))>;
 }

 let Predicates = [HasAVX1Only] in {
-def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), (iPTR imm:$src3)),
+def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), imm:$src3),
           (VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$src3)>;
-def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), (iPTR imm:$src3)),
+def : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), imm:$src3),
+          (VBLENDPSYrmi VR256:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, imm:$src3),
+          (VBLENDPSYrmi VR256:$src1, addr:$src2, (BlendCommuteImm8 imm:$src3))>;
+
+def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3),
           (VBLENDPSrri VR128:$src1, VR128:$src2, imm:$src3)>;
+def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), imm:$src3),
+          (VBLENDPSrmi VR128:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, imm:$src3),
+          (VBLENDPSrmi VR128:$src1, addr:$src2, (BlendCommuteImm4 imm:$src3))>;
 }

 defm BLENDPS : SS41I_blend_rmi<0x0C, "blendps", X86Blendi, v4f32,
@@ -6534,10 +6552,19 @@ defm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16,
                                SchedWriteBlend.XMM, BlendCommuteImm8>;

 let Predicates = [UseSSE41] in {
-def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), (iPTR imm:$src3)),
+def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
           (BLENDPDrri VR128:$src1, VR128:$src2, imm:$src3)>;
-def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), (iPTR imm:$src3)),
+def : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), imm:$src3),
+          (BLENDPDrmi VR128:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, imm:$src3),
+          (BLENDPDrmi VR128:$src1, addr:$src2, (BlendCommuteImm2 imm:$src3))>;
+
+def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3),
           (BLENDPSrri VR128:$src1, VR128:$src2, imm:$src3)>;
+def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), imm:$src3),
+          (BLENDPSrmi VR128:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, imm:$src3),
+          (BLENDPSrmi VR128:$src1, addr:$src2, (BlendCommuteImm4 imm:$src3))>;
 }

 // For insertion into the zero index (low half) of a 256-bit vector, it is
@@ -6551,6 +6578,13 @@ def : Pat<(insert_subvector (v8f32 VR256:$src1), (v4f32 VR128:$src2), (iPTR 0)),
           (VBLENDPSYrri VR256:$src1,
                         (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
                                        VR128:$src2, sub_xmm), 0xf)>;
+
+def : Pat<(insert_subvector (loadv4f64 addr:$src2), (v2f64 VR128:$src1), (iPTR 0)),
+          (VBLENDPDYrmi (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
+                                       VR128:$src1, sub_xmm), addr:$src2, 0xc)>;
+def : Pat<(insert_subvector (loadv8f32 addr:$src2), (v4f32 VR128:$src1), (iPTR 0)),
+          (VBLENDPSYrmi (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
+                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
 }

 /// SS41I_quaternary_vx - AVX SSE 4.1 with 4 operators
@@ -7804,6 +7838,19 @@ def : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2), (iPTR 0)),
           (VPBLENDDYrri VR256:$src1,
                         (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                        VR128:$src2, sub_xmm), 0xf)>;
+
+def : Pat<(insert_subvector (loadv8i32 addr:$src2), (v4i32 VR128:$src1), (iPTR 0)),
+          (VPBLENDDYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
+def : Pat<(insert_subvector (loadv4i64 addr:$src2), (v2i64 VR128:$src1), (iPTR 0)),
+          (VPBLENDDYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
+def : Pat<(insert_subvector (loadv16i16 addr:$src2), (v8i16 VR128:$src1), (iPTR 0)),
+          (VPBLENDDYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
+def : Pat<(insert_subvector (loadv32i8 addr:$src2), (v16i8 VR128:$src1), (iPTR 0)),
+          (VPBLENDDYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
 }

 let Predicates = [HasAVX1Only] in {
@@ -7823,6 +7870,19 @@ def : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2), (iPTR 0)),
           (VBLENDPSYrri VR256:$src1,
                         (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                        VR128:$src2, sub_xmm), 0xf)>;
+
+def : Pat<(insert_subvector (loadv8i32 addr:$src2), (v4i32 VR128:$src1), (iPTR 0)),
+          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
+def : Pat<(insert_subvector (loadv4i64 addr:$src2), (v2i64 VR128:$src1), (iPTR 0)),
+          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
+def : Pat<(insert_subvector (loadv16i16 addr:$src2), (v8i16 VR128:$src1), (iPTR 0)),
+          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
+def : Pat<(insert_subvector (loadv32i8 addr:$src2), (v16i8 VR128:$src1), (iPTR 0)),
+          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
 }

 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/X86/avx-cvt-3.ll b/llvm/test/CodeGen/X86/avx-cvt-3.ll
index ac99684ab3a..03592c8af9d 100644
--- a/llvm/test/CodeGen/X86/avx-cvt-3.ll
+++ b/llvm/test/CodeGen/X86/avx-cvt-3.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64

 ; Insertion/shuffles of all-zero/all-bits/constants into v8i32->v8f32 sitofp conversion.
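The insert_subvector patterns above cover the case where the whole 256-bit source is itself a load. A hypothetical sketch (not from the patch; names and output are assumptions) of IR that should now hit the VPBLENDDYrmi/VBLENDPSYrmi patterns, folding the 256-bit load and blending the inserted xmm value into the low half:

define <8 x i32> @insert_low_half_from_mem(<8 x i32>* %p, <4 x i32> %x) {
  %v = load <8 x i32>, <8 x i32>* %p
  ; Widen %x to 256 bits, then take its low four elements and the high four of %v.
  %wide = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %r = shufflevector <8 x i32> %wide, <8 x i32> %v, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i32> %r
}
; Expected with -mattr=+avx2 (assumption): vpblendd $240, (%rdi), %ymm0, %ymm0
; i.e. immediate 0xf0 takes elements 4-7 from memory and 0-3 from the register.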
diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
index 2378028e311..e0bec2861f5 100644
--- a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
+++ b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
@@ -1912,8 +1912,8 @@ define <2 x i64> @test_masked_z_4xi64_to_2xi64_perm_mem_mask0(<4 x i64>* %vp, <2
 define <2 x i64> @test_masked_4xi64_to_2xi64_perm_mem_mask1(<4 x i64>* %vp, <2 x i64> %vec2, <2 x i64> %mask) {
 ; CHECK-LABEL: test_masked_4xi64_to_2xi64_perm_mem_mask1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovdqa (%rdi), %xmm2
-; CHECK-NEXT:    vpblendd {{.*#+}} xmm2 = mem[0,1],xmm2[2,3]
+; CHECK-NEXT:    vmovdqa 16(%rdi), %xmm2
+; CHECK-NEXT:    vpblendd {{.*#+}} xmm2 = xmm2[0,1],mem[2,3]
 ; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
 ; CHECK-NEXT:    vmovdqa64 %xmm2, %xmm0 {%k1}
 ; CHECK-NEXT:    retq
@@ -1927,8 +1927,8 @@ define <2 x i64> @test_masked_4xi64_to_2xi64_perm_mem_mask1(<4 x i64>* %vp, <2 x
 define <2 x i64> @test_masked_z_4xi64_to_2xi64_perm_mem_mask1(<4 x i64>* %vp, <2 x i64> %mask) {
 ; CHECK-LABEL: test_masked_z_4xi64_to_2xi64_perm_mem_mask1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovdqa (%rdi), %xmm1
-; CHECK-NEXT:    vpblendd {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
+; CHECK-NEXT:    vmovdqa 16(%rdi), %xmm1
+; CHECK-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0,1],mem[2,3]
 ; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
 ; CHECK-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
 ; CHECK-NEXT:    retq
@@ -2553,8 +2553,8 @@ define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mem_mask7(<8 x i64>* %vp, <4
 define <2 x i64> @test_8xi64_to_2xi64_perm_mem_mask0(<8 x i64>* %vp) {
 ; CHECK-LABEL: test_8xi64_to_2xi64_perm_mem_mask0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovaps (%rdi), %xmm0
-; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; CHECK-NEXT:    vmovaps 32(%rdi), %xmm0
+; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
 ; CHECK-NEXT:    retq
   %vec = load <8 x i64>, <8 x i64>* %vp
   %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <2 x i32> <i32 4, i32 1>
@@ -2563,8 +2563,8 @@ define <2 x i64> @test_8xi64_to_2xi64_perm_mem_mask0(<8 x i64>* %vp) {
 define <2 x i64> @test_masked_8xi64_to_2xi64_perm_mem_mask0(<8 x i64>* %vp, <2 x i64> %vec2, <2 x i64> %mask) {
 ; CHECK-LABEL: test_masked_8xi64_to_2xi64_perm_mem_mask0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovdqa (%rdi), %xmm2
-; CHECK-NEXT:    vpblendd {{.*#+}} xmm2 = mem[0,1],xmm2[2,3]
+; CHECK-NEXT:    vmovdqa 32(%rdi), %xmm2
+; CHECK-NEXT:    vpblendd {{.*#+}} xmm2 = xmm2[0,1],mem[2,3]
 ; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
 ; CHECK-NEXT:    vmovdqa64 %xmm2, %xmm0 {%k1}
 ; CHECK-NEXT:    retq
@@ -2578,8 +2578,8 @@ define <2 x i64> @test_masked_8xi64_to_2xi64_perm_mem_mask0(<8 x i64>* %vp, <2 x
 define <2 x i64> @test_masked_z_8xi64_to_2xi64_perm_mem_mask0(<8 x i64>* %vp, <2 x i64> %mask) {
 ; CHECK-LABEL: test_masked_z_8xi64_to_2xi64_perm_mem_mask0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovdqa (%rdi), %xmm1
-; CHECK-NEXT:    vpblendd {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
+; CHECK-NEXT:    vmovdqa 32(%rdi), %xmm1
+; CHECK-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0,1],mem[2,3]
 ; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
 ; CHECK-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
 ; CHECK-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/commute-blend-sse41.ll b/llvm/test/CodeGen/X86/commute-blend-sse41.ll
index 5d64540dfb0..fa7cbb4b8ef 100644
--- a/llvm/test/CodeGen/X86/commute-blend-sse41.ll
+++ b/llvm/test/CodeGen/X86/commute-blend-sse41.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s

 define <8 x i16> @commute_fold_pblendw(<8 x i16> %a, <8 x i16>* %b) {
 ; CHECK-LABEL: commute_fold_pblendw:
diff --git a/llvm/test/CodeGen/X86/insert-into-constant-vector.ll b/llvm/test/CodeGen/X86/insert-into-constant-vector.ll
index c34e9409dff..9d33a8bc447 100644
--- a/llvm/test/CodeGen/X86/insert-into-constant-vector.ll
+++ b/llvm/test/CodeGen/X86/insert-into-constant-vector.ll
@@ -1,14 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32SSE --check-prefix=X32SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64SSE --check-prefix=X64SSE2
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32SSE --check-prefix=X32SSE4
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64SSE --check-prefix=X64SSE4
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32AVX --check-prefix=X32AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64AVX --check-prefix=X64AVX1
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32AVX --check-prefix=X32AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64AVX --check-prefix=X64AVX2
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X32AVX --check-prefix=X32AVX512F
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X64AVX --check-prefix=X64AVX512F
+; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32SSE --check-prefix=X32SSE2
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64SSE --check-prefix=X64SSE2
+; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32SSE --check-prefix=X32SSE4
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64SSE --check-prefix=X64SSE4
+; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32AVX --check-prefix=X32AVX1
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64AVX --check-prefix=X64AVX1
+; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32AVX --check-prefix=X32AVX2
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64AVX --check-prefix=X64AVX2
+; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X32AVX --check-prefix=X32AVX512F
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X64AVX --check-prefix=X64AVX512F

 define <16 x i8> @elt0_v16i8(i8 %x) {
 ; X32SSE2-LABEL: elt0_v16i8:
diff --git a/llvm/test/CodeGen/X86/masked_load.ll b/llvm/test/CodeGen/X86/masked_load.ll
index 7c76b4d0a0a..e1213e4b9c8 100644
--- a/llvm/test/CodeGen/X86/masked_load.ll
+++ b/llvm/test/CodeGen/X86/masked_load.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=sse2 | FileCheck %s --check-prefixes=SSE,SSE2
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512f,avx512bw,avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VLBW
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=sse2 | FileCheck %s --check-prefixes=SSE,SSE2
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx512f,avx512bw,avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VLBW

 define <1 x double> @load_v1f64_v1i64(<1 x i64> %trigger, <1 x double>* %addr, <1 x double> %dst) {
 ; SSE-LABEL: load_v1f64_v1i64:
diff --git a/llvm/test/CodeGen/X86/merge-consecutive-loads-256.ll b/llvm/test/CodeGen/X86/merge-consecutive-loads-256.ll
index 2feb9742c60..60eb93f4d19 100644
--- a/llvm/test/CodeGen/X86/merge-consecutive-loads-256.ll
+++ b/llvm/test/CodeGen/X86/merge-consecutive-loads-256.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512F
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512F
 ;
 ; Just one 32-bit run to make sure we do reasonable things.
 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32-AVX
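One more hypothetical example (not from the patch; names and output are assumptions), in the style of commute-blend-sse41.ll, for the commuted-load case: when the load feeds the first blend operand, isel swaps the operands and rewrites the immediate via BlendCommuteImm so the load still folds. Here the natural immediate 0b1100 (elements 2,3 from %a) becomes 0b0011 after commuting (BlendCommuteImm4).

define <4 x i32> @commute_fold_blend(<4 x i32> %a, <4 x i32>* %b) {
  %ld = load <4 x i32>, <4 x i32>* %b
  ; Elements 0,1 from the loaded vector, 2,3 from %a.
  %r = shufflevector <4 x i32> %ld, <4 x i32> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i32> %r
}
; Expected with -mattr=+sse4.1 (assumption): blendps $3, (%rdi), %xmm0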