| Mode | Path | Lines |
| --- | --- | --- |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll | 32 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-intrinsics-x86.ll | 68 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll | 168 |
| -rw-r--r-- | llvm/test/CodeGen/X86/packss.ll | 8 |
| -rw-r--r-- | llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll | 137 |
| -rw-r--r-- | llvm/utils/UpdateTestChecks/asm.py | 2 |
| -rw-r--r-- | llvm/utils/UpdateTestChecks/common.py | 39 |
| -rwxr-xr-x | llvm/utils/update_llc_test_checks.py | 4 |

8 files changed, 214 insertions, 244 deletions
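
The patch below renames update_llc_test_checks.py's --x86_extra_scrub option to the target-neutral --extra_scrub and teaches the check generator to keep two scrubbed copies of every function body: an exact one and a subtarget-agnostic one. The agnostic form (e.g. ret{{[l|q]}} instead of retl/retq) is now used as a fallback only when RUN lines that share a check prefix would otherwise produce conflicting bodies; that is why most regenerated X86/X64 blocks below go back to exact retl/retq, while the blocks merged under the shared ALL prefix in sse42-intrinsics-fast-isel.ll keep ret{{[l|q]}}. A minimal, runnable sketch of the new fallback logic (simplified from the patched UpdateTestChecks/common.py; the regex and driver loop are illustrative stand-ins, not the real scrubbers):

```python
import re

RET_RE = re.compile(r'ret[lq]\b')  # illustrative stand-in for asm.py's SCRUB_X86_RET_RE

def scrub(body, extra):
    """Exact scrub by default; with extra=True also unify retl/retq."""
    return RET_RE.sub(r'ret{{[l|q]}}', body) if extra else body

class FunctionBody:
    """Mirrors the function_body wrapper added to UpdateTestChecks/common.py."""
    def __init__(self, scrubbed, extrascrub):
        self.scrub = scrubbed          # what add_checks() will emit as CHECK lines
        self.extrascrub = extrascrub   # fallback used to reconcile prefix conflicts
    def __str__(self):
        return self.scrub

def record(func_dict, prefixes, func, raw_body):
    """Store one RUN line's body for 'func' (warning/None handling omitted)."""
    scrubbed = scrub(raw_body, extra=False)
    scrubbed_extra = scrub(raw_body, extra=True)
    for prefix in prefixes:
        existing = func_dict[prefix].get(func)
        if existing is not None and str(existing) != scrubbed:
            if existing.extrascrub == scrubbed_extra:
                # The bodies differ only in scrubbed-away details (retl vs retq),
                # so demote the stored body to the subtarget-agnostic form.
                existing.scrub = scrubbed_extra
            else:
                func_dict[prefix][func] = None  # genuine conflict under this prefix
            continue
        func_dict[prefix][func] = FunctionBody(scrubbed, scrubbed_extra)

# Two RUN lines (32- and 64-bit) sharing the ALL prefix, as in the SSE4.2 test:
func_dict = {'ALL': {}}
record(func_dict, ['ALL'], 'test_mm_cmpistrm', 'pcmpistrm $7, %xmm1, %xmm0\nretl')
record(func_dict, ['ALL'], 'test_mm_cmpistrm', 'pcmpistrm $7, %xmm1, %xmm0\nretq')
print(func_dict['ALL']['test_mm_cmpistrm'])
# pcmpistrm $7, %xmm1, %xmm0
# ret{{[l|q]}}
```

In the full patch the same two bodies are produced by the new do_scrub() helper, which runs the real scrubbers twice with extra_scrub forced off and then on.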
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll index 38654f241a3..f557d7ab0da 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll @@ -101,12 +101,12 @@ define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_x86_avx_vbroadcastf128_pd_256:  ; X64:       # %bb.0:  ; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]    ret <4 x double> %res  } @@ -118,12 +118,12 @@ define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_x86_avx_vbroadcastf128_ps_256:  ; X64:       # %bb.0:  ; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]    ret <8 x float> %res  } @@ -402,14 +402,14 @@ define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {  ; X86-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1  ; X86-NEXT:    vpsubb %xmm1, %xmm0, %xmm0  ; X86-NEXT:    vmovdqu %xmm0, (%eax) -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_x86_sse2_storeu_dq:  ; X64:       # %bb.0:  ; X64-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1  ; X64-NEXT:    vpsubb %xmm1, %xmm0, %xmm0  ; X64-NEXT:    vmovdqu %xmm0, (%rdi) -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>    call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)    ret void @@ -426,7 +426,7 @@ define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {  ; X86-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]  ; X86-NEXT:    vaddpd %xmm1, %xmm0, %xmm0  ; X86-NEXT:    vmovupd %xmm0, (%eax) -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_x86_sse2_storeu_pd:  ; X64:       # %bb.0: @@ -434,7 +434,7 @@ define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {  ; X64-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]  ; X64-NEXT:    vaddpd %xmm1, %xmm0, %xmm0  ; X64-NEXT:    vmovupd %xmm0, (%rdi) -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>    call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)    ret void @@ -447,12 +447,12 @@ define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vmovups %xmm0, (%eax) -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_x86_sse_storeu_ps:  ; X64:       # %bb.0:  ; X64-NEXT:    vmovups %xmm0, (%rdi) -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)    ret void  } @@ -472,7 +472,7 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {  ; X86-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0  ; X86-NEXT:    vmovups %ymm0, (%eax)  ; X86-NEXT:    
vzeroupper -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_x86_avx_storeu_dq_256:  ; X64:       # %bb.0: @@ -483,7 +483,7 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {  ; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0  ; X64-NEXT:    vmovups %ymm0, (%rdi)  ; X64-NEXT:    vzeroupper -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>    call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)    ret void @@ -500,7 +500,7 @@ define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {  ; X86-NEXT:    vaddpd %ymm1, %ymm0, %ymm0  ; X86-NEXT:    vmovupd %ymm0, (%eax)  ; X86-NEXT:    vzeroupper -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_x86_avx_storeu_pd_256:  ; X64:       # %bb.0: @@ -508,7 +508,7 @@ define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {  ; X64-NEXT:    vaddpd %ymm1, %ymm0, %ymm0  ; X64-NEXT:    vmovupd %ymm0, (%rdi)  ; X64-NEXT:    vzeroupper -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>    call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)    ret void @@ -522,13 +522,13 @@ define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vmovups %ymm0, (%eax)  ; X86-NEXT:    vzeroupper -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_x86_avx_storeu_ps_256:  ; X64:       # %bb.0:  ; X64-NEXT:    vmovups %ymm0, (%rdi)  ; X64-NEXT:    vzeroupper -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)    ret void  } diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll index e26c125d283..93d5da65c3b 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -287,12 +287,12 @@ define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]  ; X86-NEXT:    vlddqu (%eax), %ymm0 # encoding: [0xc5,0xff,0xf0,0x00] -; X86-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X86-NEXT:    retl # encoding: [0xc3]  ;  ; X64-LABEL: test_x86_avx_ldu_dq_256:  ; X64:       # %bb.0:  ; X64-NEXT:    vlddqu (%rdi), %ymm0 # encoding: [0xc5,0xff,0xf0,0x07] -; X64-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X64-NEXT:    retq # encoding: [0xc3]    %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]    ret <32 x i8> %res  } @@ -304,12 +304,12 @@ define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x i64> %mask) {  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]  ; X86-NEXT:    vmaskmovpd (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2d,0x00] -; X86-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X86-NEXT:    retl # encoding: [0xc3]  ;  ; X64-LABEL: test_x86_avx_maskload_pd:  ; X64:       # %bb.0:  ; X64-NEXT:    vmaskmovpd (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2d,0x07] -; X64-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X64-NEXT:    retq # encoding: [0xc3]    %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x i64> %mask) ; <<2 x double>> [#uses=1]    ret <2 
x double> %res  } @@ -321,12 +321,12 @@ define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x i64> %mask) {  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]  ; X86-NEXT:    vmaskmovpd (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2d,0x00] -; X86-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X86-NEXT:    retl # encoding: [0xc3]  ;  ; X64-LABEL: test_x86_avx_maskload_pd_256:  ; X64:       # %bb.0:  ; X64-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2d,0x07] -; X64-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X64-NEXT:    retq # encoding: [0xc3]    %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x i64> %mask) ; <<4 x double>> [#uses=1]    ret <4 x double> %res  } @@ -338,12 +338,12 @@ define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x i32> %mask) {  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]  ; X86-NEXT:    vmaskmovps (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2c,0x00] -; X86-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X86-NEXT:    retl # encoding: [0xc3]  ;  ; X64-LABEL: test_x86_avx_maskload_ps:  ; X64:       # %bb.0:  ; X64-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2c,0x07] -; X64-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X64-NEXT:    retq # encoding: [0xc3]    %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x i32> %mask) ; <<4 x float>> [#uses=1]    ret <4 x float> %res  } @@ -355,12 +355,12 @@ define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x i32> %mask) {  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]  ; X86-NEXT:    vmaskmovps (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2c,0x00] -; X86-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X86-NEXT:    retl # encoding: [0xc3]  ;  ; X64-LABEL: test_x86_avx_maskload_ps_256:  ; X64:       # %bb.0:  ; X64-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2c,0x07] -; X64-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X64-NEXT:    retq # encoding: [0xc3]    %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x i32> %mask) ; <<8 x float>> [#uses=1]    ret <8 x float> %res  } @@ -372,12 +372,12 @@ define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x i64> %mask, <2 x double> %a  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]  ; X86-NEXT:    vmaskmovpd %xmm1, %xmm0, (%eax) # encoding: [0xc4,0xe2,0x79,0x2f,0x08] -; X86-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X86-NEXT:    retl # encoding: [0xc3]  ;  ; X64-LABEL: test_x86_avx_maskstore_pd:  ; X64:       # %bb.0:  ; X64-NEXT:    vmaskmovpd %xmm1, %xmm0, (%rdi) # encoding: [0xc4,0xe2,0x79,0x2f,0x0f] -; X64-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X64-NEXT:    retq # encoding: [0xc3]    call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2)    ret void  } @@ -390,13 +390,13 @@ define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x i64> %mask, <4 x double  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]  ; X86-NEXT:    vmaskmovpd %ymm1, %ymm0, (%eax) # encoding: [0xc4,0xe2,0x7d,0x2f,0x08]  ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77] -; X86-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X86-NEXT:    retl # encoding: [0xc3]  ;  ; X64-LABEL: test_x86_avx_maskstore_pd_256:  ; X64:       # %bb.0:  ; X64-NEXT:    vmaskmovpd %ymm1, %ymm0, (%rdi) # encoding: 
[0xc4,0xe2,0x7d,0x2f,0x0f]  ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77] -; X64-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X64-NEXT:    retq # encoding: [0xc3]    call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x i64> %mask, <4 x double> %a2)    ret void  } @@ -408,12 +408,12 @@ define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]  ; X86-NEXT:    vmaskmovps %xmm1, %xmm0, (%eax) # encoding: [0xc4,0xe2,0x79,0x2e,0x08] -; X86-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X86-NEXT:    retl # encoding: [0xc3]  ;  ; X64-LABEL: test_x86_avx_maskstore_ps:  ; X64:       # %bb.0:  ; X64-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi) # encoding: [0xc4,0xe2,0x79,0x2e,0x0f] -; X64-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X64-NEXT:    retq # encoding: [0xc3]    call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2)    ret void  } @@ -426,13 +426,13 @@ define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x i32> %mask, <8 x float>  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]  ; X86-NEXT:    vmaskmovps %ymm1, %ymm0, (%eax) # encoding: [0xc4,0xe2,0x7d,0x2e,0x08]  ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77] -; X86-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X86-NEXT:    retl # encoding: [0xc3]  ;  ; X64-LABEL: test_x86_avx_maskstore_ps_256:  ; X64:       # %bb.0:  ; X64-NEXT:    vmaskmovps %ymm1, %ymm0, (%rdi) # encoding: [0xc4,0xe2,0x7d,0x2e,0x0f]  ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77] -; X64-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X64-NEXT:    retq # encoding: [0xc3]    call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x i32> %mask, <8 x float> %a2)    ret void  } @@ -720,23 +720,23 @@ define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>*  ; X86-AVX:       # %bb.0:  ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]  ; X86-AVX-NEXT:    vpermilps (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0c,0x00] -; X86-AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X86-AVX-NEXT:    retl # encoding: [0xc3]  ;  ; X86-AVX512VL-LABEL: test_x86_avx_vpermilvar_ps_load:  ; X86-AVX512VL:       # %bb.0:  ; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]  ; X86-AVX512VL-NEXT:    vpermilps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0x00] -; X86-AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]  ;  ; X64-AVX-LABEL: test_x86_avx_vpermilvar_ps_load:  ; X64-AVX:       # %bb.0:  ; X64-AVX-NEXT:    vpermilps (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0c,0x07] -; X64-AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X64-AVX-NEXT:    retq # encoding: [0xc3]  ;  ; X64-AVX512VL-LABEL: test_x86_avx_vpermilvar_ps_load:  ; X64-AVX512VL:       # %bb.0:  ; X64-AVX512VL-NEXT:    vpermilps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0x07] -; X64-AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]    %a2 = load <4 x i32>, <4 x i32>* %a1    %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]    ret <4 x float> %res @@ -951,7 +951,7 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {  ; X86-AVX-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]  ; X86-AVX-NEXT:    vmovntdq 
%ymm0, (%eax) # encoding: [0xc5,0xfd,0xe7,0x00]  ; X86-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77] -; X86-AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X86-AVX-NEXT:    retl # encoding: [0xc3]  ;  ; X86-AVX512VL-LABEL: movnt_dq:  ; X86-AVX512VL:       # %bb.0: @@ -960,7 +960,7 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {  ; X86-AVX512VL-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]  ; X86-AVX512VL-NEXT:    vmovntdq %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe7,0x00]  ; X86-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77] -; X86-AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]  ;  ; X64-AVX-LABEL: movnt_dq:  ; X64-AVX:       # %bb.0: @@ -968,7 +968,7 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {  ; X64-AVX-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]  ; X64-AVX-NEXT:    vmovntdq %ymm0, (%rdi) # encoding: [0xc5,0xfd,0xe7,0x07]  ; X64-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77] -; X64-AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X64-AVX-NEXT:    retq # encoding: [0xc3]  ;  ; X64-AVX512VL-LABEL: movnt_dq:  ; X64-AVX512VL:       # %bb.0: @@ -976,7 +976,7 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {  ; X64-AVX512VL-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]  ; X64-AVX512VL-NEXT:    vmovntdq %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe7,0x07]  ; X64-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77] -; X64-AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]    %a2 = add <2 x i64> %a1, <i64 1, i64 1>    %a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>    tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a3) nounwind @@ -990,26 +990,26 @@ define void @movnt_ps(i8* %p, <8 x float> %a) nounwind {  ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]  ; X86-AVX-NEXT:    vmovntps %ymm0, (%eax) # encoding: [0xc5,0xfc,0x2b,0x00]  ; X86-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77] -; X86-AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X86-AVX-NEXT:    retl # encoding: [0xc3]  ;  ; X86-AVX512VL-LABEL: movnt_ps:  ; X86-AVX512VL:       # %bb.0:  ; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]  ; X86-AVX512VL-NEXT:    vmovntps %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x2b,0x00]  ; X86-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77] -; X86-AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]  ;  ; X64-AVX-LABEL: movnt_ps:  ; X64-AVX:       # %bb.0:  ; X64-AVX-NEXT:    vmovntps %ymm0, (%rdi) # encoding: [0xc5,0xfc,0x2b,0x07]  ; X64-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77] -; X64-AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X64-AVX-NEXT:    retq # encoding: [0xc3]  ;  ; X64-AVX512VL-LABEL: movnt_ps:  ; X64-AVX512VL:       # %bb.0:  ; X64-AVX512VL-NEXT:    vmovntps %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x2b,0x07]  ; X64-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77] -; X64-AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]    tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind    ret void  } @@ -1024,7 +1024,7 @@ define void @movnt_pd(i8* %p, <4 x 
double> %a1) nounwind {  ; X86-AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1]  ; X86-AVX-NEXT:    vmovntpd %ymm0, (%eax) # encoding: [0xc5,0xfd,0x2b,0x00]  ; X86-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77] -; X86-AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X86-AVX-NEXT:    retl # encoding: [0xc3]  ;  ; X86-AVX512VL-LABEL: movnt_pd:  ; X86-AVX512VL:       # %bb.0: @@ -1033,7 +1033,7 @@ define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind {  ; X86-AVX512VL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]  ; X86-AVX512VL-NEXT:    vmovntpd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x2b,0x00]  ; X86-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77] -; X86-AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]  ;  ; X64-AVX-LABEL: movnt_pd:  ; X64-AVX:       # %bb.0: @@ -1041,7 +1041,7 @@ define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind {  ; X64-AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1]  ; X64-AVX-NEXT:    vmovntpd %ymm0, (%rdi) # encoding: [0xc5,0xfd,0x2b,0x07]  ; X64-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77] -; X64-AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X64-AVX-NEXT:    retq # encoding: [0xc3]  ;  ; X64-AVX512VL-LABEL: movnt_pd:  ; X64-AVX512VL:       # %bb.0: @@ -1049,7 +1049,7 @@ define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind {  ; X64-AVX512VL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]  ; X64-AVX512VL-NEXT:    vmovntpd %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x2b,0x07]  ; X64-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77] -; X64-AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]    %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>    tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind    ret void diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll index 0a61f21c90c..aeb204e309a 100644 --- a/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll @@ -367,12 +367,12 @@ define <4 x i64> @test_mm256_broadcastsi128_si256_mem(<2 x i64>* %p0) {  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm256_broadcastsi128_si256_mem:  ; X64:       # %bb.0:  ; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %a0 = load <2 x i64>, <2 x i64>* %p0    %res = shufflevector <2 x i64> %a0, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>    ret <4 x i64> %res @@ -766,7 +766,7 @@ define <2 x i64> @test_mm_i32gather_epi32(i32 *%a0, <2 x i64> %a1) {  ; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1  ; X86-NEXT:    vpgatherdd %xmm2, (%eax,%xmm0,2), %xmm1  ; X86-NEXT:    vmovdqa %xmm1, %xmm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm_i32gather_epi32:  ; X64:       # %bb.0: @@ -774,7 +774,7 @@ define <2 x i64> @test_mm_i32gather_epi32(i32 *%a0, <2 x i64> %a1) {  ; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1  ; X64-NEXT:    vpgatherdd %xmm2, (%rdi,%xmm0,2), %xmm1  ; X64-NEXT:    vmovdqa %xmm1, %xmm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast 
i32 *%a0 to i8*    %arg1 = bitcast <2 x i64> %a1 to <4 x i32>    %mask = bitcast <2 x i64> <i64 -1, i64 -1> to <4 x i32> @@ -789,12 +789,12 @@ define <2 x i64> @test_mm_mask_i32gather_epi32(<2 x i64> %a0, i32 *%a1, <2 x i64  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vpgatherdd %xmm2, (%eax,%xmm1,2), %xmm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm_mask_i32gather_epi32:  ; X64:       # %bb.0:  ; X64-NEXT:    vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast <2 x i64> %a0 to <4 x i32>    %arg1 = bitcast i32 *%a1 to i8*    %arg2 = bitcast <2 x i64> %a2 to <4 x i32> @@ -812,7 +812,7 @@ define <4 x i64> @test_mm256_i32gather_epi32(i32 *%a0, <4 x i64> %a1) {  ; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1  ; X86-NEXT:    vpgatherdd %ymm2, (%eax,%ymm0,2), %ymm1  ; X86-NEXT:    vmovdqa %ymm1, %ymm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm256_i32gather_epi32:  ; X64:       # %bb.0: @@ -820,7 +820,7 @@ define <4 x i64> @test_mm256_i32gather_epi32(i32 *%a0, <4 x i64> %a1) {  ; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1  ; X64-NEXT:    vpgatherdd %ymm2, (%rdi,%ymm0,2), %ymm1  ; X64-NEXT:    vmovdqa %ymm1, %ymm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast i32 *%a0 to i8*    %arg1 = bitcast <4 x i64> %a1 to <8 x i32>    %mask = bitcast <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1> to <8 x i32> @@ -835,12 +835,12 @@ define <4 x i64> @test_mm256_mask_i32gather_epi32(<4 x i64> %a0, i32 *%a1, <4 x  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vpgatherdd %ymm2, (%eax,%ymm1,2), %ymm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm256_mask_i32gather_epi32:  ; X64:       # %bb.0:  ; X64-NEXT:    vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast <4 x i64> %a0 to <8 x i32>    %arg1 = bitcast i32 *%a1 to i8*    %arg2 = bitcast <4 x i64> %a2 to <8 x i32> @@ -858,7 +858,7 @@ define <2 x i64> @test_mm_i32gather_epi64(i64 *%a0, <2 x i64> %a1) {  ; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1  ; X86-NEXT:    vpgatherdq %xmm2, (%eax,%xmm0,2), %xmm1  ; X86-NEXT:    vmovdqa %xmm1, %xmm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm_i32gather_epi64:  ; X64:       # %bb.0: @@ -866,7 +866,7 @@ define <2 x i64> @test_mm_i32gather_epi64(i64 *%a0, <2 x i64> %a1) {  ; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1  ; X64-NEXT:    vpgatherdq %xmm2, (%rdi,%xmm0,2), %xmm1  ; X64-NEXT:    vmovdqa %xmm1, %xmm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast i64 *%a0 to i8*    %arg1 = bitcast <2 x i64> %a1 to <4 x i32>    %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> undef, i8* %arg0, <4 x i32> %arg1, <2 x i64> <i64 -1, i64 -1>, i8 2) @@ -879,12 +879,12 @@ define <2 x i64> @test_mm_mask_i32gather_epi64(<2 x i64> %a0, i64 *%a1, <2 x i64  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vpgatherdq %xmm2, (%eax,%xmm1,2), %xmm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm_mask_i32gather_epi64:  ; X64:       # %bb.0:  ; X64-NEXT:    vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg1 = bitcast i64 *%a1 to i8*    %arg2 = bitcast <2 x i64> %a2 to <4 x i32>    %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0, i8* %arg1, <4 x i32> %arg2, <2 x i64> %a3, i8 2) @@ -899,7 +899,7 @@ 
define <4 x i64> @test_mm256_i32gather_epi64(i64 *%a0, <2 x i64> %a1) {  ; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1  ; X86-NEXT:    vpgatherdq %ymm2, (%eax,%xmm0,2), %ymm1  ; X86-NEXT:    vmovdqa %ymm1, %ymm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm256_i32gather_epi64:  ; X64:       # %bb.0: @@ -907,7 +907,7 @@ define <4 x i64> @test_mm256_i32gather_epi64(i64 *%a0, <2 x i64> %a1) {  ; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1  ; X64-NEXT:    vpgatherdq %ymm2, (%rdi,%xmm0,2), %ymm1  ; X64-NEXT:    vmovdqa %ymm1, %ymm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast i64 *%a0 to i8*    %arg1 = bitcast <2 x i64> %a1 to <4 x i32>    %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> undef, i8* %arg0, <4 x i32> %arg1, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, i8 2) @@ -920,12 +920,12 @@ define <4 x i64> @test_mm256_mask_i32gather_epi64(<4 x i64> %a0, i64 *%a1, <2 x  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vpgatherdq %ymm2, (%eax,%xmm1,2), %ymm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm256_mask_i32gather_epi64:  ; X64:       # %bb.0:  ; X64-NEXT:    vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg1 = bitcast i64 *%a1 to i8*    %arg2 = bitcast <2 x i64> %a2 to <4 x i32>    %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0, i8* %arg1, <4 x i32> %arg2, <4 x i64> %a3, i8 2) @@ -940,7 +940,7 @@ define <2 x double> @test_mm_i32gather_pd(double *%a0, <2 x i64> %a1) {  ; X86-NEXT:    vxorpd %xmm1, %xmm1, %xmm1  ; X86-NEXT:    vgatherdpd %xmm2, (%eax,%xmm0,2), %xmm1  ; X86-NEXT:    vmovapd %xmm1, %xmm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm_i32gather_pd:  ; X64:       # %bb.0: @@ -948,7 +948,7 @@ define <2 x double> @test_mm_i32gather_pd(double *%a0, <2 x i64> %a1) {  ; X64-NEXT:    vxorpd %xmm1, %xmm1, %xmm1  ; X64-NEXT:    vgatherdpd %xmm2, (%rdi,%xmm0,2), %xmm1  ; X64-NEXT:    vmovapd %xmm1, %xmm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast double *%a0 to i8*    %arg1 = bitcast <2 x i64> %a1 to <4 x i32>    %cmp = fcmp oeq <2 x double> zeroinitializer, zeroinitializer @@ -964,12 +964,12 @@ define <2 x double> @test_mm_mask_i32gather_pd(<2 x double> %a0, double *%a1, <2  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vgatherdpd %xmm2, (%eax,%xmm1,2), %xmm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm_mask_i32gather_pd:  ; X64:       # %bb.0:  ; X64-NEXT:    vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg1 = bitcast double *%a1 to i8*    %arg2 = bitcast <2 x i64> %a2 to <4 x i32>    %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0, i8* %arg1, <4 x i32> %arg2, <2 x double> %a3, i8 2) @@ -984,7 +984,7 @@ define <4 x double> @test_mm256_i32gather_pd(double *%a0, <2 x i64> %a1) {  ; X86-NEXT:    vcmpeqpd %ymm1, %ymm1, %ymm2  ; X86-NEXT:    vgatherdpd %ymm2, (%eax,%xmm0,2), %ymm1  ; X86-NEXT:    vmovapd %ymm1, %ymm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm256_i32gather_pd:  ; X64:       # %bb.0: @@ -992,7 +992,7 @@ define <4 x double> @test_mm256_i32gather_pd(double *%a0, <2 x i64> %a1) {  ; X64-NEXT:    vcmpeqpd %ymm1, %ymm1, %ymm2  ; X64-NEXT:    vgatherdpd %ymm2, (%rdi,%xmm0,2), %ymm1  ; X64-NEXT:    vmovapd %ymm1, %ymm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = 
bitcast double *%a0 to i8*    %arg1 = bitcast <2 x i64> %a1 to <4 x i32>    %mask = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> zeroinitializer, <4 x double> zeroinitializer, i8 0) @@ -1006,12 +1006,12 @@ define <4 x double> @test_mm256_mask_i32gather_pd(<4 x double> %a0, double *%a1,  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vgatherdpd %ymm2, (%eax,%xmm1,2), %ymm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm256_mask_i32gather_pd:  ; X64:       # %bb.0:  ; X64-NEXT:    vgatherdpd %ymm2, (%rdi,%xmm1,2), %ymm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg1 = bitcast double *%a1 to i8*    %arg2 = bitcast <2 x i64> %a2 to <4 x i32>    %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0, i8* %arg1, <4 x i32> %arg2, <4 x double> %a3, i8 2) @@ -1026,7 +1026,7 @@ define <4 x float> @test_mm_i32gather_ps(float *%a0, <2 x i64> %a1) {  ; X86-NEXT:    vxorps %xmm1, %xmm1, %xmm1  ; X86-NEXT:    vgatherdps %xmm2, (%eax,%xmm0,2), %xmm1  ; X86-NEXT:    vmovaps %xmm1, %xmm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm_i32gather_ps:  ; X64:       # %bb.0: @@ -1034,7 +1034,7 @@ define <4 x float> @test_mm_i32gather_ps(float *%a0, <2 x i64> %a1) {  ; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1  ; X64-NEXT:    vgatherdps %xmm2, (%rdi,%xmm0,2), %xmm1  ; X64-NEXT:    vmovaps %xmm1, %xmm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast float *%a0 to i8*    %arg1 = bitcast <2 x i64> %a1 to <4 x i32>    %cmp = fcmp oeq <4 x float> zeroinitializer, zeroinitializer @@ -1050,12 +1050,12 @@ define <4 x float> @test_mm_mask_i32gather_ps(<4 x float> %a0, float *%a1, <2 x  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vgatherdps %xmm2, (%eax,%xmm1,2), %xmm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm_mask_i32gather_ps:  ; X64:       # %bb.0:  ; X64-NEXT:    vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg1 = bitcast float *%a1 to i8*    %arg2 = bitcast <2 x i64> %a2 to <4 x i32>    %call = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0, i8* %arg1, <4 x i32> %arg2, <4 x float> %a3, i8 2) @@ -1070,7 +1070,7 @@ define <8 x float> @test_mm256_i32gather_ps(float *%a0, <4 x i64> %a1) {  ; X86-NEXT:    vcmpeqps %ymm1, %ymm1, %ymm2  ; X86-NEXT:    vgatherdps %ymm2, (%eax,%ymm0,2), %ymm1  ; X86-NEXT:    vmovaps %ymm1, %ymm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm256_i32gather_ps:  ; X64:       # %bb.0: @@ -1078,7 +1078,7 @@ define <8 x float> @test_mm256_i32gather_ps(float *%a0, <4 x i64> %a1) {  ; X64-NEXT:    vcmpeqps %ymm1, %ymm1, %ymm2  ; X64-NEXT:    vgatherdps %ymm2, (%rdi,%ymm0,2), %ymm1  ; X64-NEXT:    vmovaps %ymm1, %ymm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast float *%a0 to i8*    %arg1 = bitcast <4 x i64> %a1 to <8 x i32>    %mask = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> zeroinitializer, <8 x float> zeroinitializer, i8 0) @@ -1092,12 +1092,12 @@ define <8 x float> @test_mm256_mask_i32gather_ps(<8 x float> %a0, float *%a1, <4  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vgatherdps %ymm2, (%eax,%ymm1,2), %ymm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm256_mask_i32gather_ps:  ; X64:       # %bb.0:  ; X64-NEXT:    vgatherdps %ymm2, (%rdi,%ymm1,2), %ymm0 -; X64-NEXT:    ret{{[l|q]}} +; 
X64-NEXT:    retq    %arg1 = bitcast float *%a1 to i8*    %arg2 = bitcast <4 x i64> %a2 to <8 x i32>    %call = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, i8* %arg1, <8 x i32> %arg2, <8 x float> %a3, i8 2) @@ -1112,7 +1112,7 @@ define <2 x i64> @test_mm_i64gather_epi32(i32 *%a0, <2 x i64> %a1) {  ; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1  ; X86-NEXT:    vpgatherqd %xmm2, (%eax,%xmm0,2), %xmm1  ; X86-NEXT:    vmovdqa %xmm1, %xmm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm_i64gather_epi32:  ; X64:       # %bb.0: @@ -1120,7 +1120,7 @@ define <2 x i64> @test_mm_i64gather_epi32(i32 *%a0, <2 x i64> %a1) {  ; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1  ; X64-NEXT:    vpgatherqd %xmm2, (%rdi,%xmm0,2), %xmm1  ; X64-NEXT:    vmovdqa %xmm1, %xmm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast i32 *%a0 to i8*    %mask = bitcast <2 x i64> <i64 -1, i64 -1> to <4 x i32>    %call = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> undef, i8* %arg0, <2 x i64> %a1, <4 x i32> %mask, i8 2) @@ -1134,12 +1134,12 @@ define <2 x i64> @test_mm_mask_i64gather_epi32(<2 x i64> %a0, i32 *%a1, <2 x i64  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vpgatherqd %xmm2, (%eax,%xmm1,2), %xmm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm_mask_i64gather_epi32:  ; X64:       # %bb.0:  ; X64-NEXT:    vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast <2 x i64> %a0 to <4 x i32>    %arg1 = bitcast i32 *%a1 to i8*    %arg3 = bitcast <2 x i64> %a3 to <4 x i32> @@ -1157,7 +1157,7 @@ define <2 x i64> @test_mm256_i64gather_epi32(i32 *%a0, <4 x i64> %a1) {  ; X86-NEXT:    vpgatherqd %xmm2, (%eax,%ymm0,2), %xmm1  ; X86-NEXT:    vmovdqa %xmm1, %xmm0  ; X86-NEXT:    vzeroupper -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm256_i64gather_epi32:  ; X64:       # %bb.0: @@ -1166,7 +1166,7 @@ define <2 x i64> @test_mm256_i64gather_epi32(i32 *%a0, <4 x i64> %a1) {  ; X64-NEXT:    vpgatherqd %xmm2, (%rdi,%ymm0,2), %xmm1  ; X64-NEXT:    vmovdqa %xmm1, %xmm0  ; X64-NEXT:    vzeroupper -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast i32 *%a0 to i8*    %mask = bitcast <2 x i64> <i64 -1, i64 -1> to <4 x i32>    %call = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> undef, i8* %arg0, <4 x i64> %a1, <4 x i32> %mask, i8 2) @@ -1181,13 +1181,13 @@ define <2 x i64> @test_mm256_mask_i64gather_epi32(<2 x i64> %a0, i32 *%a1, <4 x  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vpgatherqd %xmm2, (%eax,%ymm1,2), %xmm0  ; X86-NEXT:    vzeroupper -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm256_mask_i64gather_epi32:  ; X64:       # %bb.0:  ; X64-NEXT:    vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0  ; X64-NEXT:    vzeroupper -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast <2 x i64> %a0 to <4 x i32>    %arg1 = bitcast i32 *%a1 to i8*    %arg3 = bitcast <2 x i64> %a3 to <4 x i32> @@ -1204,7 +1204,7 @@ define <2 x i64> @test_mm_i64gather_epi64(i64 *%a0, <2 x i64> %a1) {  ; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1  ; X86-NEXT:    vpgatherqq %xmm2, (%eax,%xmm0,2), %xmm1  ; X86-NEXT:    vmovdqa %xmm1, %xmm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm_i64gather_epi64:  ; X64:       # %bb.0: @@ -1212,7 +1212,7 @@ define <2 x i64> @test_mm_i64gather_epi64(i64 *%a0, <2 x i64> %a1) {  ; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1  ; X64-NEXT:    
vpgatherqq %xmm2, (%rdi,%xmm0,2), %xmm1  ; X64-NEXT:    vmovdqa %xmm1, %xmm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast i64 *%a0 to i8*    %call = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> undef, i8* %arg0, <2 x i64> %a1, <2 x i64> <i64 -1, i64 -1>, i8 2)    ret <2 x i64> %call @@ -1224,12 +1224,12 @@ define <2 x i64> @test_mm_mask_i64gather_epi64(<2 x i64> %a0, i64 *%a1, <2 x i64  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vpgatherqq %xmm2, (%eax,%xmm1,2), %xmm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm_mask_i64gather_epi64:  ; X64:       # %bb.0:  ; X64-NEXT:    vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg1 = bitcast i64 *%a1 to i8*    %call = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0, i8* %arg1, <2 x i64> %a2, <2 x i64> %a3, i8 2)    ret <2 x i64> %call @@ -1243,7 +1243,7 @@ define <4 x i64> @test_mm256_i64gather_epi64(i64 *%a0, <4 x i64> %a1) {  ; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1  ; X86-NEXT:    vpgatherqq %ymm2, (%eax,%ymm0,2), %ymm1  ; X86-NEXT:    vmovdqa %ymm1, %ymm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm256_i64gather_epi64:  ; X64:       # %bb.0: @@ -1251,7 +1251,7 @@ define <4 x i64> @test_mm256_i64gather_epi64(i64 *%a0, <4 x i64> %a1) {  ; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1  ; X64-NEXT:    vpgatherqq %ymm2, (%rdi,%ymm0,2), %ymm1  ; X64-NEXT:    vmovdqa %ymm1, %ymm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast i64 *%a0 to i8*    %call = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> undef, i8* %arg0, <4 x i64> %a1, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, i8 2)    ret <4 x i64> %call @@ -1263,12 +1263,12 @@ define <4 x i64> @test_mm256_mask_i64gather_epi64(<4 x i64> %a0, i64 *%a1, <4 x  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vpgatherqq %ymm2, (%eax,%ymm1,2), %ymm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm256_mask_i64gather_epi64:  ; X64:       # %bb.0:  ; X64-NEXT:    vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg1 = bitcast i64 *%a1 to i8*    %call = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0, i8* %arg1, <4 x i64> %a2, <4 x i64> %a3, i8 2)    ret <4 x i64> %call @@ -1282,7 +1282,7 @@ define <2 x double> @test_mm_i64gather_pd(double *%a0, <2 x i64> %a1) {  ; X86-NEXT:    vxorpd %xmm1, %xmm1, %xmm1  ; X86-NEXT:    vgatherqpd %xmm2, (%eax,%xmm0,2), %xmm1  ; X86-NEXT:    vmovapd %xmm1, %xmm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm_i64gather_pd:  ; X64:       # %bb.0: @@ -1290,7 +1290,7 @@ define <2 x double> @test_mm_i64gather_pd(double *%a0, <2 x i64> %a1) {  ; X64-NEXT:    vxorpd %xmm1, %xmm1, %xmm1  ; X64-NEXT:    vgatherqpd %xmm2, (%rdi,%xmm0,2), %xmm1  ; X64-NEXT:    vmovapd %xmm1, %xmm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast double *%a0 to i8*    %cmp = fcmp oeq <2 x double> zeroinitializer, zeroinitializer    %sext = sext <2 x i1> %cmp to <2 x i64> @@ -1305,12 +1305,12 @@ define <2 x double> @test_mm_mask_i64gather_pd(<2 x double> %a0, double *%a1, <2  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vgatherqpd %xmm2, (%eax,%xmm1,2), %xmm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm_mask_i64gather_pd:  ; X64:       # %bb.0:  ; X64-NEXT:    vgatherqpd %xmm2, 
(%rdi,%xmm1,2), %xmm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg1 = bitcast double *%a1 to i8*    %call = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0, i8* %arg1, <2 x i64> %a2, <2 x double> %a3, i8 2)    ret <2 x double> %call @@ -1324,7 +1324,7 @@ define <4 x double> @test_mm256_i64gather_pd(double *%a0, <4 x i64> %a1) {  ; X86-NEXT:    vcmpeqpd %ymm1, %ymm1, %ymm2  ; X86-NEXT:    vgatherqpd %ymm2, (%eax,%ymm0,2), %ymm1  ; X86-NEXT:    vmovapd %ymm1, %ymm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm256_i64gather_pd:  ; X64:       # %bb.0: @@ -1332,7 +1332,7 @@ define <4 x double> @test_mm256_i64gather_pd(double *%a0, <4 x i64> %a1) {  ; X64-NEXT:    vcmpeqpd %ymm1, %ymm1, %ymm2  ; X64-NEXT:    vgatherqpd %ymm2, (%rdi,%ymm0,2), %ymm1  ; X64-NEXT:    vmovapd %ymm1, %ymm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast double *%a0 to i8*    %mask = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> zeroinitializer, <4 x double> zeroinitializer, i8 0)    %call = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> undef, i8* %arg0, <4 x i64> %a1, <4 x double> %mask, i8 2) @@ -1345,12 +1345,12 @@ define <4 x double> @test_mm256_mask_i64gather_pd(<4 x double> %a0, i64 *%a1, <4  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vgatherqpd %ymm2, (%eax,%ymm1,2), %ymm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm256_mask_i64gather_pd:  ; X64:       # %bb.0:  ; X64-NEXT:    vgatherqpd %ymm2, (%rdi,%ymm1,2), %ymm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg1 = bitcast i64 *%a1 to i8*    %call = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0, i8* %arg1, <4 x i64> %a2, <4 x double> %a3, i8 2)    ret <4 x double> %call @@ -1364,7 +1364,7 @@ define <4 x float> @test_mm_i64gather_ps(float *%a0, <2 x i64> %a1) {  ; X86-NEXT:    vxorps %xmm1, %xmm1, %xmm1  ; X86-NEXT:    vgatherqps %xmm2, (%eax,%xmm0,2), %xmm1  ; X86-NEXT:    vmovaps %xmm1, %xmm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm_i64gather_ps:  ; X64:       # %bb.0: @@ -1372,7 +1372,7 @@ define <4 x float> @test_mm_i64gather_ps(float *%a0, <2 x i64> %a1) {  ; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1  ; X64-NEXT:    vgatherqps %xmm2, (%rdi,%xmm0,2), %xmm1  ; X64-NEXT:    vmovaps %xmm1, %xmm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast float *%a0 to i8*    %cmp = fcmp oeq <4 x float> zeroinitializer, zeroinitializer    %sext = sext <4 x i1> %cmp to <4 x i32> @@ -1387,12 +1387,12 @@ define <4 x float> @test_mm_mask_i64gather_ps(<4 x float> %a0, float *%a1, <2 x  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vgatherqps %xmm2, (%eax,%xmm1,2), %xmm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm_mask_i64gather_ps:  ; X64:       # %bb.0:  ; X64-NEXT:    vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg1 = bitcast float *%a1 to i8*    %call = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0, i8* %arg1, <2 x i64> %a2, <4 x float> %a3, i8 2)    ret <4 x float> %call @@ -1407,7 +1407,7 @@ define <4 x float> @test_mm256_i64gather_ps(float *%a0, <4 x i64> %a1) {  ; X86-NEXT:    vgatherqps %xmm2, (%eax,%ymm0,2), %xmm1  ; X86-NEXT:    vmovaps %xmm1, %xmm0  ; X86-NEXT:    vzeroupper -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm256_i64gather_ps:  ; X64:       # %bb.0: 
@@ -1416,7 +1416,7 @@ define <4 x float> @test_mm256_i64gather_ps(float *%a0, <4 x i64> %a1) {  ; X64-NEXT:    vgatherqps %xmm2, (%rdi,%ymm0,2), %xmm1  ; X64-NEXT:    vmovaps %xmm1, %xmm0  ; X64-NEXT:    vzeroupper -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast float *%a0 to i8*    %cmp = fcmp oeq <4 x float> zeroinitializer, zeroinitializer    %sext = sext <4 x i1> %cmp to <4 x i32> @@ -1432,13 +1432,13 @@ define <4 x float> @test_mm256_mask_i64gather_ps(<4 x float> %a0, float *%a1, <4  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vgatherqps %xmm2, (%eax,%ymm1,2), %xmm0  ; X86-NEXT:    vzeroupper -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm256_mask_i64gather_ps:  ; X64:       # %bb.0:  ; X64-NEXT:    vgatherqps %xmm2, (%rdi,%ymm1,2), %xmm0  ; X64-NEXT:    vzeroupper -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg1 = bitcast float *%a1 to i8*    %call = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0, i8* %arg1, <4 x i64> %a2, <4 x float> %a3, i8 2)    ret <4 x float> %call @@ -1496,12 +1496,12 @@ define <2 x i64> @test_mm_maskload_epi32(i32* %a0, <2 x i64> %a1) nounwind {  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vpmaskmovd (%eax), %xmm0, %xmm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm_maskload_epi32:  ; X64:       # %bb.0:  ; X64-NEXT:    vpmaskmovd (%rdi), %xmm0, %xmm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast i32* %a0 to i8*    %arg1 = bitcast <2 x i64> %a1 to <4 x i32>    %call = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %arg0, <4 x i32> %arg1) @@ -1515,12 +1515,12 @@ define <4 x i64> @test_mm256_maskload_epi32(i32* %a0, <4 x i64> %a1) nounwind {  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vpmaskmovd (%eax), %ymm0, %ymm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm256_maskload_epi32:  ; X64:       # %bb.0:  ; X64-NEXT:    vpmaskmovd (%rdi), %ymm0, %ymm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast i32* %a0 to i8*    %arg1 = bitcast <4 x i64> %a1 to <8 x i32>    %call = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %arg0, <8 x i32> %arg1) @@ -1534,12 +1534,12 @@ define <2 x i64> @test_mm_maskload_epi64(i64* %a0, <2 x i64> %a1) nounwind {  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vpmaskmovq (%eax), %xmm0, %xmm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm_maskload_epi64:  ; X64:       # %bb.0:  ; X64-NEXT:    vpmaskmovq (%rdi), %xmm0, %xmm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast i64* %a0 to i8*    %res = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %arg0, <2 x i64> %a1)    ret <2 x i64> %res @@ -1551,12 +1551,12 @@ define <4 x i64> @test_mm256_maskload_epi64(i64* %a0, <4 x i64> %a1) nounwind {  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vpmaskmovq (%eax), %ymm0, %ymm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm256_maskload_epi64:  ; X64:       # %bb.0:  ; X64-NEXT:    vpmaskmovq (%rdi), %ymm0, %ymm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast i64* %a0 to i8*    %res = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %arg0, <4 x i64> %a1)    ret <4 x i64> %res @@ -1568,12 +1568,12 @@ define void @test_mm_maskstore_epi32(float* %a0, <2 x i64> %a1, <2 x i64> %a2) n  ; X86:       # %bb.0:  ; X86-NEXT:    movl 
{{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vpmaskmovd %xmm1, %xmm0, (%eax) -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm_maskstore_epi32:  ; X64:       # %bb.0:  ; X64-NEXT:    vpmaskmovd %xmm1, %xmm0, (%rdi) -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast float* %a0 to i8*    %arg1 = bitcast <2 x i64> %a1 to <4 x i32>    %arg2 = bitcast <2 x i64> %a2 to <4 x i32> @@ -1588,13 +1588,13 @@ define void @test_mm256_maskstore_epi32(float* %a0, <4 x i64> %a1, <4 x i64> %a2  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vpmaskmovd %ymm1, %ymm0, (%eax)  ; X86-NEXT:    vzeroupper -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm256_maskstore_epi32:  ; X64:       # %bb.0:  ; X64-NEXT:    vpmaskmovd %ymm1, %ymm0, (%rdi)  ; X64-NEXT:    vzeroupper -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast float* %a0 to i8*    %arg1 = bitcast <4 x i64> %a1 to <8 x i32>    %arg2 = bitcast <4 x i64> %a2 to <8 x i32> @@ -1608,12 +1608,12 @@ define void @test_mm_maskstore_epi64(i64* %a0, <2 x i64> %a1, <2 x i64> %a2) nou  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vpmaskmovq %xmm1, %xmm0, (%eax) -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm_maskstore_epi64:  ; X64:       # %bb.0:  ; X64-NEXT:    vpmaskmovq %xmm1, %xmm0, (%rdi) -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast i64* %a0 to i8*    call void @llvm.x86.avx2.maskstore.q(i8* %arg0, <2 x i64> %a1, <2 x i64> %a2)    ret void @@ -1626,13 +1626,13 @@ define void @test_mm256_maskstore_epi64(i64* %a0, <4 x i64> %a1, <4 x i64> %a2)  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vpmaskmovq %ymm1, %ymm0, (%eax)  ; X86-NEXT:    vzeroupper -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm256_maskstore_epi64:  ; X64:       # %bb.0:  ; X64-NEXT:    vpmaskmovq %ymm1, %ymm0, (%rdi)  ; X64-NEXT:    vzeroupper -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast i64* %a0 to i8*    call void @llvm.x86.avx2.maskstore.q.256(i8* %arg0, <4 x i64> %a1, <4 x i64> %a2)    ret void @@ -2465,12 +2465,12 @@ define <4 x i64> @test_mm256_stream_load_si256(<4 x i64> *%a0) {  ; X86:       # %bb.0:  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    vmovntdqa (%eax), %ymm0 -; X86-NEXT:    ret{{[l|q]}} +; X86-NEXT:    retl  ;  ; X64-LABEL: test_mm256_stream_load_si256:  ; X64:       # %bb.0:  ; X64-NEXT:    vmovntdqa (%rdi), %ymm0 -; X64-NEXT:    ret{{[l|q]}} +; X64-NEXT:    retq    %arg0 = bitcast <4 x i64> *%a0 to i8*    %res = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %arg0)    ret <4 x i64> %res diff --git a/llvm/test/CodeGen/X86/packss.ll b/llvm/test/CodeGen/X86/packss.ll index 0b4335f2b6f..62bed060880 100644 --- a/llvm/test/CodeGen/X86/packss.ll +++ b/llvm/test/CodeGen/X86/packss.ll @@ -74,28 +74,28 @@ define <8 x i16> @trunc_ashr_v4i32_icmp_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi  ; X86-SSE-NEXT:    psrad $31, %xmm0  ; X86-SSE-NEXT:    pcmpgtd {{\.LCPI.*}}, %xmm1  ; X86-SSE-NEXT:    packssdw %xmm1, %xmm0 -; X86-SSE-NEXT:    ret{{[l|q]}} +; X86-SSE-NEXT:    retl  ;  ; X86-AVX-LABEL: trunc_ashr_v4i32_icmp_v4i32:  ; X86-AVX:       # %bb.0:  ; X86-AVX-NEXT:    vpsrad $31, %xmm0, %xmm0  ; X86-AVX-NEXT:    vpcmpgtd {{\.LCPI.*}}, %xmm1, %xmm1  ; X86-AVX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 -; X86-AVX-NEXT:    ret{{[l|q]}} +; X86-AVX-NEXT:    retl  ;  ; X64-SSE-LABEL: trunc_ashr_v4i32_icmp_v4i32:  ; X64-SSE:       # %bb.0:  ; 
X64-SSE-NEXT:    psrad $31, %xmm0  ; X64-SSE-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1  ; X64-SSE-NEXT:    packssdw %xmm1, %xmm0 -; X64-SSE-NEXT:    ret{{[l|q]}} +; X64-SSE-NEXT:    retq  ;  ; X64-AVX-LABEL: trunc_ashr_v4i32_icmp_v4i32:  ; X64-AVX:       # %bb.0:  ; X64-AVX-NEXT:    vpsrad $31, %xmm0, %xmm0  ; X64-AVX-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm1, %xmm1  ; X64-AVX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 -; X64-AVX-NEXT:    ret{{[l|q]}} +; X64-AVX-NEXT:    retq    %1 = ashr <4 x i32> %a, <i32 31, i32 31, i32 31, i32 31>    %2 = icmp sgt <4 x i32> %b, <i32 1, i32 16, i32 255, i32 65535>    %3 = sext <4 x i1> %2 to <4 x i32> diff --git a/llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll index 8ff3fae6509..ee7b4babe63 100644 --- a/llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll @@ -195,34 +195,22 @@ define i32 @test_mm_cmpestrz(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) nou  declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone  define <2 x i64> @test_mm_cmpgt_epi64(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_cmpgt_epi64: -; X32:       # %bb.0: -; X32-NEXT:    pcmpgtq %xmm1, %xmm0 -; X32-NEXT:    retl -; -; X64-LABEL: test_mm_cmpgt_epi64: -; X64:       # %bb.0: -; X64-NEXT:    pcmpgtq %xmm1, %xmm0 -; X64-NEXT:    retq +; ALL-LABEL: test_mm_cmpgt_epi64: +; ALL:       # %bb.0: +; ALL-NEXT:    pcmpgtq %xmm1, %xmm0 +; ALL-NEXT:    ret{{[l|q]}}    %cmp = icmp sgt <2 x i64> %a0, %a1    %res = sext <2 x i1> %cmp to <2 x i64>    ret <2 x i64> %res  }  define i32 @test_mm_cmpistra(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_cmpistra: -; X32:       # %bb.0: -; X32-NEXT:    xorl %eax, %eax -; X32-NEXT:    pcmpistri $7, %xmm1, %xmm0 -; X32-NEXT:    seta %al -; X32-NEXT:    retl -; -; X64-LABEL: test_mm_cmpistra: -; X64:       # %bb.0: -; X64-NEXT:    xorl %eax, %eax -; X64-NEXT:    pcmpistri $7, %xmm1, %xmm0 -; X64-NEXT:    seta %al -; X64-NEXT:    retq +; ALL-LABEL: test_mm_cmpistra: +; ALL:       # %bb.0: +; ALL-NEXT:    xorl %eax, %eax +; ALL-NEXT:    pcmpistri $7, %xmm1, %xmm0 +; ALL-NEXT:    seta %al +; ALL-NEXT:    ret{{[l|q]}}    %arg0 = bitcast <2 x i64> %a0 to <16 x i8>    %arg1 = bitcast <2 x i64> %a1 to <16 x i8>    %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7) @@ -231,19 +219,12 @@ define i32 @test_mm_cmpistra(<2 x i64> %a0, <2 x i64> %a1) {  declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone  define i32 @test_mm_cmpistrc(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_cmpistrc: -; X32:       # %bb.0: -; X32-NEXT:    xorl %eax, %eax -; X32-NEXT:    pcmpistri $7, %xmm1, %xmm0 -; X32-NEXT:    setb %al -; X32-NEXT:    retl -; -; X64-LABEL: test_mm_cmpistrc: -; X64:       # %bb.0: -; X64-NEXT:    xorl %eax, %eax -; X64-NEXT:    pcmpistri $7, %xmm1, %xmm0 -; X64-NEXT:    setb %al -; X64-NEXT:    retq +; ALL-LABEL: test_mm_cmpistrc: +; ALL:       # %bb.0: +; ALL-NEXT:    xorl %eax, %eax +; ALL-NEXT:    pcmpistri $7, %xmm1, %xmm0 +; ALL-NEXT:    setb %al +; ALL-NEXT:    ret{{[l|q]}}    %arg0 = bitcast <2 x i64> %a0 to <16 x i8>    %arg1 = bitcast <2 x i64> %a1 to <16 x i8>    %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7) @@ -252,17 +233,11 @@ define i32 @test_mm_cmpistrc(<2 x i64> %a0, <2 x i64> %a1) {  declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone  define 
i32 @test_mm_cmpistri(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_cmpistri: -; X32:       # %bb.0: -; X32-NEXT:    pcmpistri $7, %xmm1, %xmm0 -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    retl -; -; X64-LABEL: test_mm_cmpistri: -; X64:       # %bb.0: -; X64-NEXT:    pcmpistri $7, %xmm1, %xmm0 -; X64-NEXT:    movl %ecx, %eax -; X64-NEXT:    retq +; ALL-LABEL: test_mm_cmpistri: +; ALL:       # %bb.0: +; ALL-NEXT:    pcmpistri $7, %xmm1, %xmm0 +; ALL-NEXT:    movl %ecx, %eax +; ALL-NEXT:    ret{{[l|q]}}    %arg0 = bitcast <2 x i64> %a0 to <16 x i8>    %arg1 = bitcast <2 x i64> %a1 to <16 x i8>    %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7) @@ -271,15 +246,10 @@ define i32 @test_mm_cmpistri(<2 x i64> %a0, <2 x i64> %a1) {  declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone  define <2 x i64> @test_mm_cmpistrm(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_cmpistrm: -; X32:       # %bb.0: -; X32-NEXT:    pcmpistrm $7, %xmm1, %xmm0 -; X32-NEXT:    retl -; -; X64-LABEL: test_mm_cmpistrm: -; X64:       # %bb.0: -; X64-NEXT:    pcmpistrm $7, %xmm1, %xmm0 -; X64-NEXT:    retq +; ALL-LABEL: test_mm_cmpistrm: +; ALL:       # %bb.0: +; ALL-NEXT:    pcmpistrm $7, %xmm1, %xmm0 +; ALL-NEXT:    ret{{[l|q]}}    %arg0 = bitcast <2 x i64> %a0 to <16 x i8>    %arg1 = bitcast <2 x i64> %a1 to <16 x i8>    %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7) @@ -289,19 +259,12 @@ define <2 x i64> @test_mm_cmpistrm(<2 x i64> %a0, <2 x i64> %a1) {  declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone  define i32 @test_mm_cmpistro(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_cmpistro: -; X32:       # %bb.0: -; X32-NEXT:    xorl %eax, %eax -; X32-NEXT:    pcmpistri $7, %xmm1, %xmm0 -; X32-NEXT:    seto %al -; X32-NEXT:    retl -; -; X64-LABEL: test_mm_cmpistro: -; X64:       # %bb.0: -; X64-NEXT:    xorl %eax, %eax -; X64-NEXT:    pcmpistri $7, %xmm1, %xmm0 -; X64-NEXT:    seto %al -; X64-NEXT:    retq +; ALL-LABEL: test_mm_cmpistro: +; ALL:       # %bb.0: +; ALL-NEXT:    xorl %eax, %eax +; ALL-NEXT:    pcmpistri $7, %xmm1, %xmm0 +; ALL-NEXT:    seto %al +; ALL-NEXT:    ret{{[l|q]}}    %arg0 = bitcast <2 x i64> %a0 to <16 x i8>    %arg1 = bitcast <2 x i64> %a1 to <16 x i8>    %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7) @@ -310,19 +273,12 @@ define i32 @test_mm_cmpistro(<2 x i64> %a0, <2 x i64> %a1) {  declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone  define i32 @test_mm_cmpistrs(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_cmpistrs: -; X32:       # %bb.0: -; X32-NEXT:    xorl %eax, %eax -; X32-NEXT:    pcmpistri $7, %xmm1, %xmm0 -; X32-NEXT:    sets %al -; X32-NEXT:    retl -; -; X64-LABEL: test_mm_cmpistrs: -; X64:       # %bb.0: -; X64-NEXT:    xorl %eax, %eax -; X64-NEXT:    pcmpistri $7, %xmm1, %xmm0 -; X64-NEXT:    sets %al -; X64-NEXT:    retq +; ALL-LABEL: test_mm_cmpistrs: +; ALL:       # %bb.0: +; ALL-NEXT:    xorl %eax, %eax +; ALL-NEXT:    pcmpistri $7, %xmm1, %xmm0 +; ALL-NEXT:    sets %al +; ALL-NEXT:    ret{{[l|q]}}    %arg0 = bitcast <2 x i64> %a0 to <16 x i8>    %arg1 = bitcast <2 x i64> %a1 to <16 x i8>    %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7) @@ -331,19 +287,12 @@ define i32 @test_mm_cmpistrs(<2 x i64> %a0, <2 x i64> %a1) {  declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, 
i8) nounwind readnone  define i32 @test_mm_cmpistrz(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_cmpistrz: -; X32:       # %bb.0: -; X32-NEXT:    xorl %eax, %eax -; X32-NEXT:    pcmpistri $7, %xmm1, %xmm0 -; X32-NEXT:    sete %al -; X32-NEXT:    retl -; -; X64-LABEL: test_mm_cmpistrz: -; X64:       # %bb.0: -; X64-NEXT:    xorl %eax, %eax -; X64-NEXT:    pcmpistri $7, %xmm1, %xmm0 -; X64-NEXT:    sete %al -; X64-NEXT:    retq +; ALL-LABEL: test_mm_cmpistrz: +; ALL:       # %bb.0: +; ALL-NEXT:    xorl %eax, %eax +; ALL-NEXT:    pcmpistri $7, %xmm1, %xmm0 +; ALL-NEXT:    sete %al +; ALL-NEXT:    ret{{[l|q]}}    %arg0 = bitcast <2 x i64> %a0 to <16 x i8>    %arg1 = bitcast <2 x i64> %a1 to <16 x i8>    %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7) diff --git a/llvm/utils/UpdateTestChecks/asm.py b/llvm/utils/UpdateTestChecks/asm.py index f3feb3507ce..10b35b56f9d 100644 --- a/llvm/utils/UpdateTestChecks/asm.py +++ b/llvm/utils/UpdateTestChecks/asm.py @@ -107,7 +107,7 @@ def scrub_asm_x86(asm, args):    asm = SCRUB_X86_RIP_RE.sub(r'{{.*}}(%rip)', asm)    # Generically match a LCP symbol.    asm = SCRUB_X86_LCP_RE.sub(r'{{\.LCPI.*}}', asm) -  if getattr(args, 'x86_extra_scrub', False): +  if getattr(args, 'extra_scrub', False):      # Avoid generating different checks for 32- and 64-bit because of 'retl' vs 'retq'.      asm = SCRUB_X86_RET_RE.sub(r'ret{{[l|q]}}', asm)    # Strip kill operands inserted into the asm. diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index da076645320..daea395e31f 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -3,6 +3,7 @@ import re  import string  import subprocess  import sys +import copy  if sys.version_info[0] > 2:    class string: @@ -80,13 +81,29 @@ def scrub_body(body):    body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)    return body +def do_scrub(body, scrubber, scrubber_args, extra): +  if scrubber_args: +    local_args = copy.deepcopy(scrubber_args) +    local_args[0].extra_scrub = extra +    return scrubber(body, *local_args) +  return scrubber(body, *scrubber_args) +  # Build up a dictionary of all the function bodies. +class function_body(object): +  def __init__(self, string, extra): +    self.scrub = string +    self.extrascrub = extra +  def __str__(self): +    return self.scrub +  def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_tool_output, prefixes, func_dict, verbose):    for m in function_re.finditer(raw_tool_output):      if not m:        continue      func = m.group('func') -    scrubbed_body = scrubber(m.group('body'), *scrubber_args) +    body = m.group('body') +    scrubbed_body = do_scrub(body, scrubber, scrubber_args, extra = False) +    scrubbed_extra = do_scrub(body, scrubber, scrubber_args, extra = True)      if m.groupdict().has_key('analysis'):        analysis = m.group('analysis')        if analysis.lower() != 'cost model analysis': @@ -99,15 +116,19 @@ def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_too        for l in scrubbed_body.splitlines():          print('  ' + l, file=sys.stderr)      for prefix in prefixes: -      if func in func_dict[prefix] and func_dict[prefix][func] != scrubbed_body: -        if prefix == prefixes[-1]: -          print('WARNING: Found conflicting asm under the ' -                               'same prefix: %r!' 
% (prefix,), file=sys.stderr) -        else: -          func_dict[prefix][func] = None +      if func in func_dict[prefix] and str(func_dict[prefix][func]) != scrubbed_body: +        if func_dict[prefix][func] and func_dict[prefix][func].extrascrub == scrubbed_extra: +          func_dict[prefix][func].scrub = scrubbed_extra            continue +        else: +          if prefix == prefixes[-1]: +            print('WARNING: Found conflicting asm under the ' +                                 'same prefix: %r!' % (prefix,), file=sys.stderr) +          else: +            func_dict[prefix][func] = None +            continue -      func_dict[prefix][func] = scrubbed_body +      func_dict[prefix][func] = function_body(scrubbed_body, scrubbed_extra)  ##### Generator of LLVM IR CHECK lines @@ -188,7 +209,7 @@ def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,        printed_prefixes.append(checkprefix)        output_lines.append(check_label_format % (checkprefix, func_name)) -      func_body = func_dict[checkprefix][func_name].splitlines() +      func_body = str(func_dict[checkprefix][func_name]).splitlines()        # For ASM output, just emit the check lines.        if is_asm == True: diff --git a/llvm/utils/update_llc_test_checks.py b/llvm/utils/update_llc_test_checks.py index dd7a1d27005..f7d94cdbd78 100755 --- a/llvm/utils/update_llc_test_checks.py +++ b/llvm/utils/update_llc_test_checks.py @@ -28,8 +28,8 @@ def main():    parser.add_argument(        '--function', help='The function in the test file to update')    parser.add_argument( -      '--x86_extra_scrub', action='store_true', -      help='Use more regex for x86 matching to reduce diffs between various subtargets') +      '--extra_scrub', action='store_true', +      help='Always use additional regex to further reduce diffs between various subtargets')    parser.add_argument('tests', nargs='+')    args = parser.parse_args()  | 
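
One detail of the do_scrub helper shown above: it deep-copies the scrubber arguments because their first element appears to be the argparse namespace that scrub_asm_x86 checks via getattr(args, 'extra_scrub', False), so forcing the flag on a copy lets a single pass over the llc output yield both the exact and the agnostic body without mutating the user's command-line choice. A short runnable sketch of that pattern (the regex and sample asm are illustrative; the two functions follow the gating in the patched asm.py and common.py):

```python
import argparse
import copy
import re

RET_RE = re.compile(r'ret[lq]\b')  # stand-in for asm.py's SCRUB_X86_RET_RE

def scrub_asm_x86(asm, args):
    # Same gate as the patched UpdateTestChecks/asm.py: the extra scrub now keys
    # off the generic 'extra_scrub' attribute rather than 'x86_extra_scrub'.
    if getattr(args, 'extra_scrub', False):
        asm = RET_RE.sub(r'ret{{[l|q]}}', asm)
    return asm

def do_scrub(body, scrubber, scrubber_args, extra):
    # Force extra_scrub on a deep copy so the caller's args stay untouched.
    if scrubber_args:
        local_args = copy.deepcopy(scrubber_args)
        local_args[0].extra_scrub = extra
        return scrubber(body, *local_args)
    return scrubber(body, *scrubber_args)

parser = argparse.ArgumentParser()
parser.add_argument('--extra_scrub', action='store_true')
args = parser.parse_args([])  # user did not pass --extra_scrub

body = 'vmovups %xmm0, (%rdi)\nretq'
print(do_scrub(body, scrub_asm_x86, [args], extra=False))  # keeps 'retq'
print(do_scrub(body, scrub_asm_x86, [args], extra=True))   # 'ret{{[l|q]}}'
print(args.extra_scrub)                                    # still False
```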

