Diffstat (limited to 'llvm/test/CodeGen/X86')
 llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll         | 10 +++++-----
 llvm/test/CodeGen/X86/avx512-intrinsics.ll               | 14 +++++++-------
 llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll   | 12 ++++++------
 llvm/test/CodeGen/X86/avx512dq-intrinsics.ll             | 20 ++++++++++----------
 llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll | 24 ++++++++++++------------
 llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll           | 40 ++++++++++++++++++++--------------------
 llvm/test/CodeGen/X86/avx512vl-intrinsics.ll             | 16 ++++++++--------
 llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll         | 26 +++++++++++++-------------
 llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll         |  8 ++++----
 llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll       | 16 ++++++++--------
 10 files changed, 93 insertions(+), 93 deletions(-)
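Every hunk below is the same mechanical change: the mask-producing AVX-512 intrinsics drop the "mask." component of their names (llvm.x86.avx512.mask.cmp.{ps,pd}.* becomes llvm.x86.avx512.cmp.{ps,pd}.*, and llvm.x86.avx512.mask.fpclass.{ps,pd}.* becomes llvm.x86.avx512.fpclass.{ps,pd}.*), with the argument and return types unchanged. A minimal before/after sketch in LLVM IR, mirroring the test_cmpps hunk below — the wrapper name @sketch_cmpps is illustrative and not part of this diff:

; before the rename
declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)

; after the rename: identical signature, only the name changes
declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)

define i16 @sketch_cmpps(<16 x float> %a, <16 x float> %b) {
  ; predicate 2 and SAE operand 8, as used by test_cmpps below;
  ; the <16 x i1> mask result is bitcast to i16 exactly as in the tests
  %m = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i32 8)
  %r = bitcast <16 x i1> %m to i16
  ret i16 %r
}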
diff --git a/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll b/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll
index 9f35fc38eba..72e48ae02ac 100644
--- a/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll
+++ b/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll
@@ -22,13 +22,13 @@ define zeroext i16 @cmp_kor_seq_16(<16 x float> %a, <16 x float> %b, <16 x float
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
 entry:
-  %0 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %x, i32 13, i32 4)
+  %0 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %x, i32 13, i32 4)
   %1 = bitcast <16 x i1> %0 to i16
-  %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %b, <16 x float> %x, i32 13, i32 4)
+  %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %b, <16 x float> %x, i32 13, i32 4)
   %3 = bitcast <16 x i1> %2 to i16
-  %4 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %c, <16 x float> %x, i32 13, i32 4)
+  %4 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %c, <16 x float> %x, i32 13, i32 4)
   %5 = bitcast <16 x i1> %4 to i16
-  %6 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %d, <16 x float> %x, i32 13, i32 4)
+  %6 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %d, <16 x float> %x, i32 13, i32 4)
   %7 = bitcast <16 x i1> %6 to i16
   %8 = bitcast i16 %1 to <16 x i1>
   %9 = bitcast i16 %3 to <16 x i1>
@@ -46,7 +46,7 @@ entry:
 }

 ; Function Attrs: nounwind readnone
-declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i32) #1
+declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32) #1

 attributes #0 = { nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="knl" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index e2c7d6cf0c1..e7db5429094 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -902,11 +902,11 @@ define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
 ; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
-  %res = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i32 8)
+  %res = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i32 8)
   %1 = bitcast <16 x i1> %res to i16
   ret i16 %1
 }
-declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)
+declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)

 define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
 ; CHECK-LABEL: test_cmppd:
@@ -916,11 +916,11 @@ define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
 ; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
-  %res = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i32 4)
+  %res = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i32 4)
   %1 = bitcast <8 x i1> %res to i8
   ret i8 %1
 }
-declare <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)
+declare <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)

 ; Function Attrs: nounwind readnone
@@ -5121,9 +5121,9 @@ define <16 x float> @bad_mask_transition(<8 x double> %a, <8 x double> %b, <8 x
 ; CHECK-NEXT:    vblendmps %zmm5, %zmm4, %zmm0 {%k1}
 ; CHECK-NEXT:    retq
 entry:
-  %0 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, i32 4)
+  %0 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, i32 4)
   %1 = bitcast <8 x i1> %0 to i8
-  %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %c, <8 x double> %d, i32 17, i32 4)
+  %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %c, <8 x double> %d, i32 17, i32 4)
   %3 = bitcast <8 x i1> %2 to i8
   %conv = zext i8 %1 to i16
   %conv2 = zext i8 %3 to i16
@@ -5146,7 +5146,7 @@ define <16 x float> @bad_mask_transition_2(<8 x double> %a, <8 x double> %b, <8
 ; CHECK-NEXT:    vblendmps %zmm5, %zmm4, %zmm0 {%k1}
 ; CHECK-NEXT:    retq
 entry:
-  %0 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, i32 4)
+  %0 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, i32 4)
   %1 = bitcast <8 x i1> %0 to i8
   %conv = zext i8 %1 to i16
   %2 = bitcast i16 %conv to <16 x i1>
diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll
index cd86d5e10f6..64063bdf833 100644
--- a/llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll
@@ -23,14 +23,14 @@ define zeroext i8 @test_mm512_mask_fpclass_pd_mask(i8 zeroext %__U, <8 x double>
 ; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
 entry:
-  %0 = tail call <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %__A, i32 4)
+  %0 = tail call <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double> %__A, i32 4)
   %1 = bitcast i8 %__U to <8 x i1>
   %2 = and <8 x i1> %0, %1
   %3 = bitcast <8 x i1> %2 to i8
   ret i8 %3
 }

-declare <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32)
+declare <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double>, i32)

 define zeroext i8 @test_mm512_fpclass_pd_mask(<8 x double> %__A) {
 ; CHECK-LABEL: test_mm512_fpclass_pd_mask:
@@ -41,7 +41,7 @@ define zeroext i8 @test_mm512_fpclass_pd_mask(<8 x double> %__A) {
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    ret{{[l|q]}}
 entry:
-  %0 = tail call <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %__A, i32 4)
+  %0 = tail call <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double> %__A, i32 4)
   %1 = bitcast <8 x i1> %0 to i8
   ret i8 %1
 }
@@ -65,14 +65,14 @@ define zeroext i16 @test_mm512_mask_fpclass_ps_mask(i16 zeroext %__U, <16 x floa
 ; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
 entry:
-  %0 = tail call <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %__A, i32 4)
+  %0 = tail call <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float> %__A, i32 4)
   %1 = bitcast i16 %__U to <16 x i1>
   %2 = and <16 x i1> %0, %1
   %3 = bitcast <16 x i1> %2 to i16
   ret i16 %3
 }

-declare <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32)
+declare <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float>, i32)

 define zeroext i16 @test_mm512_fpclass_ps_mask(<16 x float> %__A) {
 ; CHECK-LABEL: test_mm512_fpclass_ps_mask:
@@ -83,7 +83,7 @@ define zeroext i16 @test_mm512_fpclass_ps_mask(<16 x float> %__A) {
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    ret{{[l|q]}}
 entry:
-  %0 = tail call <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %__A, i32 4)
+  %0 = tail call <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float> %__A, i32 4)
   %1 = bitcast <16 x i1> %0 to i16
   ret i16 %1
 }
diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll
index 41e0952d51a..c9bea5862a0 100644
--- a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll
@@ -600,10 +600,10 @@ define <2 x double>@test_int_x86_avx512_mask_range_sd(<2 x double> %x0, <2 x dou
   ret <2 x double> %res4
 }

-declare <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32)
+declare <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double>, i32)

-define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_512:
+define i8 @test_int_x86_avx512_fpclass_pd_512(<8 x double> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_512:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfpclasspd $2, %zmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x48,0x66,0xc8,0x02]
 ; CHECK-NEXT:    vfpclasspd $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x66,0xc0,0x04]
@@ -611,16 +611,16 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0) {
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4)
-  %res1 = call <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2)
+  %res = call <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double> %x0, i32 4)
+  %res1 = call <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double> %x0, i32 2)
   %1 = and <8 x i1> %res1, %res
   %2 = bitcast <8 x i1> %1 to i8
   ret i8 %2
 }
-declare <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32)
+declare <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float>, i32)

-define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_512:
+define i16@test_int_x86_avx512_fpclass_ps_512(<16 x float> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_512:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfpclassps $2, %zmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x48,0x66,0xc8,0x02]
 ; CHECK-NEXT:    vfpclassps $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x66,0xc0,0x04]
@@ -628,8 +628,8 @@ define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0) {
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4)
-  %res1 = call <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 2)
+  %res = call <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float> %x0, i32 4)
+  %res1 = call <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float> %x0, i32 2)
   %1 = and <16 x i1> %res1, %res
   %2 = bitcast <16 x i1> %1 to i16
   ret i16 %2
diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll
index 48cfa9a4eaf..703591acef5 100644
--- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll
@@ -221,7 +221,7 @@ define zeroext i8 @test_mm_mask_fpclass_pd_mask(i8 zeroext %__U, <2 x double> %_
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
 entry:
-  %0 = tail call <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %__A, i32 2)
+  %0 = tail call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %__A, i32 2)
   %1 = bitcast i8 %__U to <8 x i1>
   %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
   %2 = and <2 x i1> %0, %extract
@@ -230,7 +230,7 @@ entry:
   ret i8 %4
 }

-declare <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32)
+declare <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double>, i32)

 define zeroext i8 @test_mm_fpclass_pd_mask(<2 x double> %__A) {
 ; CHECK-LABEL: test_mm_fpclass_pd_mask:
@@ -240,7 +240,7 @@ define zeroext i8 @test_mm_fpclass_pd_mask(<2 x double> %__A) {
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    ret{{[l|q]}}
 entry:
-  %0 = tail call <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %__A, i32 2)
+  %0 = tail call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %__A, i32 2)
   %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
   %2 = bitcast <8 x i1> %1 to i8
   ret i8 %2
@@ -265,7 +265,7 @@ define zeroext i8 @test_mm256_mask_fpclass_pd_mask(i8 zeroext %__U, <4 x double>
 ; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
 entry:
-  %0 = tail call <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %__A, i32 2)
+  %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %__A, i32 2)
   %1 = bitcast i8 %__U to <8 x i1>
   %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %2 = and <4 x i1> %0, %extract
@@ -274,7 +274,7 @@ entry:
   ret i8 %4
 }

-declare <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32)
+declare <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double>, i32)

 define zeroext i8 @test_mm256_fpclass_pd_mask(<4 x double> %__A) {
 ; CHECK-LABEL: test_mm256_fpclass_pd_mask:
@@ -285,7 +285,7 @@ define zeroext i8 @test_mm256_fpclass_pd_mask(<4 x double> %__A) {
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    ret{{[l|q]}}
 entry:
-  %0 = tail call <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %__A, i32 2)
+  %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %__A, i32 2)
   %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   %2 = bitcast <8 x i1> %1 to i8
   ret i8 %2
@@ -308,7 +308,7 @@ define zeroext i8 @test_mm_mask_fpclass_ps_mask(i8 zeroext %__U, <4 x float> %__
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
 entry:
-  %0 = tail call <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %__A, i32 2)
+  %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %__A, i32 2)
   %1 = bitcast i8 %__U to <8 x i1>
   %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %2 = and <4 x i1> %0, %extract
@@ -317,7 +317,7 @@ entry:
   ret i8 %4
 }

-declare <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32)
+declare <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float>, i32)

 define zeroext i8 @test_mm_fpclass_ps_mask(<4 x float> %__A) {
 ; CHECK-LABEL: test_mm_fpclass_ps_mask:
@@ -327,7 +327,7 @@ define zeroext i8 @test_mm_fpclass_ps_mask(<4 x float> %__A) {
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    ret{{[l|q]}}
 entry:
-  %0 = tail call <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %__A, i32 2)
+  %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %__A, i32 2)
   %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   %2 = bitcast <8 x i1> %1 to i8
   ret i8 %2
@@ -352,14 +352,14 @@ define zeroext i8 @test_mm256_mask_fpclass_ps_mask(i8 zeroext %__U, <8 x float>
 ; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
 entry:
-  %0 = tail call <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %__A, i32 2)
+  %0 = tail call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %__A, i32 2)
   %1 = bitcast i8 %__U to <8 x i1>
   %2 = and <8 x i1> %0, %1
   %3 = bitcast <8 x i1> %2 to i8
   ret i8 %3
 }

-declare <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32)
+declare <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float>, i32)

 define zeroext i8 @test_mm256_fpclass_ps_mask(<8 x float> %__A) {
 ; CHECK-LABEL: test_mm256_fpclass_ps_mask:
@@ -370,7 +370,7 @@ define zeroext i8 @test_mm256_fpclass_ps_mask(<8 x float> %__A) {
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    ret{{[l|q]}}
 entry:
-  %0 = tail call <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %__A, i32 2)
+  %0 = tail call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %__A, i32 2)
   %1 = bitcast <8 x i1> %0 to i8
   ret i8 %1
 }
diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll
index e0bfc188a27..6ba69e7be95 100644
--- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll
@@ -734,28 +734,28 @@ define <8 x float>@test_int_x86_avx512_mask_range_ps_256(<8 x float> %x0, <8 x f
   ret <8 x float> %res2
 }

-declare <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32)
+declare <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float>, i32)

-define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_128:
+define i8 @test_int_x86_avx512_fpclass_ps_128(<4 x float> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_128:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfpclassps $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc8,0x04]
 ; CHECK-NEXT:    vfpclassps $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x66,0xc0,0x02]
 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 2)
-  %res1 = call <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 4)
+  %res = call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %x0, i32 2)
+  %res1 = call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %x0, i32 4)
   %1 = and <4 x i1> %res1, %res
   %2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   %3 = bitcast <8 x i1> %2 to i8
   ret i8 %3
 }

-declare <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32)
+declare <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float>, i32)

-define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_256:
+define i8 @test_int_x86_avx512_fpclass_ps_256(<8 x float> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfpclassps $4, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x66,0xc8,0x04]
 ; CHECK-NEXT:    vfpclassps $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x66,0xc0,0x02]
@@ -763,35 +763,35 @@ define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0) {
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 2)
-  %res1 = call <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 4)
+  %res = call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %x0, i32 2)
+  %res1 = call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %x0, i32 4)
   %1 = and <8 x i1> %res1, %res
   %2 = bitcast <8 x i1> %1 to i8
   ret i8 %2
 }

-declare <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32)
+declare <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double>, i32)

-define i8 @test_int_x86_avx512_mask_fpclass_pd_128(<2 x double> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_128:
+define i8 @test_int_x86_avx512_fpclass_pd_128(<2 x double> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_128:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfpclasspd $2, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc8,0x02]
 ; CHECK-NEXT:    vfpclasspd $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x66,0xc0,0x04]
 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 4)
-  %res1 = call <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 2)
+  %res = call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %x0, i32 4)
+  %res1 = call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %x0, i32 2)
   %1 = and <2 x i1> %res1, %res
   %2 = shufflevector <2 x i1> %1, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
   %3 = bitcast <8 x i1> %2 to i8
   ret i8 %3
 }

-declare <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32)
+declare <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double>, i32)

-define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_256:
+define i8 @test_int_x86_avx512_fpclass_pd_256(<4 x double> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfpclasspd $4, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc8,0x04]
 ; CHECK-NEXT:    vfpclasspd $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x66,0xc0,0x02]
@@ -799,8 +799,8 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0) {
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 2)
-  %res1 = call <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 4)
+  %res = call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %x0, i32 2)
+  %res1 = call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %x0, i32 4)
   %1 = and <4 x i1> %res1, %res
   %2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   %3 = bitcast <8 x i1> %2 to i8
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
index 6b4b68fb4c1..9bd6c2047cb 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -746,11 +746,11 @@ define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) {
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2)
+  %res = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2)
   %1 = bitcast <8 x i1> %res to i8
   ret i8 %1
 }
-declare <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float>, <8 x float>, i32)
+declare <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float>, <8 x float>, i32)

 define i8 @test_cmpps_128(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: test_cmpps_128:
@@ -759,12 +759,12 @@ define i8 @test_cmpps_128(<4 x float> %a, <4 x float> %b) {
 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2)
+  %res = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2)
   %1 = shufflevector <4 x i1> %res, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   %2 = bitcast <8 x i1> %1 to i8
   ret i8 %2
 }
-declare <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float>, <4 x float>, i32)
+declare <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float>, <4 x float>, i32)

 define i8 @test_cmppd_256(<4 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: test_cmppd_256:
@@ -774,12 +774,12 @@ define i8 @test_cmppd_256(<4 x double> %a, <4 x double> %b) {
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2)
+  %res = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2)
   %1 = shufflevector <4 x i1> %res, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   %2 = bitcast <8 x i1> %1 to i8
   ret i8 %2
 }
-declare <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double>, <4 x double>, i32)
+declare <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double>, <4 x double>, i32)

 define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: test_cmppd_128:
@@ -788,12 +788,12 @@ define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) {
 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2)
+  %res = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2)
   %1 = shufflevector <2 x i1> %res, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
   %2 = bitcast <8 x i1> %1 to i8
   ret i8 %2
 }
-declare <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double>, <2 x double>, i32)
+declare <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double>, <2 x double>, i32)

 define <8 x float> @test_mm512_maskz_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
 ; X86-LABEL: test_mm512_maskz_max_ps_256:
diff --git a/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll b/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
index e6d1fa982a6..8c3fe900336 100644
--- a/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
@@ -19762,7 +19762,7 @@ entry:
 }

-declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)
+declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)

 define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
 ; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
 ; VLX:       # %bb.0: # %entry
@@ -21229,7 +21229,7 @@ define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64
 entry:
   %0 = bitcast <8 x i64> %__a to <16 x float>
   %1 = bitcast <8 x i64> %__b to <16 x float>
-  %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
+  %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
   %3 = bitcast <16 x i1> %2 to i16
   %4 = zext i16 %3 to i32
   ret i32 %4
@@ -21254,7 +21254,7 @@ define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_sae_mask(i16 zeroext %__u,
 entry:
   %0 = bitcast <8 x i64> %__a to <16 x float>
   %1 = bitcast <8 x i64> %__b to <16 x float>
-  %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
+  %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
   %3 = bitcast i16 %__u to <16 x i1>
   %4 = and <16 x i1> %2, %3
   %5 = bitcast <16 x i1> %4 to i16
@@ -21444,7 +21444,7 @@ define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64
 entry:
   %0 = bitcast <8 x i64> %__a to <16 x float>
   %1 = bitcast <8 x i64> %__b to <16 x float>
-  %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
+  %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
   %3 = bitcast <16 x i1> %2 to i16
   %4 = zext i16 %3 to i64
   ret i64 %4
@@ -21469,7 +21469,7 @@ define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_sae_mask(i16 zeroext %__u,
 entry:
   %0 = bitcast <8 x i64> %__a to <16 x float>
   %1 = bitcast <8 x i64> %__b to <16 x float>
-  %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
+  %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
   %3 = bitcast i16 %__u to <16 x i1>
   %4 = and <16 x i1> %2, %3
   %5 = bitcast <16 x i1> %4 to i16
@@ -21479,7 +21479,7 @@ entry:

-declare <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)
+declare <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)

 define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
 ; VLX:       # %bb.0: # %entry
@@ -23345,7 +23345,7 @@ define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_sae_mask(<8 x i64> %__a, <8 x i64>
 entry:
   %0 = bitcast <8 x i64> %__a to <8 x double>
   %1 = bitcast <8 x i64> %__b to <8 x double>
-  %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
+  %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
   %3 = bitcast <8 x i1> %2 to i8
   %4 = zext i8 %3 to i16
   ret i16 %4
@@ -23374,7 +23374,7 @@ define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask(i8 zeroext %__u, <
 entry:
   %0 = bitcast <8 x i64> %__a to <8 x double>
   %1 = bitcast <8 x i64> %__b to <8 x double>
-  %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
+  %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
   %3 = bitcast i8 %__u to <8 x i1>
   %4 = and <8 x i1> %2, %3
   %5 = bitcast <8 x i1> %4 to i8
@@ -23560,7 +23560,7 @@ define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64>
 entry:
   %0 = bitcast <8 x i64> %__a to <8 x double>
   %1 = bitcast <8 x i64> %__b to <8 x double>
-  %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
+  %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
   %3 = bitcast <8 x i1> %2 to i8
   %4 = zext i8 %3 to i32
   ret i32 %4
@@ -23587,7 +23587,7 @@ define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask(i8 zeroext %__u, <
 entry:
   %0 = bitcast <8 x i64> %__a to <8 x double>
   %1 = bitcast <8 x i64> %__b to <8 x double>
-  %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
+  %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
   %3 = bitcast i8 %__u to <8 x i1>
   %4 = and <8 x i1> %2, %3
   %5 = bitcast <8 x i1> %4 to i8
@@ -23780,7 +23780,7 @@ define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64>
 entry:
   %0 = bitcast <8 x i64> %__a to <8 x double>
   %1 = bitcast <8 x i64> %__b to <8 x double>
-  %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
+  %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
   %3 = bitcast <8 x i1> %2 to i8
   %4 = zext i8 %3 to i64
   ret i64 %4
@@ -23807,7 +23807,7 @@ define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask(i8 zeroext %__u, <
 entry:
   %0 = bitcast <8 x i64> %__a to <8 x double>
   %1 = bitcast <8 x i64> %__b to <8 x double>
-  %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
+  %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
   %3 = bitcast i8 %__u to <8 x i1>
   %4 = and <8 x i1> %2, %3
   %5 = bitcast <8 x i1> %4 to i8
@@ -23830,7 +23830,7 @@ define i32 @test_cmpm_rnd_zero(<16 x float> %a, <16 x float> %b) {
 ; NoVLX-NEXT:    kmovw %k0, %eax
 ; NoVLX-NEXT:    vzeroupper
 ; NoVLX-NEXT:    retq
-  %res = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i32 8)
+  %res = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i32 8)
   %1 = bitcast <16 x i1> %res to i16
   %cast = bitcast i16 %1 to <16 x i1>
   %shuffle = shufflevector <16 x i1> %cast, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
index 5beb8d4ad72..6b5f8ae4023 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
@@ -140,21 +140,21 @@ define i8 @stack_fold_cmppd(<8 x double> %a0, <8 x double> %a1) {
 ;CHECK-LABEL: stack_fold_cmppd
 ;CHECK:  vcmpeqpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%k[0-9]}} {{.*#+}} 64-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
-  %res = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a0, <8 x double> %a1, i32 0, i32 4)
+  %res = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %a0, <8 x double> %a1, i32 0, i32 4)
   %2 = bitcast <8 x i1> %res to i8
   ret i8 %2
 }
-declare <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)
+declare <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)

 define i16 @stack_fold_cmpps(<16 x float> %a0, <16 x float> %a1) {
 ;CHECK-LABEL: stack_fold_cmpps
 ;CHECK:  vcmpeqps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%k[0-9]*}} {{.*#+}} 64-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
-  %res = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0, i32 4)
+  %res = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0, i32 4)
   %2 = bitcast <16 x i1> %res to i16
   ret i16 %2
 }
-declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)
+declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)

 define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) {
 ;CHECK-LABEL: stack_fold_divsd_int
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
index 23b485b5145..fd8bed3c82e 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
@@ -152,44 +152,44 @@ define i8 @stack_fold_cmppd(<2 x double> %a0, <2 x double> %a1) {
 ;CHECK-LABEL: stack_fold_cmppd
 ;CHECK:  vcmpeqpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%k[0-9]}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
-  %res = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a0, <2 x double> %a1, i32 0)
+  %res = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %a0, <2 x double> %a1, i32 0)
   %2 = shufflevector <2 x i1> %res, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
   %3 = bitcast <8 x i1> %2 to i8
   ret i8 %3
 }
-declare <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double>, <2 x double>, i32)
+declare <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double>, <2 x double>, i32)

 define i8 @stack_fold_cmppd_ymm(<4 x double> %a0, <4 x double> %a1) {
 ;CHECK-LABEL: stack_fold_cmppd_ymm
 ;CHECK:  vcmpeqpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%k[0-9]}} {{.*#+}} 32-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
-  %res = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i32 0)
+  %res = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i32 0)
   %2 = shufflevector <4 x i1> %res, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   %3 = bitcast <8 x i1> %2 to i8
   ret i8 %3
 }
-declare <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double>, <4 x double>, i32)
+declare <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double>, <4 x double>, i32)

 define i8 @stack_fold_cmpps(<4 x float> %a0, <4 x float> %a1) {
 ;CHECK-LABEL: stack_fold_cmpps
 ;CHECK:  vcmpeqps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%k[0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
-  %res = call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a0, <4 x float> %a1, i32 0)
+  %res = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %a0, <4 x float> %a1, i32 0)
   %2 = shufflevector <4 x i1> %res, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   %3 = bitcast <8 x i1> %2 to i8
   ret i8 %3
 }
-declare <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float>, <4 x float>, i32)
+declare <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float>, <4 x float>, i32)

 define i8 @stack_fold_cmpps_ymm(<8 x float> %a0, <8 x float> %a1) {
 ;CHECK-LABEL: stack_fold_cmpps_ymm
 ;CHECK:  vcmpeqps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%k[0-9]*}} {{.*#+}} 32-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
-  %res = call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 0)
+  %res = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 0)
   %2 = bitcast <8 x i1> %res to i8
   ret i8 %2
 }
-declare <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float>, <8 x float>, i32)
+declare <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float>, <8 x float>, i32)

 define <2 x double> @stack_fold_divpd(<2 x double> %a0, <2 x double> %a1) {
 ;CHECK-LABEL: stack_fold_divpd

