author     Craig Topper <craig.topper@gmail.com>    2016-08-28 06:06:28 +0000
committer  Craig Topper <craig.topper@gmail.com>    2016-08-28 06:06:28 +0000
commit     abe80cc04df000d51895d7532c748be2f25fbd86 (patch)
tree       2e1bd49eafd6e3481cde380221656168279b7ff1 /llvm/test
parent     8046e2033e725a811f5078652ce21e26e736375d (diff)
[AVX-512] Promote AND/OR/XOR to v2i64/v4i64/v8i64 even when we have AVX512F/AVX512VL.
Previously we weren't creating masked logical operations if bitcasts appeared between the logic operation and the select. The IR optimizers can move bitcasts across logic operations and create these cases. To minimize the number of cases we need to handle, this change promotes all logic ops to an i64 vector type, just as we already do when only SSE or AVX is available.
Unfortunately, this also makes it difficult to select unmasked VPANDD/VPORD/VPXORD in all of the cases where they were previously used, which accounts for most of the test changes. It should not result in any functional change, though.
llvm-svn: 279929
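For illustration, the IR shape this change targets looks roughly like the sketch below. It is modeled on the test_mm512_mask_and_epi32 test updated in this patch; the function and value names here are illustrative rather than copied from the test. An i64-typed logic op is bitcast to i32 elements before feeding a masked select, and the goal is for the select to still fold into a single masked logic instruction despite the intervening bitcasts.

  define <8 x i64> @mask_and_sketch(<8 x i64> %src, i16 zeroext %k, <8 x i64> %a, <8 x i64> %b) {
  entry:
    ; the logic op itself is performed on <8 x i64> elements
    %and = and <8 x i64> %a, %b
    ; the IR optimizers can leave bitcasts between the logic op and the select
    %and.i32 = bitcast <8 x i64> %and to <16 x i32>
    %src.i32 = bitcast <8 x i64> %src to <16 x i32>
    %mask = bitcast i16 %k to <16 x i1>
    ; masked merge that should still become a masked logic instruction
    %sel = select <16 x i1> %mask, <16 x i32> %and.i32, <16 x i32> %src.i32
    %res = bitcast <16 x i32> %sel to <8 x i64>
    ret <8 x i64> %res
  }

With this patch the AVX512F-only (KNL) checks expect a masked vpandd, while the AVX512VL/DQ (SKX) checks expect a masked vandps, as seen in the avx512-logic.ll changes below.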
Diffstat (limited to 'llvm/test')
 -rw-r--r--  llvm/test/CodeGen/X86/avx512-arith.ll              | 12
 -rw-r--r--  llvm/test/CodeGen/X86/avx512-logic.ll              | 51
 -rw-r--r--  llvm/test/CodeGen/X86/avx512-select.ll             |  2
 -rw-r--r--  llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll   |  8
 -rw-r--r--  llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll | 12
 -rw-r--r--  llvm/test/CodeGen/X86/vector-bitreverse.ll         |  6
6 files changed, 53 insertions, 38 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-arith.ll b/llvm/test/CodeGen/X86/avx512-arith.ll
index 72219a8413e..783983344cf 100644
--- a/llvm/test/CodeGen/X86/avx512-arith.ll
+++ b/llvm/test/CodeGen/X86/avx512-arith.ll
@@ -945,17 +945,17 @@ define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
 define <16 x float> @test_fxor(<16 x float> %a) {
 ; AVX512F-LABEL: test_fxor:
 ; AVX512F: ## BB#0:
-; AVX512F-NEXT: vpxord {{.*}}(%rip), %zmm0, %zmm0
+; AVX512F-NEXT: vpxorq {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512F-NEXT: retq
 ;
 ; AVX512VL-LABEL: test_fxor:
 ; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: vpxord {{.*}}(%rip), %zmm0, %zmm0
+; AVX512VL-NEXT: vpxorq {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512BW-LABEL: test_fxor:
 ; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vpxord {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vpxorq {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512DQ-LABEL: test_fxor:
@@ -1015,17 +1015,17 @@ declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
 define <16 x float> @fabs_v16f32(<16 x float> %p)
 ; AVX512F-LABEL: fabs_v16f32:
 ; AVX512F: ## BB#0:
-; AVX512F-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
+; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512F-NEXT: retq
 ;
 ; AVX512VL-LABEL: fabs_v16f32:
 ; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
+; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512BW-LABEL: fabs_v16f32:
 ; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512DQ-LABEL: fabs_v16f32:
diff --git a/llvm/test/CodeGen/X86/avx512-logic.ll b/llvm/test/CodeGen/X86/avx512-logic.ll
index 543ce12a924..e4ee454dd00 100644
--- a/llvm/test/CodeGen/X86/avx512-logic.ll
+++ b/llvm/test/CodeGen/X86/avx512-logic.ll
@@ -430,12 +430,17 @@ define <8 x double> @masked_xor_v8f64(<8 x double> %a, <8 x double> %b, <8 x dou
 }
 define <8 x i64> @test_mm512_mask_and_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
-; ALL-LABEL: test_mm512_mask_and_epi32:
-; ALL: ## BB#0: ## %entry
-; ALL-NEXT: vpandq %zmm2, %zmm1, %zmm1
-; ALL-NEXT: kmovw %edi, %k1
-; ALL-NEXT: vpblendmd %zmm1, %zmm0, %zmm0 {%k1}
-; ALL-NEXT: retq
+; KNL-LABEL: test_mm512_mask_and_epi32:
+; KNL: ## BB#0: ## %entry
+; KNL-NEXT: kmovw %edi, %k1
+; KNL-NEXT: vpandd %zmm2, %zmm1, %zmm0 {%k1}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_mm512_mask_and_epi32:
+; SKX: ## BB#0: ## %entry
+; SKX-NEXT: kmovw %edi, %k1
+; SKX-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1}
+; SKX-NEXT: retq
 entry:
 %and1.i.i = and <8 x i64> %__a, %__b
 %0 = bitcast <8 x i64> %and1.i.i to <16 x i32>
@@ -447,12 +452,17 @@ entry:
 }
 define <8 x i64> @test_mm512_mask_or_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
-; ALL-LABEL: test_mm512_mask_or_epi32:
-; ALL: ## BB#0: ## %entry
-; ALL-NEXT: vporq %zmm2, %zmm1, %zmm1
-; ALL-NEXT: kmovw %edi, %k1
-; ALL-NEXT: vpblendmd %zmm1, %zmm0, %zmm0 {%k1}
-; ALL-NEXT: retq
+; KNL-LABEL: test_mm512_mask_or_epi32:
+; KNL: ## BB#0: ## %entry
+; KNL-NEXT: kmovw %edi, %k1
+; KNL-NEXT: vpord %zmm2, %zmm1, %zmm0 {%k1}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_mm512_mask_or_epi32:
+; SKX: ## BB#0: ## %entry
+; SKX-NEXT: kmovw %edi, %k1
+; SKX-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1}
+; SKX-NEXT: retq
 entry:
 %or1.i.i = or <8 x i64> %__a, %__b
 %0 = bitcast <8 x i64> %or1.i.i to <16 x i32>
@@ -464,12 +474,17 @@ entry:
 }
 define <8 x i64> @test_mm512_mask_xor_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
-; ALL-LABEL: test_mm512_mask_xor_epi32:
-; ALL: ## BB#0: ## %entry
-; ALL-NEXT: vpxorq %zmm2, %zmm1, %zmm1
-; ALL-NEXT: kmovw %edi, %k1
-; ALL-NEXT: vpblendmd %zmm1, %zmm0, %zmm0 {%k1}
-; ALL-NEXT: retq
+; KNL-LABEL: test_mm512_mask_xor_epi32:
+; KNL: ## BB#0: ## %entry
+; KNL-NEXT: kmovw %edi, %k1
+; KNL-NEXT: vpxord %zmm2, %zmm1, %zmm0 {%k1}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_mm512_mask_xor_epi32:
+; SKX: ## BB#0: ## %entry
+; SKX-NEXT: kmovw %edi, %k1
+; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1}
+; SKX-NEXT: retq
 entry:
 %xor1.i.i = xor <8 x i64> %__a, %__b
 %0 = bitcast <8 x i64> %xor1.i.i to <16 x i32>
diff --git a/llvm/test/CodeGen/X86/avx512-select.ll b/llvm/test/CodeGen/X86/avx512-select.ll
index d010a83277d..ee9be946c76 100644
--- a/llvm/test/CodeGen/X86/avx512-select.ll
+++ b/llvm/test/CodeGen/X86/avx512-select.ll
@@ -10,7 +10,7 @@ define <16 x i32> @select00(i32 %a, <16 x i32> %b) nounwind {
 ; CHECK-NEXT: ## BB#1:
 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1
 ; CHECK-NEXT: LBB0_2:
-; CHECK-NEXT: vpxord %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0
 ; CHECK-NEXT: retq
 %cmpres = icmp eq i32 %a, 255
 %selres = select i1 %cmpres, <16 x i32> zeroinitializer, <16 x i32> %b
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
index 86fd2dd5ad5..52faee31995 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
@@ -78,7 +78,7 @@ define <8 x double> @stack_fold_andnpd_zmm(<8 x double> %a0, <8 x double> %a1) {
 define <16 x float> @stack_fold_andnps_zmm(<16 x float> %a0, <16 x float> %a1) {
 ;CHECK-LABEL: stack_fold_andnps_zmm
- ;CHECK: vpandnd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
+ ;CHECK: vpandnq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 %2 = bitcast <16 x float> %a0 to <16 x i32>
 %3 = bitcast <16 x float> %a1 to <16 x i32>
@@ -105,7 +105,7 @@ define <8 x double> @stack_fold_andpd_zmm(<8 x double> %a0, <8 x double> %a1) {
 define <16 x float> @stack_fold_andps_zmm(<16 x float> %a0, <16 x float> %a1) {
 ;CHECK-LABEL: stack_fold_andps_zmm
- ;CHECK: vpandd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
+ ;CHECK: vpandq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 %2 = bitcast <16 x float> %a0 to <16 x i32>
 %3 = bitcast <16 x float> %a1 to <16 x i32>
@@ -295,7 +295,7 @@ define <8 x double> @stack_fold_orpd_zmm(<8 x double> %a0, <8 x double> %a1) {
 define <16 x float> @stack_fold_orps_zmm(<16 x float> %a0, <16 x float> %a1) {
 ;CHECK-LABEL: stack_fold_orps_zmm
- ;CHECK: vpord {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
+ ;CHECK: vporq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 %2 = bitcast <16 x float> %a0 to <16 x i32>
 %3 = bitcast <16 x float> %a1 to <16 x i32>
@@ -375,7 +375,7 @@ define <8 x double> @stack_fold_xorpd_zmm(<8 x double> %a0, <8 x double> %a1) {
 define <16 x float> @stack_fold_xorps_zmm(<16 x float> %a0, <16 x float> %a1) {
 ;CHECK-LABEL: stack_fold_xorps_zmm
- ;CHECK: vpxord {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
+ ;CHECK: vpxorq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 %2 = bitcast <16 x float> %a0 to <16 x i32>
 %3 = bitcast <16 x float> %a1 to <16 x i32>
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
index f551e33383b..0f499c268e7 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
@@ -124,7 +124,7 @@ define <4 x double> @stack_fold_andpd_ymm(<4 x double> %a0, <4 x double> %a1) {
 define <4 x float> @stack_fold_andps(<4 x float> %a0, <4 x float> %a1) {
 ;CHECK-LABEL: stack_fold_andps
- ;CHECK: vpandd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ ;CHECK: vpandq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 %2 = bitcast <4 x float> %a0 to <4 x i32>
 %3 = bitcast <4 x float> %a1 to <4 x i32>
@@ -137,7 +137,7 @@ define <4 x float> @stack_fold_andps(<4 x float> %a0, <4 x float> %a1) {
 define <8 x float> @stack_fold_andps_ymm(<8 x float> %a0, <8 x float> %a1) {
 ;CHECK-LABEL: stack_fold_andps_ymm
- ;CHECK: vpandd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ ;CHECK: vpandq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 %2 = bitcast <8 x float> %a0 to <8 x i32>
 %3 = bitcast <8 x float> %a1 to <8 x i32>
@@ -314,7 +314,7 @@ define <4 x double> @stack_fold_orpd_ymm(<4 x double> %a0, <4 x double> %a1) {
 define <4 x float> @stack_fold_orps(<4 x float> %a0, <4 x float> %a1) {
 ;CHECK-LABEL: stack_fold_orps
- ;CHECK: vpord {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ ;CHECK: vporq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 %2 = bitcast <4 x float> %a0 to <4 x i32>
 %3 = bitcast <4 x float> %a1 to <4 x i32>
@@ -327,7 +327,7 @@ define <4 x float> @stack_fold_orps(<4 x float> %a0, <4 x float> %a1) {
 define <8 x float> @stack_fold_orps_ymm(<8 x float> %a0, <8 x float> %a1) {
 ;CHECK-LABEL: stack_fold_orps_ymm
- ;CHECK: vpord {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ ;CHECK: vporq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 %2 = bitcast <8 x float> %a0 to <8 x i32>
 %3 = bitcast <8 x float> %a1 to <8 x i32>
@@ -398,7 +398,7 @@ define <4 x double> @stack_fold_xorpd_ymm(<4 x double> %a0, <4 x double> %a1) {
 define <4 x float> @stack_fold_xorps(<4 x float> %a0, <4 x float> %a1) {
 ;CHECK-LABEL: stack_fold_xorps
- ;CHECK: vpxord {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ ;CHECK: vpxorq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 %2 = bitcast <4 x float> %a0 to <4 x i32>
 %3 = bitcast <4 x float> %a1 to <4 x i32>
@@ -411,7 +411,7 @@ define <4 x float> @stack_fold_xorps(<4 x float> %a0, <4 x float> %a1) {
 define <8 x float> @stack_fold_xorps_ymm(<8 x float> %a0, <8 x float> %a1) {
 ;CHECK-LABEL: stack_fold_xorps_ymm
- ;CHECK: vpxord {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ ;CHECK: vpxorq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 %2 = bitcast <8 x float> %a0 to <8 x i32>
 %3 = bitcast <8 x float> %a1 to <8 x i32>
diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll
index b4b52a60b9c..f9746bcfcde 100644
--- a/llvm/test/CodeGen/X86/vector-bitreverse.ll
+++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll
@@ -2041,12 +2041,12 @@ define <16 x i32> @test_bitreverse_v16i32(<16 x i32> %a) nounwind {
 ; AVX512F-NEXT: vpsrld $24, %zmm0, %zmm1
 ; AVX512F-NEXT: vpsrld $8, %zmm0, %zmm2
 ; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vpord %zmm1, %zmm2, %zmm1
+; AVX512F-NEXT: vporq %zmm1, %zmm2, %zmm1
 ; AVX512F-NEXT: vpslld $24, %zmm0, %zmm2
 ; AVX512F-NEXT: vpslld $8, %zmm0, %zmm0
 ; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpord %zmm0, %zmm2, %zmm0
+; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0
 ; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm1
 ; AVX512F-NEXT: vpslld $4, %zmm1, %zmm1
 ; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0