diff options
| author | Craig Topper <craig.topper@intel.com> | 2018-02-08 07:45:55 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-02-08 07:45:55 +0000 |
| commit | f5465f98d2f210c20c94be9fe4edb9e60243ab80 (patch) | |
| tree | 9c6ee00db9bbf609eead41eeaba2e2ef2fab1134 /llvm/test/CodeGen | |
| parent | 7e5ee26d1ae40ecd4966e2142ea1ea7c246becbe (diff) | |
| download | bcm5719-llvm-f5465f98d2f210c20c94be9fe4edb9e60243ab80.tar.gz bcm5719-llvm-f5465f98d2f210c20c94be9fe4edb9e60243ab80.zip | |
[X86] Don't emit KTEST instructions unless only the Z flag is being used
Summary:
KTEST has weird flag behavior. The Z flag is set when all bits in the AND of the k-registers are 0, and the C flag is set when all bits are 1. All other flags are cleared.
We currently emit this instruction in EmitTEST and don't check the condition code. This can lead to strange things like using the S flag after a KTEST for a signed compare.
The domain reassignment pass can also transform TEST instructions into KTEST and is not protected against the flag usage either. For now I've disabled this part of the domain reassignment pass. I tried to comment out the checks in the mir test so that we could recover them later, but I couldn't figure out how to get that to work.
This patch moves the KTEST handling into LowerSETCC and now creates a ktest+x86setcc. I've chosen this approach because I'd like to add support for using the C flag to detect all ones in a followup patch. Doing that requires being able to rewrite the condition code going into the x86setcc so that it differs from the original SETCC condition code.
This fixes PR36182. I'll file a PR to fix domain reassignment once this goes in. Should this be merged to 6.0?
Reviewers: spatel, guyblank, RKSimon, zvi
Reviewed By: guyblank
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D42770
llvm-svn: 324576
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-mask-op.ll | 92 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/domain-reassignment.mir | 48 |
2 files changed, 112 insertions, 28 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll index 3761b9a0ab8..6c2dfcb5cb7 100644 --- a/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -2694,3 +2694,95 @@ define i8 @test_v8i1_mul(i8 %x, i8 %y) { %ret = bitcast <8 x i1> %m2 to i8 ret i8 %ret } + +; Make sure we don't emit a ktest for signed comparisons. +define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) { +; KNL-LABEL: ktest_signed: +; KNL: ## %bb.0: +; KNL-NEXT: pushq %rax +; KNL-NEXT: .cfi_def_cfa_offset 16 +; KNL-NEXT: vporq %zmm1, %zmm0, %zmm0 +; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: testw %ax, %ax +; KNL-NEXT: jle LBB64_1 +; KNL-NEXT: ## %bb.2: ## %bb.2 +; KNL-NEXT: popq %rax +; KNL-NEXT: vzeroupper +; KNL-NEXT: retq +; KNL-NEXT: LBB64_1: ## %bb.1 +; KNL-NEXT: vzeroupper +; KNL-NEXT: callq _foo +; KNL-NEXT: popq %rax +; KNL-NEXT: retq +; +; SKX-LABEL: ktest_signed: +; SKX: ## %bb.0: +; SKX-NEXT: pushq %rax +; SKX-NEXT: .cfi_def_cfa_offset 16 +; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 +; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; SKX-NEXT: kmovd %k0, %eax +; SKX-NEXT: testw %ax, %ax +; SKX-NEXT: jle LBB64_1 +; SKX-NEXT: ## %bb.2: ## %bb.2 +; SKX-NEXT: popq %rax +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq +; SKX-NEXT: LBB64_1: ## %bb.1 +; SKX-NEXT: vzeroupper +; SKX-NEXT: callq _foo +; SKX-NEXT: popq %rax +; SKX-NEXT: retq +; +; AVX512BW-LABEL: ktest_signed: +; AVX512BW: ## %bb.0: +; AVX512BW-NEXT: pushq %rax +; AVX512BW-NEXT: .cfi_def_cfa_offset 16 +; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: testw %ax, %ax +; AVX512BW-NEXT: jle LBB64_1 +; AVX512BW-NEXT: ## %bb.2: ## %bb.2 +; AVX512BW-NEXT: popq %rax +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; AVX512BW-NEXT: LBB64_1: ## %bb.1 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: callq _foo +; AVX512BW-NEXT: popq %rax +; 
AVX512BW-NEXT: retq +; +; AVX512DQ-LABEL: ktest_signed: +; AVX512DQ: ## %bb.0: +; AVX512DQ-NEXT: pushq %rax +; AVX512DQ-NEXT: .cfi_def_cfa_offset 16 +; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: kmovw %k0, %eax +; AVX512DQ-NEXT: testw %ax, %ax +; AVX512DQ-NEXT: jle LBB64_1 +; AVX512DQ-NEXT: ## %bb.2: ## %bb.2 +; AVX512DQ-NEXT: popq %rax +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq +; AVX512DQ-NEXT: LBB64_1: ## %bb.1 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: callq _foo +; AVX512DQ-NEXT: popq %rax +; AVX512DQ-NEXT: retq + %a = icmp eq <16 x i32> %x, zeroinitializer + %b = icmp eq <16 x i32> %y, zeroinitializer + %c = and <16 x i1> %a, %b + %d = bitcast <16 x i1> %c to i16 + %e = icmp sgt i16 %d, 0 + br i1 %e, label %bb.2, label %bb.1 +bb.1: + call void @foo() + br label %bb.2 +bb.2: + ret void +} +declare void @foo() + diff --git a/llvm/test/CodeGen/X86/domain-reassignment.mir b/llvm/test/CodeGen/X86/domain-reassignment.mir index 2c57ecc186b..4dffbbbd27d 100644 --- a/llvm/test/CodeGen/X86/domain-reassignment.mir +++ b/llvm/test/CodeGen/X86/domain-reassignment.mir @@ -256,7 +256,7 @@ constants: body: | ; CHECK-LABEL: name: test_8bitops ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: successors: %bb.1(0x80000000) ; CHECK: liveins: $rdi, $zmm0, $zmm1, $zmm2, $zmm3 ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY $zmm0 @@ -277,9 +277,6 @@ body: | ; CHECK: [[COPY8:%[0-9]+]]:vk8wm = COPY [[COPY7]] ; CHECK: [[VMOVAPDZrrk:%[0-9]+]]:vr512 = VMOVAPDZrrk [[COPY2]], killed [[COPY8]], [[COPY1]] ; CHECK: VMOVAPDZmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVAPDZrrk]] - ; CHECK: KTESTBrr [[KADDBrr]], [[KADDBrr]], implicit-def $eflags - ; CHECK: JE_1 %bb.1, implicit $eflags - ; CHECK: JMP_1 %bb.2 ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: bb.2: @@ -311,9 +308,10 @@ body: | %11 = VMOVAPDZrrk %2, killed %10, %1 
VMOVAPDZmr %0, 1, $noreg, 0, $noreg, killed %11 - TEST8rr %18, %18, implicit-def $eflags - JE_1 %bb.1, implicit $eflags - JMP_1 %bb.2 + ; FIXME We can't replace TEST with KTEST due to flag differences + ; TEST8rr %18, %18, implicit-def $eflags + ; JE_1 %bb.1, implicit $eflags + ; JMP_1 %bb.2 bb.1: @@ -377,7 +375,7 @@ constants: body: | ; CHECK-LABEL: name: test_16bitops ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: successors: %bb.1(0x80000000) ; CHECK: liveins: $rdi, $zmm0, $zmm1, $zmm2, $zmm3 ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY $zmm0 @@ -397,9 +395,6 @@ body: | ; CHECK: [[COPY8:%[0-9]+]]:vk16wm = COPY [[COPY7]] ; CHECK: [[VMOVAPSZrrk:%[0-9]+]]:vr512 = VMOVAPSZrrk [[COPY2]], killed [[COPY8]], [[COPY1]] ; CHECK: VMOVAPSZmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVAPSZrrk]] - ; CHECK: KTESTWrr [[KXORWrr]], [[KXORWrr]], implicit-def $eflags - ; CHECK: JE_1 %bb.1, implicit $eflags - ; CHECK: JMP_1 %bb.2 ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: bb.2: @@ -430,9 +425,10 @@ body: | %11 = VMOVAPSZrrk %2, killed %10, %1 VMOVAPSZmr %0, 1, $noreg, 0, $noreg, killed %11 - TEST16rr %17, %17, implicit-def $eflags - JE_1 %bb.1, implicit $eflags - JMP_1 %bb.2 + ; FIXME We can't replace TEST with KTEST due to flag differences + ; FIXME TEST16rr %17, %17, implicit-def $eflags + ; FIXME JE_1 %bb.1, implicit $eflags + ; FIXME JMP_1 %bb.2 bb.1: @@ -490,7 +486,7 @@ constants: body: | ; CHECK-LABEL: name: test_32bitops ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: successors: %bb.1(0x80000000) ; CHECK: liveins: $rdi, $zmm0, $zmm1 ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY $zmm0 @@ -507,9 +503,6 @@ body: | ; CHECK: [[COPY3:%[0-9]+]]:vk32wm = COPY [[KADDDrr]] ; CHECK: [[VMOVDQU16Zrrk:%[0-9]+]]:vr512 = VMOVDQU16Zrrk [[COPY2]], killed [[COPY3]], [[COPY1]] ; CHECK: VMOVDQA32Zmr [[COPY]], 1, $noreg, 0, 
$noreg, killed [[VMOVDQU16Zrrk]] - ; CHECK: KTESTDrr [[KADDDrr]], [[KADDDrr]], implicit-def $eflags - ; CHECK: JE_1 %bb.1, implicit $eflags - ; CHECK: JMP_1 %bb.2 ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: bb.2: @@ -535,9 +528,10 @@ body: | %4 = VMOVDQU16Zrrk %2, killed %3, %1 VMOVDQA32Zmr %0, 1, $noreg, 0, $noreg, killed %4 - TEST32rr %13, %13, implicit-def $eflags - JE_1 %bb.1, implicit $eflags - JMP_1 %bb.2 + ; FIXME We can't replace TEST with KTEST due to flag differences + ; FIXME TEST32rr %13, %13, implicit-def $eflags + ; FIXME JE_1 %bb.1, implicit $eflags + ; FIXME JMP_1 %bb.2 bb.1: @@ -595,7 +589,7 @@ constants: body: | ; CHECK-LABEL: name: test_64bitops ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: successors: %bb.1(0x80000000) ; CHECK: liveins: $rdi, $zmm0, $zmm1 ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY $zmm0 @@ -612,9 +606,6 @@ body: | ; CHECK: [[COPY3:%[0-9]+]]:vk64wm = COPY [[KADDQrr]] ; CHECK: [[VMOVDQU8Zrrk:%[0-9]+]]:vr512 = VMOVDQU8Zrrk [[COPY2]], killed [[COPY3]], [[COPY1]] ; CHECK: VMOVDQA32Zmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVDQU8Zrrk]] - ; CHECK: KTESTQrr [[KADDQrr]], [[KADDQrr]], implicit-def $eflags - ; CHECK: JE_1 %bb.1, implicit $eflags - ; CHECK: JMP_1 %bb.2 ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: bb.2: @@ -640,9 +631,10 @@ body: | %4 = VMOVDQU8Zrrk %2, killed %3, %1 VMOVDQA32Zmr %0, 1, $noreg, 0, $noreg, killed %4 - TEST64rr %13, %13, implicit-def $eflags - JE_1 %bb.1, implicit $eflags - JMP_1 %bb.2 + ; FIXME We can't replace TEST with KTEST due to flag differences + ; FIXME TEST64rr %13, %13, implicit-def $eflags + ; FIXME JE_1 %bb.1, implicit $eflags + ; FIXME JMP_1 %bb.2 bb.1: |

