summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-02-05 18:31:04 +0000
committerCraig Topper <craig.topper@intel.com>2018-02-05 18:31:04 +0000
commit9a06f24704fc832931c2981d9ec4baa2ab895225 (patch)
treec1b836f0439392ff73f35047f6577ddb5ed047e1
parentc0f116e0607ea0da61128fb14806bd5e4230254f (diff)
downloadbcm5719-llvm-9a06f24704fc832931c2981d9ec4baa2ab895225.tar.gz
bcm5719-llvm-9a06f24704fc832931c2981d9ec4baa2ab895225.zip
[X86] Artificially lower the complexity of the scalar ANDN patterns so that AND with immediate will match first.
This allows the immediate to be folded into the AND instead of being forced to be moved into a register. This can sometimes result in shorter encodings, since the AND can sign-extend an immediate. This also allows us to match an AND to a MOVZX after a NOT. As a trade-off, this can cause an extra move if the input to the separate NOT has an additional user, which requires a copy before the NOT. llvm-svn: 324260
-rw-r--r--llvm/lib/Target/X86/X86InstrArithmetic.td5
-rwxr-xr-xllvm/test/CodeGen/X86/avx512-schedule.ll5
-rw-r--r--llvm/test/CodeGen/X86/bmi.ll4
-rw-r--r--llvm/test/CodeGen/X86/pr32282.ll35
4 files changed, 27 insertions, 22 deletions
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index d35b4338c72..e6219aa6fcb 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -1285,12 +1285,13 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
Sched<[WriteALULd, ReadAfterLd]>;
}
-let Predicates = [HasBMI], Defs = [EFLAGS] in {
+// Complexity is reduced to give and with immediate a chance to match first.
+let Predicates = [HasBMI], Defs = [EFLAGS], AddedComplexity = -6 in {
defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32>, T8PS, VEX_4V;
defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64>, T8PS, VEX_4V, VEX_W;
}
-let Predicates = [HasBMI] in {
+let Predicates = [HasBMI], AddedComplexity = -6 in {
def : Pat<(and (not GR32:$src1), GR32:$src2),
(ANDN32rr GR32:$src1, GR32:$src2)>;
def : Pat<(and (not GR64:$src1), GR64:$src2),
diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll
index 7c973af6a6d..886e4f2fbcb 100755
--- a/llvm/test/CodeGen/X86/avx512-schedule.ll
+++ b/llvm/test/CodeGen/X86/avx512-schedule.ll
@@ -6687,9 +6687,8 @@ define i32 @mask16_zext(i16 %x) {
;
; SKX-LABEL: mask16_zext:
; SKX: # %bb.0:
-; SKX-NEXT: movl $65535, %eax # imm = 0xFFFF
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: andnl %eax, %edi, %eax # sched: [1:0.50]
+; SKX-NEXT: notl %edi # sched: [1:0.25]
+; SKX-NEXT: movzwl %di, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%m0 = bitcast i16 %x to <16 x i1>
%m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
diff --git a/llvm/test/CodeGen/X86/bmi.ll b/llvm/test/CodeGen/X86/bmi.ll
index 4da10f6cffa..0e79a5a65c1 100644
--- a/llvm/test/CodeGen/X86/bmi.ll
+++ b/llvm/test/CodeGen/X86/bmi.ll
@@ -188,8 +188,8 @@ define i1 @and_cmp4(i32 %x, i32 %y) {
define i1 @and_cmp_const(i32 %x) {
; CHECK-LABEL: and_cmp_const:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl $43, %eax
-; CHECK-NEXT: andnl %eax, %edi, %eax
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: andl $43, %edi
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%and = and i32 %x, 43
diff --git a/llvm/test/CodeGen/X86/pr32282.ll b/llvm/test/CodeGen/X86/pr32282.ll
index 6da2ae0b564..7398974b354 100644
--- a/llvm/test/CodeGen/X86/pr32282.ll
+++ b/llvm/test/CodeGen/X86/pr32282.ll
@@ -12,24 +12,28 @@
define void @foo() {
; X86-LABEL: foo:
; X86: # %bb.0:
-; X86-NEXT: pushl %eax
+; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: movl d, %eax
-; X86-NEXT: movl d+4, %ecx
-; X86-NEXT: movl $701685459, %edx # imm = 0x29D2DED3
-; X86-NEXT: andnl %edx, %ecx, %ecx
-; X86-NEXT: movl $-564453154, %edx # imm = 0xDE5B20DE
-; X86-NEXT: andnl %edx, %eax, %edx
-; X86-NEXT: shrdl $21, %ecx, %edx
-; X86-NEXT: shrl $21, %ecx
+; X86-NEXT: pushl %eax
+; X86-NEXT: .cfi_def_cfa_offset 12
+; X86-NEXT: .cfi_offset %esi, -8
+; X86-NEXT: movl d, %ecx
+; X86-NEXT: notl %ecx
+; X86-NEXT: movl d+4, %edx
+; X86-NEXT: notl %edx
+; X86-NEXT: andl $701685459, %edx # imm = 0x29D2DED3
+; X86-NEXT: andl $-564453154, %ecx # imm = 0xDE5B20DE
+; X86-NEXT: shrdl $21, %edx, %ecx
+; X86-NEXT: shrl $21, %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: testb %al, %al
-; X86-NEXT: cmovnel %ecx, %edx
-; X86-NEXT: cmovnel %eax, %ecx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: cmovnel %eax, %esi
+; X86-NEXT: cmovel %ecx, %edx
; X86-NEXT: andl $-2, %edx
; X86-NEXT: addl $7, %edx
-; X86-NEXT: adcxl %eax, %ecx
-; X86-NEXT: pushl %ecx
+; X86-NEXT: adcxl %eax, %esi
+; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: pushl %edx
; X86-NEXT: .cfi_adjust_cfa_offset 4
@@ -37,12 +41,13 @@ define void @foo() {
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: pushl $0
; X86-NEXT: .cfi_adjust_cfa_offset 4
-; X86-NEXT: calll __divdi3
+; X86-NEXT: calll __divdi3@PLT
; X86-NEXT: addl $16, %esp
; X86-NEXT: .cfi_adjust_cfa_offset -16
; X86-NEXT: orl %eax, %edx
; X86-NEXT: setne {{[0-9]+}}(%esp)
-; X86-NEXT: popl %eax
+; X86-NEXT: addl $4, %esp
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: foo:
OpenPOWER on IntegriCloud