summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td1
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.cpp57
-rw-r--r--llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll18
3 files changed, 63 insertions, 13 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 885f1350fdb..31ed86fb7f0 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -1647,6 +1647,7 @@ def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
X86VectorVTInfo _> {
+ let isCommutable = 1 in
def rri : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
!strconcat("vpcmp${cc}", Suffix,
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index fda3f85a198..65fe45e27e3 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -1789,6 +1789,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPCMPGTDZrr, X86::VPCMPGTDZrm, 0 },
{ X86::VPCMPGTQZrr, X86::VPCMPGTQZrm, 0 },
{ X86::VPCMPGTWZrr, X86::VPCMPGTWZrm, 0 },
+ { X86::VPCMPBZrri, X86::VPCMPBZrmi, 0 },
+ { X86::VPCMPDZrri, X86::VPCMPDZrmi, 0 },
+ { X86::VPCMPQZrri, X86::VPCMPQZrmi, 0 },
+ { X86::VPCMPWZrri, X86::VPCMPWZrmi, 0 },
+ { X86::VPCMPUBZrri, X86::VPCMPUBZrmi, 0 },
+ { X86::VPCMPUDZrri, X86::VPCMPUDZrmi, 0 },
+ { X86::VPCMPUQZrri, X86::VPCMPUQZrmi, 0 },
+ { X86::VPCMPUWZrri, X86::VPCMPUWZrmi, 0 },
// AVX-512{F,VL} foldable instructions
{ X86::VBROADCASTSSZ256rkz, X86::VBROADCASTSSZ256mkz, TB_NO_REVERSE },
@@ -1872,6 +1880,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPCMPGTQZ256rr, X86::VPCMPGTQZ256rm, 0 },
{ X86::VPCMPGTWZ128rr, X86::VPCMPGTWZ128rm, 0 },
{ X86::VPCMPGTWZ256rr, X86::VPCMPGTWZ256rm, 0 },
+ { X86::VPCMPBZ128rri, X86::VPCMPBZ128rmi, 0 },
+ { X86::VPCMPBZ256rri, X86::VPCMPBZ256rmi, 0 },
+ { X86::VPCMPDZ128rri, X86::VPCMPDZ128rmi, 0 },
+ { X86::VPCMPDZ256rri, X86::VPCMPDZ256rmi, 0 },
+ { X86::VPCMPQZ128rri, X86::VPCMPQZ128rmi, 0 },
+ { X86::VPCMPQZ256rri, X86::VPCMPQZ256rmi, 0 },
+ { X86::VPCMPWZ128rri, X86::VPCMPWZ128rmi, 0 },
+ { X86::VPCMPWZ256rri, X86::VPCMPWZ256rmi, 0 },
+ { X86::VPCMPUBZ128rri, X86::VPCMPUBZ128rmi, 0 },
+ { X86::VPCMPUBZ256rri, X86::VPCMPUBZ256rmi, 0 },
+ { X86::VPCMPUDZ128rri, X86::VPCMPUDZ128rmi, 0 },
+ { X86::VPCMPUDZ256rri, X86::VPCMPUDZ256rmi, 0 },
+ { X86::VPCMPUQZ128rri, X86::VPCMPUQZ128rmi, 0 },
+ { X86::VPCMPUQZ256rri, X86::VPCMPUQZ256rmi, 0 },
+ { X86::VPCMPUWZ128rri, X86::VPCMPUWZ128rmi, 0 },
+ { X86::VPCMPUWZ256rri, X86::VPCMPUWZ256rmi, 0 },
{ X86::VPADDBZ128rr, X86::VPADDBZ128rm, 0 },
{ X86::VPADDBZ256rr, X86::VPADDBZ256rm, 0 },
{ X86::VPADDDZ128rr, X86::VPADDDZ128rm, 0 },
@@ -3484,6 +3508,37 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
return nullptr;
}
}
+ case X86::VPCMPBZ128rri: case X86::VPCMPUBZ128rri:
+ case X86::VPCMPBZ256rri: case X86::VPCMPUBZ256rri:
+ case X86::VPCMPBZrri: case X86::VPCMPUBZrri:
+ case X86::VPCMPDZ128rri: case X86::VPCMPUDZ128rri:
+ case X86::VPCMPDZ256rri: case X86::VPCMPUDZ256rri:
+ case X86::VPCMPDZrri: case X86::VPCMPUDZrri:
+ case X86::VPCMPQZ128rri: case X86::VPCMPUQZ128rri:
+ case X86::VPCMPQZ256rri: case X86::VPCMPUQZ256rri:
+ case X86::VPCMPQZrri: case X86::VPCMPUQZrri:
+ case X86::VPCMPWZ128rri: case X86::VPCMPUWZ128rri:
+ case X86::VPCMPWZ256rri: case X86::VPCMPUWZ256rri:
+ case X86::VPCMPWZrri: case X86::VPCMPUWZrri: {
+ // Flip comparison mode immediate (if necessary).
+ unsigned Imm = MI.getOperand(3).getImm() & 0x7;
+ switch (Imm) {
+ default: llvm_unreachable("Unreachable!");
+ case 0x01: Imm = 0x06; break; // LT -> NLE
+ case 0x02: Imm = 0x05; break; // LE -> NLT
+ case 0x05: Imm = 0x02; break; // NLT -> LE
+ case 0x06: Imm = 0x01; break; // NLE -> LT
+ case 0x00: // EQ
+ case 0x03: // FALSE
+ case 0x04: // NE
+ case 0x07: // TRUE
+ break;
+ }
+ auto &WorkingMI = cloneIfNew(MI);
+ WorkingMI.getOperand(3).setImm(Imm);
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+ OpIdx1, OpIdx2);
+ }
case X86::VPCOMBri: case X86::VPCOMUBri:
case X86::VPCOMDri: case X86::VPCOMUDri:
case X86::VPCOMQri: case X86::VPCOMUQri:
@@ -3491,6 +3546,7 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
// Flip comparison mode immediate (if necessary).
unsigned Imm = MI.getOperand(3).getImm() & 0x7;
switch (Imm) {
+ default: llvm_unreachable("Unreachable!");
case 0x00: Imm = 0x02; break; // LT -> GT
case 0x01: Imm = 0x03; break; // LE -> GE
case 0x02: Imm = 0x00; break; // GT -> LT
@@ -3499,7 +3555,6 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
case 0x05: // NE
case 0x06: // FALSE
case 0x07: // TRUE
- default:
break;
}
auto &WorkingMI = cloneIfNew(MI);
diff --git a/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll
index db6119629c8..e0acf2be653 100644
--- a/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll
@@ -132,8 +132,7 @@ define <8 x i32> @test256_8(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) noun
define <8 x i32> @test256_8b(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nounwind {
; CHECK-LABEL: test256_8b:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu32 (%rdi), %ymm2
-; CHECK-NEXT: vpcmpnltud %ymm0, %ymm2, %k1
+; CHECK-NEXT: vpcmpleud (%rdi), %ymm0, %k1
; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
%y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
@@ -277,8 +276,7 @@ define <8 x i32> @test256_17(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwi
define <8 x i32> @test256_18(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwind {
; CHECK-LABEL: test256_18:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu32 (%rdi), %ymm2
-; CHECK-NEXT: vpcmpneqd %ymm0, %ymm2, %k1
+; CHECK-NEXT: vpcmpneqd (%rdi), %ymm0, %k1
; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
%y = load <8 x i32>, <8 x i32>* %yp, align 4
@@ -302,8 +300,7 @@ define <8 x i32> @test256_19(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwi
define <8 x i32> @test256_20(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwind {
; CHECK-LABEL: test256_20:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu32 (%rdi), %ymm2
-; CHECK-NEXT: vpcmpnltud %ymm0, %ymm2, %k1
+; CHECK-NEXT: vpcmpleud (%rdi), %ymm0, %k1
; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
%y = load <8 x i32>, <8 x i32>* %yp, align 4
@@ -443,8 +440,7 @@ define <4 x i32> @test128_8(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) noun
define <4 x i32> @test128_8b(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind {
; CHECK-LABEL: test128_8b:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu32 (%rdi), %xmm2
-; CHECK-NEXT: vpcmpnltud %xmm0, %xmm2, %k1
+; CHECK-NEXT: vpcmpleud (%rdi), %xmm0, %k1
; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: retq
%y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
@@ -588,8 +584,7 @@ define <4 x i32> @test128_17(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nou
define <4 x i32> @test128_18(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind {
; CHECK-LABEL: test128_18:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu32 (%rdi), %xmm2
-; CHECK-NEXT: vpcmpneqd %xmm0, %xmm2, %k1
+; CHECK-NEXT: vpcmpneqd (%rdi), %xmm0, %k1
; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: retq
%y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
@@ -613,8 +608,7 @@ define <4 x i32> @test128_19(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nou
define <4 x i32> @test128_20(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind {
; CHECK-LABEL: test128_20:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu32 (%rdi), %xmm2
-; CHECK-NEXT: vpcmpnltud %xmm0, %xmm2, %k1
+; CHECK-NEXT: vpcmpleud (%rdi), %xmm0, %k1
; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: retq
%y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
OpenPOWER on IntegriCloud