-rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.cpp             | 39
-rw-r--r--  llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll |  3
2 files changed, 34 insertions, 8 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 4fa49e61ab2..bc5357d10d1 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -1720,6 +1720,27 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
     return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                    OpIdx1, OpIdx2);
   }
+  case X86::VCMPSDZrr:
+  case X86::VCMPSSZrr:
+  case X86::VCMPPDZrri:
+  case X86::VCMPPSZrri:
+  case X86::VCMPPDZ128rri:
+  case X86::VCMPPSZ128rri:
+  case X86::VCMPPDZ256rri:
+  case X86::VCMPPSZ256rri:
+  case X86::VCMPPDZrrik:
+  case X86::VCMPPSZrrik:
+  case X86::VCMPPDZ128rrik:
+  case X86::VCMPPSZ128rrik:
+  case X86::VCMPPDZ256rrik:
+  case X86::VCMPPSZ256rrik: {
+    unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x1f;
+    Imm = X86::getSwappedVCMPImm(Imm);
+    auto &WorkingMI = cloneIfNew(MI);
+    WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(Imm);
+    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+                                                   OpIdx1, OpIdx2);
+  }
   case X86::VPERM2F128rr:
   case X86::VPERM2I128rr: {
     // Flip permute source immediate.
@@ -1963,17 +1984,23 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
       // Ordered/Unordered/Equal/NotEqual tests
       unsigned Imm = MI.getOperand(3 + OpOffset).getImm() & 0x7;
       switch (Imm) {
+      default:
+        // EVEX versions can be commuted.
+        if ((Desc.TSFlags & X86II::EncodingMask) == X86II::EVEX)
+          break;
+        return false;
       case 0x00: // EQUAL
       case 0x03: // UNORDERED
       case 0x04: // NOT EQUAL
       case 0x07: // ORDERED
-        // The indices of the commutable operands are 1 and 2 (or 2 and 3
-        // when masked).
-        // Assign them to the returned operand indices here.
-        return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1 + OpOffset,
-                                    2 + OpOffset);
+        break;
       }
-      return false;
+
+      // The indices of the commutable operands are 1 and 2 (or 2 and 3
+      // when masked).
+      // Assign them to the returned operand indices here.
+      return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1 + OpOffset,
+                                  2 + OpOffset);
     }
   case X86::MOVSSrr:
     // X86::MOVSDrr is always commutable. MOVSS is only commutable if we can
diff --git a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
index e337fa3ec99..cb0f6ed105d 100644
--- a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
+++ b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
@@ -4457,8 +4457,7 @@ define void @bcast_unfold_cmp_v8f32_refold(float* nocapture %0) {
 ; CHECK-NEXT:    vbroadcastss {{.*#+}} ymm1 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB126_1: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vmovups 4096(%rdi,%rax), %ymm2
-; CHECK-NEXT:    vcmpltps %ymm0, %ymm2, %k1
+; CHECK-NEXT:    vcmpgtps 4096(%rdi,%rax), %ymm0, %k1
 ; CHECK-NEXT:    vmovaps %ymm1, %ymm2
 ; CHECK-NEXT:    vbroadcastss {{.*}}(%rip), %ymm2 {%k1}
 ; CHECK-NEXT:    vmovups %ymm2, 4096(%rdi,%rax)
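
Note: the core of the change is the predicate rewrite. Commuting the operands of a vcmp is only legal if the 5-bit AVX-512 comparison immediate is replaced by its operand-swapped counterpart, which is what the call to X86::getSwappedVCMPImm does; the EVEX forms can always be commuted because the full 5-bit predicate set contains a swapped partner for every predicate, whereas the legacy forms stay restricted to the four symmetric tests. Below is a minimal sketch of such a swap, written against the documented AVX predicate encoding (LT_OS = 0x01, GT_OS = 0x0E, LE_OS = 0x02, GE_OS = 0x0D, and so on); it illustrates the encoding property the patch relies on and is not necessarily the exact upstream helper.

  // Sketch: swap a 5-bit AVX/AVX-512 VCMP predicate for commuted operands.
  // Each asymmetric predicate (LT, LE, NLT, NLE, NGE, NGT, GE, GT and their
  // quiet/signaling variants) maps to its swapped counterpart by inverting
  // the low nibble; the symmetric predicates (EQ, NEQ, ORD, UNORD, TRUE,
  // FALSE) are fixed points. Bits 1:0 identify which group a predicate is in.
  static unsigned getSwappedVCMPImmSketch(unsigned Imm) {
    switch (Imm & 0x3) {
    case 0x0: // EQ/NEQ rows: symmetric, nothing to do.
    case 0x3: // UNORD/ORD/FALSE/TRUE rows: symmetric, nothing to do.
      break;
    case 0x1: // LT, NLT, NGE, GE -> GT, NGT, NLE, LE.
    case 0x2: // LE, NLE, NGT, GT -> GE, NGE, NLT, LT.
      Imm ^= 0xf; // Bit 4 (quiet vs. signaling half of the table) survives.
      break;
    }
    return Imm;
  }

With this rule the test change falls out naturally: vcmpltps %ymm0, %ymm2 (ymm2 < ymm0) becomes vcmpgtps %ymm2, %ymm0 (ymm0 > ymm2), so the load that previously had to be hoisted into %ymm2 folds into the comparison's memory operand.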

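As a quick sanity check of the identity the commute relies on, cmp(a, b, pred) == cmp(b, a, swap(pred)), here is a scalar model over a handful of predicates (hypothetical helper names; plain C++ comparisons happen to match the quiet/ordered semantics here, since any ordered comparison with NaN is false and != is true):

  #include <cassert>
  #include <cmath>

  // Hypothetical scalar model of a few VCMP predicates.
  static bool cmp(double A, double B, unsigned Imm) {
    switch (Imm) {
    case 0x00: return A == B;  // EQ_OQ
    case 0x01: return A < B;   // LT_OS
    case 0x02: return A <= B;  // LE_OS
    case 0x04: return A != B;  // NEQ_UQ (true on unordered inputs)
    case 0x0D: return A >= B;  // GE_OS
    case 0x0E: return A > B;   // GT_OS
    }
    return false;
  }

  // Same swap rule as the sketch above.
  static unsigned swapPred(unsigned Imm) {
    unsigned Group = Imm & 0x3;
    return (Group == 0x1 || Group == 0x2) ? Imm ^ 0xf : Imm;
  }

  int main() {
    const double Vals[] = {-1.0, 0.0, 3.0, NAN};
    const unsigned Preds[] = {0x00, 0x01, 0x02, 0x04, 0x0D, 0x0E};
    for (double A : Vals)
      for (double B : Vals)
        for (unsigned P : Preds)
          assert(cmp(A, B, P) == cmp(B, A, swapPred(P)));
  }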
