diff options
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 1 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 40 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/stack-folding-int-avx512.ll | 13 |
3 files changed, 40 insertions, 14 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 56ae8f6cc79..2620679df25 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -1765,6 +1765,7 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode, (_.VT (bitconvert (_.LdFrag addr:$src2))), imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V; + let isCommutable = 1 in def rrik : AVX512AIi8<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, AVX512ICC:$cc), diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index d3ac67f280d..f3094b781c4 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -5230,20 +5230,32 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, return nullptr; } } - case X86::VPCMPBZ128rri: case X86::VPCMPUBZ128rri: - case X86::VPCMPBZ256rri: case X86::VPCMPUBZ256rri: - case X86::VPCMPBZrri: case X86::VPCMPUBZrri: - case X86::VPCMPDZ128rri: case X86::VPCMPUDZ128rri: - case X86::VPCMPDZ256rri: case X86::VPCMPUDZ256rri: - case X86::VPCMPDZrri: case X86::VPCMPUDZrri: - case X86::VPCMPQZ128rri: case X86::VPCMPUQZ128rri: - case X86::VPCMPQZ256rri: case X86::VPCMPUQZ256rri: - case X86::VPCMPQZrri: case X86::VPCMPUQZrri: - case X86::VPCMPWZ128rri: case X86::VPCMPUWZ128rri: - case X86::VPCMPWZ256rri: case X86::VPCMPUWZ256rri: - case X86::VPCMPWZrri: case X86::VPCMPUWZrri: { + case X86::VPCMPBZ128rri: case X86::VPCMPUBZ128rri: + case X86::VPCMPBZ256rri: case X86::VPCMPUBZ256rri: + case X86::VPCMPBZrri: case X86::VPCMPUBZrri: + case X86::VPCMPDZ128rri: case X86::VPCMPUDZ128rri: + case X86::VPCMPDZ256rri: case X86::VPCMPUDZ256rri: + case X86::VPCMPDZrri: case X86::VPCMPUDZrri: + case X86::VPCMPQZ128rri: case X86::VPCMPUQZ128rri: + case X86::VPCMPQZ256rri: case X86::VPCMPUQZ256rri: + case X86::VPCMPQZrri: case X86::VPCMPUQZrri: + case X86::VPCMPWZ128rri: case X86::VPCMPUWZ128rri: + case X86::VPCMPWZ256rri: case X86::VPCMPUWZ256rri: + case X86::VPCMPWZrri: case X86::VPCMPUWZrri: + case X86::VPCMPBZ128rrik: case X86::VPCMPUBZ128rrik: + case X86::VPCMPBZ256rrik: case X86::VPCMPUBZ256rrik: + case X86::VPCMPBZrrik: case X86::VPCMPUBZrrik: + case X86::VPCMPDZ128rrik: case X86::VPCMPUDZ128rrik: + case X86::VPCMPDZ256rrik: case X86::VPCMPUDZ256rrik: + case X86::VPCMPDZrrik: case X86::VPCMPUDZrrik: + case X86::VPCMPQZ128rrik: case X86::VPCMPUQZ128rrik: + case X86::VPCMPQZ256rrik: case X86::VPCMPUQZ256rrik: + case X86::VPCMPQZrrik: case X86::VPCMPUQZrrik: + case X86::VPCMPWZ128rrik: case X86::VPCMPUWZ128rrik: + case X86::VPCMPWZ256rrik: case X86::VPCMPUWZ256rrik: + case X86::VPCMPWZrrik: case X86::VPCMPUWZrrik: { // Flip comparison mode immediate (if necessary). - unsigned Imm = MI.getOperand(3).getImm() & 0x7; + unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x7; switch (Imm) { default: llvm_unreachable("Unreachable!"); case 0x01: Imm = 0x06; break; // LT -> NLE @@ -5257,7 +5269,7 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, break; } auto &WorkingMI = cloneIfNew(MI); - WorkingMI.getOperand(3).setImm(Imm); + WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(Imm); return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, OpIdx1, OpIdx2); } diff --git a/llvm/test/CodeGen/X86/stack-folding-int-avx512.ll b/llvm/test/CodeGen/X86/stack-folding-int-avx512.ll index 7b51b55630f..362e656b4f2 100644 --- a/llvm/test/CodeGen/X86/stack-folding-int-avx512.ll +++ b/llvm/test/CodeGen/X86/stack-folding-int-avx512.ll @@ -546,6 +546,19 @@ define i16 @stack_fold_pcmpled_mask(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* ret i16 %7 } +define i16 @stack_fold_pcmpleud(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %a2, i16 %mask) { + ;CHECK-LABEL: stack_fold_pcmpleud + ;CHECK: vpcmpleud {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%k[0-7]}} {{.*#+}} 64-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() + %2 = load <16 x i32>, <16 x i32>* %a2 + %3 = add <16 x i32> %a1, %2 + %4 = bitcast i16 %mask to <16 x i1> + %5 = icmp uge <16 x i32> %a0, %3 + %6 = and <16 x i1> %5, %4 + %7 = bitcast <16 x i1> %6 to i16 + ret i16 %7 +} + define <64 x i8> @stack_fold_permbvar(<64 x i8> %a0, <64 x i8> %a1) { ;CHECK-LABEL: stack_fold_permbvar ;CHECK: vpermb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload |