diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/ARM/ARMInstrMVE.td | 22 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/MVEVPTBlockPass.cpp | 114 |
2 files changed, 118 insertions, 18 deletions
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 5b4b65ada4e..f996024f0cf 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -4526,7 +4526,7 @@ class MVE_VPT<string suffix, bits<2> size, dag iops, string asm, list<dag> patte let Inst{7} = fc{0}; let Inst{4} = 0b0; - let Defs = [VPR, P0]; + let Defs = [VPR]; } class MVE_VPTt1<string suffix, bits<2> size, dag iops> @@ -4542,7 +4542,7 @@ class MVE_VPTt1<string suffix, bits<2> size, dag iops> class MVE_VPTt1i<string suffix, bits<2> size> : MVE_VPTt1<suffix, size, - (ins vpt_mask:$Mk, pred_basic_i:$fc, MQPR:$Qn, MQPR:$Qm)> { + (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_i:$fc)> { let Inst{12} = 0b0; let Inst{0} = 0b0; } @@ -4553,7 +4553,7 @@ def MVE_VPTv16i8 : MVE_VPTt1i<"i8", 0b00>; class MVE_VPTt1u<string suffix, bits<2> size> : MVE_VPTt1<suffix, size, - (ins vpt_mask:$Mk, pred_basic_u:$fc, MQPR:$Qn, MQPR:$Qm)> { + (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_u:$fc)> { let Inst{12} = 0b0; let Inst{0} = 0b1; } @@ -4564,7 +4564,7 @@ def MVE_VPTv16u8 : MVE_VPTt1u<"u8", 0b00>; class MVE_VPTt1s<string suffix, bits<2> size> : MVE_VPTt1<suffix, size, - (ins vpt_mask:$Mk, pred_basic_s:$fc, MQPR:$Qn, MQPR:$Qm)> { + (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_s:$fc)> { let Inst{12} = 0b1; } @@ -4586,7 +4586,7 @@ class MVE_VPTt2<string suffix, bits<2> size, dag iops> class MVE_VPTt2i<string suffix, bits<2> size> : MVE_VPTt2<suffix, size, - (ins vpt_mask:$Mk, pred_basic_i:$fc, MQPR:$Qn, GPRwithZR:$Rm)> { + (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_i:$fc)> { let Inst{12} = 0b0; let Inst{5} = 0b0; } @@ -4597,7 +4597,7 @@ def MVE_VPTv16i8r : MVE_VPTt2i<"i8", 0b00>; class MVE_VPTt2u<string suffix, bits<2> size> : MVE_VPTt2<suffix, size, - (ins vpt_mask:$Mk, pred_basic_u:$fc, MQPR:$Qn, GPRwithZR:$Rm)> { + (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_u:$fc)> { let Inst{12} = 0b0; let Inst{5} = 0b1; } @@ -4608,7 +4608,7 @@ def MVE_VPTv16u8r : MVE_VPTt2u<"u8", 0b00>; class MVE_VPTt2s<string suffix, bits<2> size> : MVE_VPTt2<suffix, size, - (ins vpt_mask:$Mk, pred_basic_s:$fc, MQPR:$Qn, GPRwithZR:$Rm)> { + (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_s:$fc)> { let Inst{12} = 0b1; } @@ -4637,12 +4637,12 @@ class MVE_VPTf<string suffix, bit size, dag iops, string asm, list<dag> pattern= let Inst{7} = fc{0}; let Inst{4} = 0b0; - let Defs = [P0]; + let Defs = [VPR]; let Predicates = [HasMVEFloat]; } class MVE_VPTft1<string suffix, bit size> - : MVE_VPTf<suffix, size, (ins vpt_mask:$Mk, pred_basic_fp:$fc, MQPR:$Qn, MQPR:$Qm), + : MVE_VPTf<suffix, size, (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_fp:$fc), "$fc, $Qn, $Qm"> { bits<3> fc; bits<4> Qm; @@ -4657,7 +4657,7 @@ def MVE_VPTv4f32 : MVE_VPTft1<"f32", 0b0>; def MVE_VPTv8f16 : MVE_VPTft1<"f16", 0b1>; class MVE_VPTft2<string suffix, bit size> - : MVE_VPTf<suffix, size, (ins vpt_mask:$Mk, pred_basic_fp:$fc, MQPR:$Qn, GPRwithZR:$Rm), + : MVE_VPTf<suffix, size, (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_fp:$fc), "$fc, $Qn, $Rm"> { bits<3> fc; bits<4> Rm; @@ -4683,7 +4683,7 @@ def MVE_VPST : MVE_MI<(outs ), (ins vpt_mask:$Mk), NoItinerary, let Unpredictable{7} = 0b1; let Unpredictable{5} = 0b1; - let Defs = [P0]; + let Defs = [VPR]; } def MVE_VPSEL : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary, diff --git a/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp b/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp index bfd7d37dfc8..9b1bd21ef28 100644 --- a/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp +++ b/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp @@ -80,6 +80,90 @@ enum VPTMaskValue { TETE = 15 // 0b1111 }; +unsigned VCMPOpcodeToVPT(unsigned Opcode) { + switch (Opcode) { + case ARM::MVE_VCMPf32: + return ARM::MVE_VPTv4f32; + case ARM::MVE_VCMPf16: + return ARM::MVE_VPTv8f16; + case ARM::MVE_VCMPi8: + return ARM::MVE_VPTv16i8; + case ARM::MVE_VCMPi16: + return ARM::MVE_VPTv8i16; + case ARM::MVE_VCMPi32: + return ARM::MVE_VPTv4i32; + case ARM::MVE_VCMPu8: + return ARM::MVE_VPTv16u8; + case ARM::MVE_VCMPu16: + return ARM::MVE_VPTv8u16; + case ARM::MVE_VCMPu32: + return ARM::MVE_VPTv4u32; + case ARM::MVE_VCMPs8: + return ARM::MVE_VPTv16s8; + case ARM::MVE_VCMPs16: + return ARM::MVE_VPTv8s16; + case ARM::MVE_VCMPs32: + return ARM::MVE_VPTv4s32; + + case ARM::MVE_VCMPf32r: + return ARM::MVE_VPTv4f32r; + case ARM::MVE_VCMPf16r: + return ARM::MVE_VPTv8f16r; + case ARM::MVE_VCMPi8r: + return ARM::MVE_VPTv16i8r; + case ARM::MVE_VCMPi16r: + return ARM::MVE_VPTv8i16r; + case ARM::MVE_VCMPi32r: + return ARM::MVE_VPTv4i32r; + case ARM::MVE_VCMPu8r: + return ARM::MVE_VPTv16u8r; + case ARM::MVE_VCMPu16r: + return ARM::MVE_VPTv8u16r; + case ARM::MVE_VCMPu32r: + return ARM::MVE_VPTv4u32r; + case ARM::MVE_VCMPs8r: + return ARM::MVE_VPTv16s8r; + case ARM::MVE_VCMPs16r: + return ARM::MVE_VPTv8s16r; + case ARM::MVE_VCMPs32r: + return ARM::MVE_VPTv4s32r; + + default: + return 0; + } +} + +MachineInstr *findVCMPToFoldIntoVPST(MachineBasicBlock::iterator MI, + const TargetRegisterInfo *TRI, + unsigned &NewOpcode) { + // Search backwards to the instruction that defines VPR. This may or not + // be a VCMP, we check that after this loop. If we find another instruction + // that reads cpsr, we return nullptr. + MachineBasicBlock::iterator CmpMI = MI; + while (CmpMI != MI->getParent()->begin()) { + --CmpMI; + if (CmpMI->modifiesRegister(ARM::VPR, TRI)) + break; + if (CmpMI->readsRegister(ARM::VPR, TRI)) + break; + } + + if (CmpMI == MI) + return nullptr; + NewOpcode = VCMPOpcodeToVPT(CmpMI->getOpcode()); + if (NewOpcode == 0) + return nullptr; + + // Search forward from CmpMI to MI, checking if either register was def'd + if (registerDefinedBetween(CmpMI->getOperand(1).getReg(), std::next(CmpMI), + MI, TRI)) + return nullptr; + if (registerDefinedBetween(CmpMI->getOperand(2).getReg(), std::next(CmpMI), + MI, TRI)) + return nullptr; + return &*CmpMI; +} + bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) { bool Modified = false; MachineBasicBlock::instr_iterator MBIter = Block.instr_begin(); @@ -123,26 +207,42 @@ bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) { ++MBIter; }; - // Create the new VPST - MachineInstrBuilder MIBuilder = - BuildMI(Block, MI, dl, TII->get(ARM::MVE_VPST)); + unsigned BlockMask = 0; switch (VPTInstCnt) { case 1: - MIBuilder.addImm(VPTMaskValue::T); + BlockMask = VPTMaskValue::T; break; case 2: - MIBuilder.addImm(VPTMaskValue::TT); + BlockMask = VPTMaskValue::TT; break; case 3: - MIBuilder.addImm(VPTMaskValue::TTT); + BlockMask = VPTMaskValue::TTT; break; case 4: - MIBuilder.addImm(VPTMaskValue::TTTT); + BlockMask = VPTMaskValue::TTTT; break; default: llvm_unreachable("Unexpected number of instruction in a VPT block"); }; + // Search back for a VCMP that can be folded to create a VPT, or else create + // a VPST directly + MachineInstrBuilder MIBuilder; + unsigned NewOpcode; + MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode); + if (VCMP) { + LLVM_DEBUG(dbgs() << " folding VCMP into VPST: "; VCMP->dump()); + MIBuilder = BuildMI(Block, MI, dl, TII->get(NewOpcode)); + MIBuilder.addImm(BlockMask); + MIBuilder.add(VCMP->getOperand(1)); + MIBuilder.add(VCMP->getOperand(2)); + MIBuilder.add(VCMP->getOperand(3)); + VCMP->eraseFromParent(); + } else { + MIBuilder = BuildMI(Block, MI, dl, TII->get(ARM::MVE_VPST)); + MIBuilder.addImm(BlockMask); + } + finalizeBundle( Block, MachineBasicBlock::instr_iterator(MIBuilder.getInstr()), MBIter); |