diff options
author | Amara Emerson <aemerson@apple.com> | 2019-03-14 22:48:18 +0000 |
---|---|---|
committer | Amara Emerson <aemerson@apple.com> | 2019-03-14 22:48:18 +0000 |
commit | d61b89be8d73d09c9507c28826a468c5ee8f11fc (patch) | |
tree | a7fd415ae205438c5fa46769f5b2db5e4ad6b45b | |
parent | 2ff2298c3e25e2b4a3603c1f78643a116b49e0e5 (diff) | |
download | bcm5719-llvm-d61b89be8d73d09c9507c28826a468c5ee8f11fc.tar.gz bcm5719-llvm-d61b89be8d73d09c9507c28826a468c5ee8f11fc.zip |
[AArch64][GlobalISel] Implement selection for G_UNMERGE of vectors to vectors.
This re-uses the previous support for extract vector elt to extract the
subvectors.
Differential Revision: https://reviews.llvm.org/D59390
llvm-svn: 356213
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp | 152 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/GlobalISel/select-unmerge.mir | 56 |
2 files changed, 155 insertions, 53 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp index e1f26ecf4a1..a2eb920571a 100644 --- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -93,6 +93,8 @@ private: bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const; + bool selectSplitVectorUnmerge(MachineInstr &I, + MachineRegisterInfo &MRI) const; unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const; MachineInstr *emitLoadFromConstantPool(Constant *CPVal, @@ -102,6 +104,10 @@ private: MachineInstr *emitVectorConcat(Optional<unsigned> Dst, unsigned Op1, unsigned Op2, MachineIRBuilder &MIRBuilder) const; + MachineInstr *emitExtractVectorElt(Optional<unsigned> DstReg, + const RegisterBank &DstRB, LLT ScalarTy, + unsigned VecReg, unsigned LaneIdx, + MachineIRBuilder &MIRBuilder) const; ComplexRendererFns selectArithImmed(MachineOperand &Root) const; @@ -1870,6 +1876,68 @@ static bool getConstantValueForReg(unsigned Reg, MachineRegisterInfo &MRI, return true; } +MachineInstr *AArch64InstructionSelector::emitExtractVectorElt( + Optional<unsigned> DstReg, const RegisterBank &DstRB, LLT ScalarTy, + unsigned VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const { + MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); + unsigned CopyOpc = 0; + unsigned ExtractSubReg = 0; + if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) { + LLVM_DEBUG( + dbgs() << "Couldn't determine lane copy opcode for instruction.\n"); + return nullptr; + } + + const TargetRegisterClass *DstRC = + getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true); + if (!DstRC) { + LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n"); + return nullptr; + } + + const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI); + const LLT &VecTy = MRI.getType(VecReg); + const TargetRegisterClass *VecRC = + getRegClassForTypeOnBank(VecTy, VecRB, RBI, true); + if (!VecRC) { + LLVM_DEBUG(dbgs() << "Could not determine source register class.\n"); + return nullptr; + } + + // The register that we're going to copy into. + unsigned InsertReg = VecReg; + if (!DstReg) + DstReg = MRI.createVirtualRegister(DstRC); + // If the lane index is 0, we just use a subregister COPY. + if (LaneIdx == 0) { + auto CopyMI = + BuildMI(MIRBuilder.getMBB(), MIRBuilder.getInsertPt(), + MIRBuilder.getDL(), TII.get(TargetOpcode::COPY), *DstReg) + .addUse(VecReg, 0, ExtractSubReg); + RBI.constrainGenericRegister(*DstReg, *DstRC, MRI); + return &*CopyMI; + } + + // Lane copies require 128-bit wide registers. If we're dealing with an + // unpacked vector, then we need to move up to that width. Insert an implicit + // def and a subregister insert to get us there. + if (VecTy.getSizeInBits() != 128) { + MachineInstr *ScalarToVector = emitScalarToVector( + VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder); + if (!ScalarToVector) + return nullptr; + InsertReg = ScalarToVector->getOperand(0).getReg(); + } + + MachineInstr *LaneCopyMI = + MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx); + constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI); + + // Make sure that we actually constrain the initial copy. + RBI.constrainGenericRegister(*DstReg, *DstRC, MRI); + return LaneCopyMI; +} + bool AArch64InstructionSelector::selectExtractElt( MachineInstr &I, MachineRegisterInfo &MRI) const { assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT && @@ -1878,7 +1946,7 @@ bool AArch64InstructionSelector::selectExtractElt( const LLT NarrowTy = MRI.getType(DstReg); const unsigned SrcReg = I.getOperand(1).getReg(); const LLT WideTy = MRI.getType(SrcReg); - + (void)WideTy; assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() && "source register size too small!"); assert(NarrowTy.isScalar() && "cannot extract vector into vector!"); @@ -1897,63 +1965,44 @@ bool AArch64InstructionSelector::selectExtractElt( if (!getConstantValueForReg(LaneIdxOp.getReg(), MRI, LaneIdx)) return false; - unsigned CopyOpc = 0; - unsigned ExtractSubReg = 0; - if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits())) { - LLVM_DEBUG( - dbgs() << "Couldn't determine lane copy opcode for instruction.\n"); - return false; - } + MachineIRBuilder MIRBuilder(I); const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); - const TargetRegisterClass *DstRC = - getRegClassForTypeOnBank(NarrowTy, DstRB, RBI, true); - if (!DstRC) { - LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n"); + MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg, + LaneIdx, MIRBuilder); + if (!Extract) return false; - } - const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); - const TargetRegisterClass *SrcRC = - getRegClassForTypeOnBank(WideTy, SrcRB, RBI, true); - if (!SrcRC) { - LLVM_DEBUG(dbgs() << "Could not determine source register class.\n"); - return false; - } + I.eraseFromParent(); + return true; +} - // The register that we're going to copy into. - unsigned InsertReg = SrcReg; - MachineIRBuilder MIRBuilder(I); +bool AArch64InstructionSelector::selectSplitVectorUnmerge( + MachineInstr &I, MachineRegisterInfo &MRI) const { + unsigned NumElts = I.getNumOperands() - 1; + unsigned SrcReg = I.getOperand(NumElts).getReg(); + const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg()); + const LLT SrcTy = MRI.getType(SrcReg); - // If the lane index is 0, we just use a subregister COPY. - if (LaneIdx == 0) { - unsigned CopyTo = I.getOperand(0).getReg(); - BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY), - CopyTo) - .addUse(SrcReg, 0, ExtractSubReg); - RBI.constrainGenericRegister(CopyTo, *DstRC, MRI); - I.eraseFromParent(); - return true; + assert(NarrowTy.isVector() && "Expected an unmerge into vectors"); + if (SrcTy.getSizeInBits() > 128) { + LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge"); + return false; } - // Lane copies require 128-bit wide registers. If we're dealing with an - // unpacked vector, then we need to move up to that width. Insert an implicit - // def and a subregister insert to get us there. - if (WideTy.getSizeInBits() != 128) { - MachineInstr *ScalarToVector = emitScalarToVector( - WideTy.getSizeInBits(), &AArch64::FPR128RegClass, SrcReg, MIRBuilder); - if (!ScalarToVector) + MachineIRBuilder MIB(I); + + // We implement a split vector operation by treating the sub-vectors as + // scalars and extracting them. + const RegisterBank &DstRB = + *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI); + for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) { + unsigned Dst = I.getOperand(OpIdx).getReg(); + MachineInstr *Extract = + emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB); + if (!Extract) return false; - InsertReg = ScalarToVector->getOperand(0).getReg(); } - - MachineInstr *LaneCopyMI = - MIRBuilder.buildInstr(CopyOpc, {DstReg}, {InsertReg}).addImm(LaneIdx); - constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI); - - // Make sure that we actually constrain the initial copy. - RBI.constrainGenericRegister(DstReg, *DstRC, MRI); - I.eraseFromParent(); return true; } @@ -1984,11 +2033,8 @@ bool AArch64InstructionSelector::selectUnmergeValues( assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() && "source register size too small!"); - // TODO: Handle unmerging into vectors. - if (!NarrowTy.isScalar()) { - LLVM_DEBUG(dbgs() << "Vector-to-vector unmerges not supported yet.\n"); - return false; - } + if (!NarrowTy.isScalar()) + return selectSplitVectorUnmerge(I, MRI); // Choose a lane copy opcode and subregister based off of the size of the // vector's elements. diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-unmerge.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-unmerge.mir index 6814b993394..fdc5f12912b 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-unmerge.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-unmerge.mir @@ -19,6 +19,14 @@ ret <8 x half> %a } + define <2 x float> @test_vecsplit_2v2s32_v4s32(<4 x float> %a) { + ret <2 x float> undef + } + + define <2 x half> @test_vecsplit_2v2s16_v4s16(<4 x half> %a) { + ret <2 x half> undef + } + ... --- name: test_v2s64_unmerge @@ -152,3 +160,51 @@ body: | $q0 = COPY %1(<8 x s16>) RET_ReallyLR implicit $q0 ... +--- +name: test_vecsplit_2v2s32_v4s32 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $q0 + ; CHECK-LABEL: name: test_vecsplit_2v2s32_v4s32 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[COPY]].dsub + ; CHECK: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[COPY]], 1 + ; CHECK: $d0 = COPY [[COPY1]] + ; CHECK: $d1 = COPY [[CPYi64_]] + ; CHECK: RET_ReallyLR implicit $d0 + %0:fpr(<4 x s32>) = COPY $q0 + %1:fpr(<2 x s32>), %2:fpr(<2 x s32>) = G_UNMERGE_VALUES %0(<4 x s32>) + $d0 = COPY %1(<2 x s32>) + $d1 = COPY %2(<2 x s32>) + RET_ReallyLR implicit $d0 +... +--- +name: test_vecsplit_2v2s16_v4s16 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + liveins: $d0 + ; CHECK-LABEL: name: test_vecsplit_2v2s16_v4s16 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]].ssub + ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub + ; CHECK: [[CPYi32_:%[0-9]+]]:fpr32 = CPYi32 [[INSERT_SUBREG]], 1 + ; CHECK: $s0 = COPY [[COPY1]] + ; CHECK: $s1 = COPY [[CPYi32_]] + ; CHECK: RET_ReallyLR implicit $s0 + %0:fpr(<4 x s16>) = COPY $d0 + %1:fpr(<2 x s16>), %2:fpr(<2 x s16>) = G_UNMERGE_VALUES %0(<4 x s16>) + $s0 = COPY %1(<2 x s16>) + $s1 = COPY %2(<2 x s16>) + RET_ReallyLR implicit $s0 +... |