author     Matt Arsenault <Matthew.Arsenault@amd.com>   2019-01-30 02:22:13 +0000
committer  Matt Arsenault <Matthew.Arsenault@amd.com>   2019-01-30 02:22:13 +0000
commit     ccefbbd0f02c5036dd1244ee5c6603f365653fbe
tree       9f12f6a0f09dbd4b2fcd59c1a6ae36693f5e8620
parent     48dc110eea1f44319f6df632a3a8fde903864f7e
GlobalISel: Handle some odd splits in fewerElementsVector
Also add some quick hacks to AMDGPU legality for the tests.
llvm-svn: 352591
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp       |  65
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir  |  78
2 files changed, 130 insertions, 13 deletions
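To make the "odd splits" concrete before reading the diff: fewerElementsVectorBasic can now narrow a vector whose bit width is not a multiple of NarrowTy, provided the remainder is exactly one element. The evenly dividing pieces are handled with an extract/op/insert sequence over an undef accumulator, and the single element-sized leftover gets the same treatment at the end. Below is a minimal MIR sketch of that shape, using a hypothetical <3 x s16> G_AND narrowed to <2 x s16>; the types, register names, and undef operands are illustrative only and are not taken from the patch or its tests.

    ; Sketch only: one <2 x s16> piece covers bits [0, 32); the leftover s16 covers bits [32, 48).
    %lhs:_(<3 x s16>) = G_IMPLICIT_DEF          ; stand-in for the real LHS operand
    %rhs:_(<3 x s16>) = G_IMPLICIT_DEF          ; stand-in for the real RHS operand
    %acc:_(<3 x s16>) = G_IMPLICIT_DEF          ; undef accumulator (buildUndef in the patch)
    ; Evenly dividing piece at bit offset 0.
    %lhs0:_(<2 x s16>) = G_EXTRACT %lhs(<3 x s16>), 0
    %rhs0:_(<2 x s16>) = G_EXTRACT %rhs(<3 x s16>), 0
    %and0:_(<2 x s16>) = G_AND %lhs0, %rhs0
    %acc1:_(<3 x s16>) = G_INSERT %acc, %and0(<2 x s16>), 0
    ; Element-sized leftover piece at bit offset 32.
    %lhs1:_(s16) = G_EXTRACT %lhs(<3 x s16>), 32
    %rhs1:_(s16) = G_EXTRACT %rhs(<3 x s16>), 32
    %and1:_(s16) = G_AND %lhs1, %rhs1
    %dst:_(<3 x s16>) = G_INSERT %acc1, %and1(s16), 32

In the patch itself the accumulator is rebuilt through a fresh virtual register per piece, and the final leftover G_INSERT writes directly into the instruction's original destination register.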
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 24c89956190..7f65ad8edf7 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1441,18 +1441,63 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
 LegalizerHelper::LegalizeResult
 LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
                                           LLT NarrowTy) {
-  unsigned Opc = MI.getOpcode();
-  unsigned NarrowSize = NarrowTy.getSizeInBits();
-  unsigned DstReg = MI.getOperand(0).getReg();
-  unsigned Flags = MI.getFlags();
-  unsigned Size = MRI.getType(DstReg).getSizeInBits();
-  int NumParts = Size / NarrowSize;
-  // FIXME: Don't know how to handle the situation where the small vectors
-  // aren't all the same size yet.
-  if (Size % NarrowSize != 0)
+  const unsigned Opc = MI.getOpcode();
+  const unsigned NumOps = MI.getNumOperands() - 1;
+  const unsigned NarrowSize = NarrowTy.getSizeInBits();
+  const unsigned DstReg = MI.getOperand(0).getReg();
+  const unsigned Flags = MI.getFlags();
+  const LLT DstTy = MRI.getType(DstReg);
+  const unsigned Size = DstTy.getSizeInBits();
+  const int NumParts = Size / NarrowSize;
+  const LLT EltTy = DstTy.getElementType();
+  const unsigned EltSize = EltTy.getSizeInBits();
+  const unsigned BitsForNumParts = NarrowSize * NumParts;
+
+  // Check if we have any leftovers. If we do, then only handle the case where
+  // the leftover is one element.
+  if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
     return UnableToLegalize;
 
-  unsigned NumOps = MI.getNumOperands() - 1;
+  if (BitsForNumParts != Size) {
+    unsigned AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
+    MIRBuilder.buildUndef(AccumDstReg);
+
+    // Handle the pieces which evenly divide into the requested type with
+    // extract/op/insert sequence.
+    for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) {
+      SmallVector<SrcOp, 4> SrcOps;
+      for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
+        unsigned PartOpReg = MRI.createGenericVirtualRegister(NarrowTy);
+        MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset);
+        SrcOps.push_back(PartOpReg);
+      }
+
+      unsigned PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
+      MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
+
+      unsigned PartInsertReg = MRI.createGenericVirtualRegister(DstTy);
+      MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset);
+      AccumDstReg = PartInsertReg;
+      Offset += NarrowSize;
+    }
+
+    // Handle the remaining element sized leftover piece.
+    SmallVector<SrcOp, 4> SrcOps;
+    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
+      unsigned PartOpReg = MRI.createGenericVirtualRegister(EltTy);
+      MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(),
+                              BitsForNumParts);
+      SrcOps.push_back(PartOpReg);
+    }
+
+    unsigned PartDstReg = MRI.createGenericVirtualRegister(EltTy);
+    MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
+    MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts);
+    MI.eraseFromParent();
+
+    return Legalized;
+  }
+
   SmallVector<unsigned, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
 
   extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
index 835fd70f987..7f6bf22a83b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
@@ -140,9 +140,9 @@ body: |
     ; CHECK: $vgpr0 = COPY [[COPY4]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
-    %2:_(s16) = G_TRUNC %0
-    %3:_(s16) = G_TRUNC %1
-    %4:_(s16) = G_AND %2, %3
+    %2:_(s24) = G_TRUNC %0
+    %3:_(s24) = G_TRUNC %1
+    %4:_(s24) = G_AND %2, %3
     %5:_(s32) = G_ANYEXT %4
     $vgpr0 = COPY %5
 ...
@@ -165,6 +165,78 @@ body: |
 ...
 
 ---
+name: test_and_v3i32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
+
+    ; CHECK-LABEL: name: test_and_v3i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV3]]
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV4]]
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV5]]
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    %2:_(<3 x s32>) = G_AND %0, %1
+    $vgpr0_vgpr1_vgpr2 = COPY %2
+...
+
+---
+name: test_and_v4i32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
+
+    ; CHECK-LABEL: name: test_and_v4i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV4]]
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV5]]
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV6]]
+    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[UV7]]
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    %2:_(<4 x s32>) = G_AND %0, %1
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
+...
+
+---
+name: test_and_v5i32
+body: |
+  bb.0:
+
+    ; CHECK-LABEL: name: test_and_v5i32
+    ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>)
+    ; CHECK: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV5]]
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV6]]
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV7]]
+    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[UV8]]
+    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[UV9]]
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32), [[AND4]](s32)
+    ; CHECK: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF2]], [[BUILD_VECTOR]](<5 x s32>), 0
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>)
+    %0:_(<5 x s32>) = G_IMPLICIT_DEF
+    %1:_(<5 x s32>) = G_IMPLICIT_DEF
+    %2:_(<5 x s32>) = G_AND %0, %1
+    %3:_(<8 x s32>) = G_IMPLICIT_DEF
+    %4:_(<8 x s32>) = G_INSERT %3, %2, 0
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %4
+...
+
+---
 name: test_and_v2s64
 body: |
   bb.0: