diff options
5 files changed, 1 insertions, 359 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp index 4d7f768871b..e1352c366e8 100644 --- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -23,7 +23,6 @@ #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -76,14 +75,6 @@ private: bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const; - void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI, - SmallVectorImpl<int> &Idxs) const; - bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const; - - unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const; - MachineInstr *emitLoadFromConstantPool(Constant *CPVal, - MachineIRBuilder &MIRBuilder) const; - ComplexRendererFns selectArithImmed(MachineOperand &Root) const; ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root, @@ -1705,8 +1696,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I, return selectMergeValues(I, MRI); case TargetOpcode::G_UNMERGE_VALUES: return selectUnmergeValues(I, MRI); - case TargetOpcode::G_SHUFFLE_VECTOR: - return selectShuffleVector(I, MRI); } return false; @@ -1924,125 +1913,6 @@ bool AArch64InstructionSelector::selectUnmergeValues( return true; } -void AArch64InstructionSelector::collectShuffleMaskIndices( - MachineInstr &I, MachineRegisterInfo &MRI, - SmallVectorImpl<int> &Idxs) const { - MachineInstr *MaskDef = MRI.getVRegDef(I.getOperand(3).getReg()); - assert( - MaskDef->getOpcode() == TargetOpcode::G_BUILD_VECTOR && - "G_SHUFFLE_VECTOR should have a constant mask operand as G_BUILD_VECTOR"); - // Find the constant indices. - for (unsigned i = 1, e = MaskDef->getNumOperands(); i < e; ++i) { - MachineInstr *ScalarDef = MRI.getVRegDef(MaskDef->getOperand(i).getReg()); - assert(ScalarDef && "Could not find vreg def of shufflevec index op"); - // Look through copies. - while (ScalarDef->getOpcode() == TargetOpcode::COPY) { - ScalarDef = MRI.getVRegDef(ScalarDef->getOperand(1).getReg()); - assert(ScalarDef && "Could not find def of copy operand"); - } - assert(ScalarDef->getOpcode() == TargetOpcode::G_CONSTANT); - Idxs.push_back(ScalarDef->getOperand(1).getCImm()->getSExtValue()); - } -} - -unsigned -AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal, - MachineFunction &MF) const { - Type *CPTy = CPVal->getType()->getPointerTo(); - unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy); - if (Align == 0) - Align = MF.getDataLayout().getTypeAllocSize(CPTy); - - MachineConstantPool *MCP = MF.getConstantPool(); - return MCP->getConstantPoolIndex(CPVal, Align); -} - -MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool( - Constant *CPVal, MachineIRBuilder &MIRBuilder) const { - unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF()); - - auto Adrp = - MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {}) - .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE); - auto Load = - MIRBuilder.buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp}) - .addConstantPoolIndex(CPIdx, 0, - AArch64II::MO_PAGEOFF | AArch64II::MO_NC); - constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI); - constrainSelectedInstRegOperands(*Load, TII, TRI, RBI); - return &*Load; -} - -bool AArch64InstructionSelector::selectShuffleVector( - MachineInstr &I, MachineRegisterInfo &MRI) const { - const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); - unsigned Src1Reg = I.getOperand(1).getReg(); - const LLT Src1Ty = MRI.getType(Src1Reg); - unsigned Src2Reg = I.getOperand(2).getReg(); - const LLT Src2Ty = MRI.getType(Src2Reg); - - MachineBasicBlock &MBB = *I.getParent(); - MachineFunction &MF = *MBB.getParent(); - LLVMContext &Ctx = MF.getFunction().getContext(); - - // G_SHUFFLE_VECTOR doesn't really have a strictly enforced constant mask - // operand, it comes in as a normal vector value which we have to analyze to - // find the mask indices. - SmallVector<int, 8> Mask; - collectShuffleMaskIndices(I, MRI, Mask); - assert(!Mask.empty() && "Expected to find mask indices"); - - // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if - // it's originated from a <1 x T> type. Those should have been lowered into - // G_BUILD_VECTOR earlier. - if (!Src1Ty.isVector() || !Src2Ty.isVector()) { - LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n"); - return false; - } - - unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8; - - SmallVector<Constant *, 64> CstIdxs; - for (int Val : Mask) { - for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) { - unsigned Offset = Byte + Val * BytesPerElt; - CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset)); - } - } - - if (DstTy.getSizeInBits() != 128) { - assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty"); - // This case can be done with TBL1. - return false; - } - - // Use a constant pool to load the index vector for TBL. - Constant *CPVal = ConstantVector::get(CstIdxs); - MachineIRBuilder MIRBuilder(I); - MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder); - if (!IndexLoad) { - LLVM_DEBUG(dbgs() << "Could not load from a constant pool"); - return false; - } - - // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive - // Q registers for regalloc. - auto RegSeq = MIRBuilder - .buildInstr(TargetOpcode::REG_SEQUENCE, - {&AArch64::QQRegClass}, {Src1Reg}) - .addImm(AArch64::qsub0) - .addUse(Src2Reg) - .addImm(AArch64::qsub1); - - auto TBL2 = - MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()}, - {RegSeq, IndexLoad->getOperand(0).getReg()}); - constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI); - constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI); - I.eraseFromParent(); - return true; -} - bool AArch64InstructionSelector::selectBuildVector( MachineInstr &I, MachineRegisterInfo &MRI) const { assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); diff --git a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp index c9a439c0012..94a66286368 100644 --- a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -461,29 +461,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32}) .scalarize(1); - getActionDefinitionsBuilder(G_SHUFFLE_VECTOR) - .legalIf([=](const LegalityQuery &Query) { - const LLT &DstTy = Query.Types[0]; - const LLT &SrcTy = Query.Types[1]; - // For now just support the TBL2 variant which needs the source vectors - // to be the same size as the dest. - if (DstTy != SrcTy) - return false; - ArrayRef<LLT> SupportedDstTys = {v2s32, v4s32, v2s64}; - for (auto &Ty : SupportedDstTys) { - if (DstTy == Ty) - return true; - } - return false; - }) - // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we - // just want those lowered into G_BUILD_VECTOR - .lowerIf([=](const LegalityQuery &Query) { - return !Query.Types[1].isVector(); - }) - .clampNumElements(0, v4s32, v4s32) - .clampNumElements(0, v2s64, v2s64); - computeTables(); verify(*ST.getInstrInfo()); } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir deleted file mode 100644 index 4dbaae13a7b..00000000000 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir +++ /dev/null @@ -1,54 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=aarch64 -O0 -run-pass=legalizer -global-isel-abort=1 %s -o - | FileCheck %s ---- -name: shuffle_v4i32 -alignment: 2 -tracksRegLiveness: true -body: | - bb.1: - liveins: $q0, $q1 - - ; CHECK-LABEL: name: shuffle_v4i32 - ; CHECK: liveins: $q0, $q1 - ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: $q0 = COPY [[SHUF]](<4 x s32>) - ; CHECK: RET_ReallyLR implicit $q0 - %0:_(<4 x s32>) = COPY $q0 - %1:_(<4 x s32>) = COPY $q1 - %4:_(s32) = G_CONSTANT i32 0 - %3:_(<4 x s32>) = G_BUILD_VECTOR %4(s32), %4(s32), %4(s32), %4(s32) - %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, %3(<4 x s32>) - $q0 = COPY %2(<4 x s32>) - RET_ReallyLR implicit $q0 - -... ---- -name: shuffle_v2i64 -alignment: 2 -tracksRegLiveness: true -body: | - bb.1: - liveins: $q0, $q1 - - ; CHECK-LABEL: name: shuffle_v2i64 - ; CHECK: liveins: $q0, $q1 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) - ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s64>), [[COPY1]], [[BUILD_VECTOR]](<2 x s32>) - ; CHECK: $q0 = COPY [[SHUF]](<2 x s64>) - ; CHECK: RET_ReallyLR implicit $q0 - %0:_(<2 x s64>) = COPY $q0 - %1:_(<2 x s64>) = COPY $q1 - %4:_(s32) = G_CONSTANT i32 0 - %3:_(<2 x s32>) = G_BUILD_VECTOR %4(s32), %4(s32) - %2:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %1, %3(<2 x s32>) - $q0 = COPY %2(<2 x s64>) - RET_ReallyLR implicit $q0 - -... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index 5a0e8d800c5..623ab523caf 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -313,7 +313,7 @@ # DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected # # DEBUG-NEXT: G_SHUFFLE_VECTOR (opcode {{[0-9]+}}): 3 type indices -# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG: .. type index coverage check SKIPPED: no rules defined # # DEBUG-NEXT: G_CTTZ (opcode {{[0-9]+}}): 2 type indices # DEBUG: .. type index coverage check SKIPPED: no rules defined diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-shuffle-vector.mir deleted file mode 100644 index b78c7a55e79..00000000000 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-shuffle-vector.mir +++ /dev/null @@ -1,151 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=aarch64-- -O0 -run-pass=instruction-select -verify-machineinstrs %s -global-isel-abort=1 -o - | FileCheck %s ---- | - ; ModuleID = 'shufflevec-only-legal.ll' - source_filename = "shufflevec-only-legal.ll" - target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" - target triple = "aarch64" - - define <4 x i32> @shuffle_v4i32(<4 x i32> %a, <4 x i32> %b) { - %shuf = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 3, i32 0> - ret <4 x i32> %shuf - } - - define <4 x i32> @shuffle_tbl_v4i32(<4 x i32> %a, <4 x i32> %b) { - %shuf = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 5, i32 7, i32 1, i32 0> - ret <4 x i32> %shuf - } - - define <2 x i64> @shuffle_v2i64(<2 x i64> %a, <2 x i64> %b) { - %shuf = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> zeroinitializer - ret <2 x i64> %shuf - } - -... ---- -name: shuffle_v4i32 -alignment: 2 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: fpr } - - { id: 1, class: fpr } - - { id: 2, class: fpr } - - { id: 3, class: fpr } - - { id: 4, class: gpr } - - { id: 5, class: gpr } - - { id: 6, class: gpr } -body: | - bb.1 (%ir-block.0): - liveins: $q0, $q1 - - ; CHECK-LABEL: name: shuffle_v4i32 - ; CHECK: constants: - ; CHECK: value: '<16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3>' - ; CHECK: alignment: 8 - ; CHECK: isTargetSpecific: false - ; CHECK: liveins: $q0, $q1 - ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 - ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0 - ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:qq = REG_SEQUENCE [[COPY]], %subreg.qsub0, [[COPY1]], %subreg.qsub1 - ; CHECK: [[TBLv16i8Two:%[0-9]+]]:fpr128 = TBLv16i8Two [[REG_SEQUENCE]], [[LDRQui]] - ; CHECK: $q0 = COPY [[TBLv16i8Two]] - ; CHECK: RET_ReallyLR implicit $q0 - %0:fpr(<4 x s32>) = COPY $q0 - %1:fpr(<4 x s32>) = COPY $q1 - %4:gpr(s32) = G_CONSTANT i32 0 - %5:gpr(s32) = G_CONSTANT i32 1 - %6:gpr(s32) = G_CONSTANT i32 3 - %3:fpr(<4 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32), %6(s32), %4(s32) - %2:fpr(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, %3(<4 x s32>) - $q0 = COPY %2(<4 x s32>) - RET_ReallyLR implicit $q0 - -... ---- -name: shuffle_tbl_v4i32 -alignment: 2 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: fpr } - - { id: 1, class: fpr } - - { id: 2, class: fpr } - - { id: 3, class: fpr } - - { id: 4, class: gpr } - - { id: 5, class: gpr } - - { id: 6, class: gpr } - - { id: 7, class: gpr } -body: | - bb.1 (%ir-block.0): - liveins: $q0, $q1 - - ; CHECK-LABEL: name: shuffle_tbl_v4i32 - ; CHECK: constants: - ; CHECK: value: '<16 x i8> <i8 20, i8 21, i8 22, i8 23, i8 28, i8 29, i8 30, i8 31, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3>' - ; CHECK: alignment: 8 - ; CHECK: isTargetSpecific: false - ; CHECK: liveins: $q0, $q1 - ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 - ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0 - ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:qq = REG_SEQUENCE [[COPY]], %subreg.qsub0, [[COPY1]], %subreg.qsub1 - ; CHECK: [[TBLv16i8Two:%[0-9]+]]:fpr128 = TBLv16i8Two [[REG_SEQUENCE]], [[LDRQui]] - ; CHECK: $q0 = COPY [[TBLv16i8Two]] - ; CHECK: RET_ReallyLR implicit $q0 - %0:fpr(<4 x s32>) = COPY $q0 - %1:fpr(<4 x s32>) = COPY $q1 - %4:gpr(s32) = G_CONSTANT i32 5 - %5:gpr(s32) = G_CONSTANT i32 7 - %6:gpr(s32) = G_CONSTANT i32 1 - %7:gpr(s32) = G_CONSTANT i32 0 - %3:fpr(<4 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32), %6(s32), %7(s32) - %2:fpr(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, %3(<4 x s32>) - $q0 = COPY %2(<4 x s32>) - RET_ReallyLR implicit $q0 - -... ---- -name: shuffle_v2i64 -alignment: 2 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: fpr } - - { id: 1, class: fpr } - - { id: 2, class: fpr } - - { id: 3, class: fpr } - - { id: 4, class: gpr } -body: | - bb.1 (%ir-block.0): - liveins: $q0, $q1 - - ; CHECK-LABEL: name: shuffle_v2i64 - ; CHECK: constants: - ; CHECK: value: '<16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>' - ; CHECK: alignment: 8 - ; CHECK: isTargetSpecific: false - ; CHECK: liveins: $q0, $q1 - ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 - ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0 - ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:qq = REG_SEQUENCE [[COPY]], %subreg.qsub0, [[COPY1]], %subreg.qsub1 - ; CHECK: [[TBLv16i8Two:%[0-9]+]]:fpr128 = TBLv16i8Two [[REG_SEQUENCE]], [[LDRQui]] - ; CHECK: $q0 = COPY [[TBLv16i8Two]] - ; CHECK: RET_ReallyLR implicit $q0 - %0:fpr(<2 x s64>) = COPY $q0 - %1:fpr(<2 x s64>) = COPY $q1 - %4:gpr(s32) = G_CONSTANT i32 0 - %3:fpr(<2 x s32>) = G_BUILD_VECTOR %4(s32), %4(s32) - %2:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %1, %3(<2 x s32>) - $q0 = COPY %2(<2 x s64>) - RET_ReallyLR implicit $q0 - -... |