diff options
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/X86/X86GenRegisterBankInfo.def | 40 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstructionSelector.cpp | 177 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstructionSelector.h | 11 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86LegalizerInfo.cpp | 41 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86LegalizerInfo.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86RegisterBankInfo.cpp | 64 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86RegisterBanks.td | 3 |
7 files changed, 306 insertions, 32 deletions
diff --git a/llvm/lib/Target/X86/X86GenRegisterBankInfo.def b/llvm/lib/Target/X86/X86GenRegisterBankInfo.def index 33301b78fb2..bac483c0df2 100644 --- a/llvm/lib/Target/X86/X86GenRegisterBankInfo.def +++ b/llvm/lib/Target/X86/X86GenRegisterBankInfo.def @@ -19,10 +19,17 @@ namespace llvm { RegisterBankInfo::PartialMapping X86GenRegisterBankInfo::PartMappings[]{ /* StartIdx, Length, RegBank */ // GPR value - {0, 8, X86::GPRRegBank}, // :0 - {0, 16, X86::GPRRegBank}, // :1 - {0, 32, X86::GPRRegBank}, // :2 - {0, 64, X86::GPRRegBank}, // :3 + {0, 8, X86::GPRRegBank}, // :0 + {0, 16, X86::GPRRegBank}, // :1 + {0, 32, X86::GPRRegBank}, // :2 + {0, 64, X86::GPRRegBank}, // :3 + // FR32/64 , xmm registers + {0, 32, X86::VECRRegBank}, // :4 + {0, 64, X86::VECRRegBank}, // :5 + // VR128/256/512 + {0, 128, X86::VECRRegBank}, // :6 + {0, 256, X86::VECRRegBank}, // :7 + {0, 512, X86::VECRRegBank}, // :8 }; enum PartialMappingIdx { @@ -31,6 +38,11 @@ enum PartialMappingIdx { PMI_GPR16, PMI_GPR32, PMI_GPR64, + PMI_FP32, + PMI_FP64, + PMI_VEC128, + PMI_VEC256, + PMI_VEC512 }; #define INSTR_3OP(INFO) INFO, INFO, INFO, @@ -44,17 +56,27 @@ RegisterBankInfo::ValueMapping X86GenRegisterBankInfo::ValMappings[]{ INSTR_3OP(BREAKDOWN(PMI_GPR8, 1)) // 0: GPR_8 INSTR_3OP(BREAKDOWN(PMI_GPR16, 1)) // 3: GPR_16 INSTR_3OP(BREAKDOWN(PMI_GPR32, 1)) // 6: GPR_32 - INSTR_3OP(BREAKDOWN(PMI_GPR64, 1)) // 9: GPR_64 + INSTR_3OP(BREAKDOWN(PMI_GPR64, 1)) // 9: GPR_64 + INSTR_3OP(BREAKDOWN(PMI_FP32, 1)) // 12: Fp32 + INSTR_3OP(BREAKDOWN(PMI_FP64, 1)) // 15: Fp64 + INSTR_3OP(BREAKDOWN(PMI_VEC128, 1)) // 18: Vec128 + INSTR_3OP(BREAKDOWN(PMI_VEC256, 1)) // 21: Vec256 + INSTR_3OP(BREAKDOWN(PMI_VEC512, 1)) // 24: Vec512 }; #undef INSTR_3OP #undef BREAKDOWN enum ValueMappingIdx { VMI_None = -1, - VMI_3OpsGpr8Idx = 0, - VMI_3OpsGpr16Idx = 3, - VMI_3OpsGpr32Idx = 6, - VMI_3OpsGpr64Idx = 9, + VMI_3OpsGpr8Idx = PMI_GPR8 * 3, + VMI_3OpsGpr16Idx = PMI_GPR16 * 3, + VMI_3OpsGpr32Idx = PMI_GPR32 * 3, + VMI_3OpsGpr64Idx = PMI_GPR64 * 3, + VMI_3OpsFp32Idx = PMI_FP32 * 3, + VMI_3OpsFp64Idx = PMI_FP64 * 3, + VMI_3OpsVec128Idx = PMI_VEC128 * 3, + VMI_3OpsVec256Idx = PMI_VEC256 * 3, + VMI_3OpsVec512Idx = PMI_VEC512 * 3, }; } // End llvm namespace. diff --git a/llvm/lib/Target/X86/X86InstructionSelector.cpp b/llvm/lib/Target/X86/X86InstructionSelector.cpp index cc9fae540c7..0927c96abaa 100644 --- a/llvm/lib/Target/X86/X86InstructionSelector.cpp +++ b/llvm/lib/Target/X86/X86InstructionSelector.cpp @@ -39,7 +39,7 @@ using namespace llvm; X86InstructionSelector::X86InstructionSelector(const X86Subtarget &STI, const X86RegisterBankInfo &RBI) - : InstructionSelector(), TII(*STI.getInstrInfo()), + : InstructionSelector(), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI) {} // FIXME: This should be target-independent, inferred from the types declared @@ -47,11 +47,23 @@ X86InstructionSelector::X86InstructionSelector(const X86Subtarget &STI, static const TargetRegisterClass * getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB) { if (RB.getID() == X86::GPRRegBankID) { - if (Ty.getSizeInBits() <= 32) + if (Ty.getSizeInBits() == 32) return &X86::GR32RegClass; if (Ty.getSizeInBits() == 64) return &X86::GR64RegClass; } + if (RB.getID() == X86::VECRRegBankID) { + if (Ty.getSizeInBits() == 32) + return &X86::FR32XRegClass; + if (Ty.getSizeInBits() == 64) + return &X86::FR64XRegClass; + if (Ty.getSizeInBits() == 128) + return &X86::VR128XRegClass; + if (Ty.getSizeInBits() == 256) + return &X86::VR256XRegClass; + if (Ty.getSizeInBits() == 512) + return &X86::VR512RegClass; + } llvm_unreachable("Unknown RegBank!"); } @@ -89,6 +101,9 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, assert((DstSize <= 64) && "GPRs cannot get more than 64-bit width values."); RC = getRegClassForTypeOnBank(MRI.getType(DstReg), RegBank); break; + case X86::VECRRegBankID: + RC = getRegClassForTypeOnBank(MRI.getType(DstReg), RegBank); + break; default: llvm_unreachable("Unknown RegBank!"); } @@ -96,10 +111,13 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, // No need to constrain SrcReg. It will get constrained when // we hit another of its use or its defs. // Copies do not have constraints. - if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) { - DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) - << " operand\n"); - return false; + const TargetRegisterClass *OldRC = MRI.getRegClassOrNull(DstReg); + if (!OldRC || !RC->hasSubClassEq(OldRC)) { + if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) { + DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) + << " operand\n"); + return false; + } } I.setDesc(TII.get(X86::COPY)); return true; @@ -127,5 +145,152 @@ bool X86InstructionSelector::select(MachineInstr &I) const { assert(I.getNumOperands() == I.getNumExplicitOperands() && "Generic instruction has unexpected implicit operands\n"); + // TODO: This should be implemented by tblgen, pattern with predicate not supported yet. + if (selectBinaryOp(I, MRI)) + return true; + return selectImpl(I); } + +unsigned X86InstructionSelector::getFAddOp(LLT &Ty, + const RegisterBank &RB) const { + + if (X86::VECRRegBankID != RB.getID()) + return TargetOpcode::G_FADD; + + if (Ty == LLT::scalar(32)) { + if (STI.hasAVX512()) { + return X86::VADDSSZrr; + } else if (STI.hasAVX()) { + return X86::VADDSSrr; + } else if (STI.hasSSE1()) { + return X86::ADDSSrr; + } + } else if (Ty == LLT::scalar(64)) { + if (STI.hasAVX512()) { + return X86::VADDSDZrr; + } else if (STI.hasAVX()) { + return X86::VADDSDrr; + } else if (STI.hasSSE2()) { + return X86::ADDSDrr; + } + } else if (Ty == LLT::vector(4, 32)) { + if ((STI.hasAVX512()) && (STI.hasVLX())) { + return X86::VADDPSZ128rr; + } else if (STI.hasAVX()) { + return X86::VADDPSrr; + } else if (STI.hasSSE1()) { + return X86::ADDPSrr; + } + } + + return TargetOpcode::G_FADD; +} + +unsigned X86InstructionSelector::getFSubOp(LLT &Ty, + const RegisterBank &RB) const { + + if (X86::VECRRegBankID != RB.getID()) + return TargetOpcode::G_FSUB; + + if (Ty == LLT::scalar(32)) { + if (STI.hasAVX512()) { + return X86::VSUBSSZrr; + } else if (STI.hasAVX()) { + return X86::VSUBSSrr; + } else if (STI.hasSSE1()) { + return X86::SUBSSrr; + } + } else if (Ty == LLT::scalar(64)) { + if (STI.hasAVX512()) { + return X86::VSUBSDZrr; + } else if (STI.hasAVX()) { + return X86::VSUBSDrr; + } else if (STI.hasSSE2()) { + return X86::SUBSDrr; + } + } else if (Ty == LLT::vector(4, 32)) { + if ((STI.hasAVX512()) && (STI.hasVLX())) { + return X86::VSUBPSZ128rr; + } else if (STI.hasAVX()) { + return X86::VSUBPSrr; + } else if (STI.hasSSE1()) { + return X86::SUBPSrr; + } + } + + return TargetOpcode::G_FSUB; +} + +unsigned X86InstructionSelector::getAddOp(LLT &Ty, + const RegisterBank &RB) const { + + if (X86::VECRRegBankID != RB.getID()) + return TargetOpcode::G_ADD; + + if (Ty == LLT::vector(4, 32)) { + if (STI.hasAVX512() && STI.hasVLX()) { + return X86::VPADDDZ128rr; + } else if (STI.hasAVX()) { + return X86::VPADDDrr; + } else if (STI.hasSSE2()) { + return X86::PADDDrr; + } + } + + return TargetOpcode::G_ADD; +} + +unsigned X86InstructionSelector::getSubOp(LLT &Ty, + const RegisterBank &RB) const { + + if (X86::VECRRegBankID != RB.getID()) + return TargetOpcode::G_SUB; + + if (Ty == LLT::vector(4, 32)) { + if (STI.hasAVX512() && STI.hasVLX()) { + return X86::VPSUBDZ128rr; + } else if (STI.hasAVX()) { + return X86::VPSUBDrr; + } else if (STI.hasSSE2()) { + return X86::PSUBDrr; + } + } + + return TargetOpcode::G_SUB; +} + +bool X86InstructionSelector::selectBinaryOp(MachineInstr &I, + MachineRegisterInfo &MRI) const { + + LLT Ty = MRI.getType(I.getOperand(0).getReg()); + const unsigned DefReg = I.getOperand(0).getReg(); + const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); + + unsigned NewOpc = I.getOpcode(); + + switch (I.getOpcode()) { + case TargetOpcode::G_FADD: + NewOpc = getFAddOp(Ty, RB); + break; + case TargetOpcode::G_FSUB: + NewOpc = getFSubOp(Ty, RB); + break; + case TargetOpcode::G_ADD: + NewOpc = getAddOp(Ty, RB); + break; + case TargetOpcode::G_SUB: + NewOpc = getSubOp(Ty, RB); + break; + default: + break; + } + + if (NewOpc == I.getOpcode()) + return false; + + I.setDesc(TII.get(NewOpc)); + + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); +} + diff --git a/llvm/lib/Target/X86/X86InstructionSelector.h b/llvm/lib/Target/X86/X86InstructionSelector.h index 774fce1efb5..19f34fc6168 100644 --- a/llvm/lib/Target/X86/X86InstructionSelector.h +++ b/llvm/lib/Target/X86/X86InstructionSelector.h @@ -22,6 +22,9 @@ class X86RegisterBankInfo; class X86RegisterInfo; class X86Subtarget; class X86TargetMachine; +class LLT; +class RegisterBank; +class MachineRegisterInfo; class X86InstructionSelector : public InstructionSelector { public: @@ -35,6 +38,14 @@ private: /// the patterns that don't require complex C++. bool selectImpl(MachineInstr &I) const; + // TODO: remove after selectImpl support pattern with a predicate. + unsigned getFAddOp(LLT &Ty, const RegisterBank &RB) const; + unsigned getFSubOp(LLT &Ty, const RegisterBank &RB) const; + unsigned getAddOp(LLT &Ty, const RegisterBank &RB) const; + unsigned getSubOp(LLT &Ty, const RegisterBank &RB) const; + bool selectBinaryOp(MachineInstr &I, MachineRegisterInfo &MRI) const; + + const X86Subtarget &STI; const X86InstrInfo &TII; const X86RegisterInfo &TRI; const X86RegisterBankInfo &RBI; diff --git a/llvm/lib/Target/X86/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/X86LegalizerInfo.cpp index 5f63f8d4e98..06c11c84e35 100644 --- a/llvm/lib/Target/X86/X86LegalizerInfo.cpp +++ b/llvm/lib/Target/X86/X86LegalizerInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/Target/TargetOpcodes.h" using namespace llvm; +using namespace TargetOpcode; #ifndef LLVM_BUILD_GLOBAL_ISEL #error "You shouldn't build this" @@ -28,6 +29,8 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI) : Subtarget(STI) { setLegalizerInfo32bit(); setLegalizerInfo64bit(); + setLegalizerInfoSSE1(); + setLegalizerInfoSSE2(); computeTables(); } @@ -39,8 +42,8 @@ void X86LegalizerInfo::setLegalizerInfo32bit() { const LLT s32 = LLT::scalar(32); for (auto Ty : {s8, s16, s32}) { - setAction({TargetOpcode::G_ADD, Ty}, Legal); - setAction({TargetOpcode::G_SUB, Ty}, Legal); + setAction({G_ADD, Ty}, Legal); + setAction({G_SUB, Ty}, Legal); } } @@ -51,6 +54,36 @@ void X86LegalizerInfo::setLegalizerInfo64bit() { const LLT s64 = LLT::scalar(64); - setAction({TargetOpcode::G_ADD, s64}, Legal); - setAction({TargetOpcode::G_SUB, s64}, Legal); + setAction({G_ADD, s64}, Legal); + setAction({G_SUB, s64}, Legal); +} + +void X86LegalizerInfo::setLegalizerInfoSSE1() { + if (!Subtarget.hasSSE1()) + return; + + const LLT s32 = LLT::scalar(32); + const LLT v4s32 = LLT::vector(4, 32); + + for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV}) + for (auto Ty : {s32, v4s32}) + setAction({BinOp, Ty}, Legal); +} + +void X86LegalizerInfo::setLegalizerInfoSSE2() { + if (!Subtarget.hasSSE2()) + return; + + const LLT s64 = LLT::scalar(64); + const LLT v4s32 = LLT::vector(4, 32); + const LLT v2s64 = LLT::vector(2, 64); + + for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV}) + for (auto Ty : {s64, v2s64}) + setAction({BinOp, Ty}, Legal); + + for (unsigned BinOp : {G_ADD, G_SUB}) + for (auto Ty : {v4s32}) + setAction({BinOp, Ty}, Legal); + } diff --git a/llvm/lib/Target/X86/X86LegalizerInfo.h b/llvm/lib/Target/X86/X86LegalizerInfo.h index 10eb047d4d3..b9cf42f8016 100644 --- a/llvm/lib/Target/X86/X86LegalizerInfo.h +++ b/llvm/lib/Target/X86/X86LegalizerInfo.h @@ -34,6 +34,8 @@ public: private: void setLegalizerInfo32bit(); void setLegalizerInfo64bit(); + void setLegalizerInfoSSE1(); + void setLegalizerInfoSSE2(); }; } // End llvm namespace. #endif diff --git a/llvm/lib/Target/X86/X86RegisterBankInfo.cpp b/llvm/lib/Target/X86/X86RegisterBankInfo.cpp index 626e7ef4bed..fd9f62480c5 100644 --- a/llvm/lib/Target/X86/X86RegisterBankInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterBankInfo.cpp @@ -54,6 +54,13 @@ const RegisterBank &X86RegisterBankInfo::getRegBankFromRegClass( X86::GR64RegClass.hasSubClassEq(&RC)) return getRegBank(X86::GPRRegBankID); + if (X86::FR32XRegClass.hasSubClassEq(&RC) || + X86::FR64XRegClass.hasSubClassEq(&RC) || + X86::VR128XRegClass.hasSubClassEq(&RC) || + X86::VR256XRegClass.hasSubClassEq(&RC) || + X86::VR512RegClass.hasSubClassEq(&RC)) + return getRegBank(X86::VECRRegBankID); + llvm_unreachable("Unsupported register kind yet."); } @@ -71,26 +78,51 @@ X86RegisterBankInfo::getOperandsMapping(const MachineInstr &MI, bool isFP) { llvm_unreachable("Unsupported operand maping yet."); ValueMappingIdx ValMapIdx = VMI_None; - if (!isFP) { + + if (Ty.isScalar()) { + if (!isFP) { + switch (Ty.getSizeInBits()) { + case 8: + ValMapIdx = VMI_3OpsGpr8Idx; + break; + case 16: + ValMapIdx = VMI_3OpsGpr16Idx; + break; + case 32: + ValMapIdx = VMI_3OpsGpr32Idx; + break; + case 64: + ValMapIdx = VMI_3OpsGpr64Idx; + break; + default: + llvm_unreachable("Unsupported register size."); + } + } else { + switch (Ty.getSizeInBits()) { + case 32: + ValMapIdx = VMI_3OpsFp32Idx; + break; + case 64: + ValMapIdx = VMI_3OpsFp64Idx; + break; + default: + llvm_unreachable("Unsupported register size."); + } + } + } else { switch (Ty.getSizeInBits()) { - case 8: - ValMapIdx = VMI_3OpsGpr8Idx; + case 128: + ValMapIdx = VMI_3OpsVec128Idx; break; - case 16: - ValMapIdx = VMI_3OpsGpr16Idx; + case 256: + ValMapIdx = VMI_3OpsVec256Idx; break; - case 32: - ValMapIdx = VMI_3OpsGpr32Idx; - break; - case 64: - ValMapIdx = VMI_3OpsGpr64Idx; + case 512: + ValMapIdx = VMI_3OpsVec512Idx; break; default: llvm_unreachable("Unsupported register size."); - break; } - } else { - llvm_unreachable("Floating point not supported yet."); } return InstructionMapping{DefaultMappingID, 1, &ValMappings[ValMapIdx], @@ -114,6 +146,12 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case TargetOpcode::G_SUB: return getOperandsMapping(MI, false); break; + case TargetOpcode::G_FADD: + case TargetOpcode::G_FSUB: + case TargetOpcode::G_FMUL: + case TargetOpcode::G_FDIV: + return getOperandsMapping(MI, true); + break; default: return InstructionMapping{}; } diff --git a/llvm/lib/Target/X86/X86RegisterBanks.td b/llvm/lib/Target/X86/X86RegisterBanks.td index b01bf528201..6d17cd53a0c 100644 --- a/llvm/lib/Target/X86/X86RegisterBanks.td +++ b/llvm/lib/Target/X86/X86RegisterBanks.td @@ -12,3 +12,6 @@ /// General Purpose Registers: RAX, RCX,... def GPRRegBank : RegisterBank<"GPR", [GR64]>; + +/// Floating Point/Vector Registers +def VECRRegBank : RegisterBank<"VECR", [VR512]>; |