summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r--llvm/lib/Target/X86/X86GenRegisterBankInfo.def40
-rw-r--r--llvm/lib/Target/X86/X86InstructionSelector.cpp177
-rw-r--r--llvm/lib/Target/X86/X86InstructionSelector.h11
-rw-r--r--llvm/lib/Target/X86/X86LegalizerInfo.cpp41
-rw-r--r--llvm/lib/Target/X86/X86LegalizerInfo.h2
-rw-r--r--llvm/lib/Target/X86/X86RegisterBankInfo.cpp64
-rw-r--r--llvm/lib/Target/X86/X86RegisterBanks.td3
7 files changed, 306 insertions, 32 deletions
diff --git a/llvm/lib/Target/X86/X86GenRegisterBankInfo.def b/llvm/lib/Target/X86/X86GenRegisterBankInfo.def
index 33301b78fb2..bac483c0df2 100644
--- a/llvm/lib/Target/X86/X86GenRegisterBankInfo.def
+++ b/llvm/lib/Target/X86/X86GenRegisterBankInfo.def
@@ -19,10 +19,17 @@ namespace llvm {
RegisterBankInfo::PartialMapping X86GenRegisterBankInfo::PartMappings[]{
/* StartIdx, Length, RegBank */
// GPR value
- {0, 8, X86::GPRRegBank}, // :0
- {0, 16, X86::GPRRegBank}, // :1
- {0, 32, X86::GPRRegBank}, // :2
- {0, 64, X86::GPRRegBank}, // :3
+ {0, 8, X86::GPRRegBank}, // :0
+ {0, 16, X86::GPRRegBank}, // :1
+ {0, 32, X86::GPRRegBank}, // :2
+ {0, 64, X86::GPRRegBank}, // :3
+ // FR32/64 , xmm registers
+ {0, 32, X86::VECRRegBank}, // :4
+ {0, 64, X86::VECRRegBank}, // :5
+ // VR128/256/512
+ {0, 128, X86::VECRRegBank}, // :6
+ {0, 256, X86::VECRRegBank}, // :7
+ {0, 512, X86::VECRRegBank}, // :8
};
enum PartialMappingIdx {
@@ -31,6 +38,11 @@ enum PartialMappingIdx {
PMI_GPR16,
PMI_GPR32,
PMI_GPR64,
+ PMI_FP32,
+ PMI_FP64,
+ PMI_VEC128,
+ PMI_VEC256,
+ PMI_VEC512
};
#define INSTR_3OP(INFO) INFO, INFO, INFO,
@@ -44,17 +56,27 @@ RegisterBankInfo::ValueMapping X86GenRegisterBankInfo::ValMappings[]{
INSTR_3OP(BREAKDOWN(PMI_GPR8, 1)) // 0: GPR_8
INSTR_3OP(BREAKDOWN(PMI_GPR16, 1)) // 3: GPR_16
INSTR_3OP(BREAKDOWN(PMI_GPR32, 1)) // 6: GPR_32
- INSTR_3OP(BREAKDOWN(PMI_GPR64, 1)) // 9: GPR_64
+ INSTR_3OP(BREAKDOWN(PMI_GPR64, 1)) // 9: GPR_64
+ INSTR_3OP(BREAKDOWN(PMI_FP32, 1)) // 12: Fp32
+ INSTR_3OP(BREAKDOWN(PMI_FP64, 1)) // 15: Fp64
+ INSTR_3OP(BREAKDOWN(PMI_VEC128, 1)) // 18: Vec128
+ INSTR_3OP(BREAKDOWN(PMI_VEC256, 1)) // 21: Vec256
+ INSTR_3OP(BREAKDOWN(PMI_VEC512, 1)) // 24: Vec512
};
#undef INSTR_3OP
#undef BREAKDOWN
enum ValueMappingIdx {
VMI_None = -1,
- VMI_3OpsGpr8Idx = 0,
- VMI_3OpsGpr16Idx = 3,
- VMI_3OpsGpr32Idx = 6,
- VMI_3OpsGpr64Idx = 9,
+ VMI_3OpsGpr8Idx = PMI_GPR8 * 3,
+ VMI_3OpsGpr16Idx = PMI_GPR16 * 3,
+ VMI_3OpsGpr32Idx = PMI_GPR32 * 3,
+ VMI_3OpsGpr64Idx = PMI_GPR64 * 3,
+ VMI_3OpsFp32Idx = PMI_FP32 * 3,
+ VMI_3OpsFp64Idx = PMI_FP64 * 3,
+ VMI_3OpsVec128Idx = PMI_VEC128 * 3,
+ VMI_3OpsVec256Idx = PMI_VEC256 * 3,
+ VMI_3OpsVec512Idx = PMI_VEC512 * 3,
};
} // End llvm namespace.
diff --git a/llvm/lib/Target/X86/X86InstructionSelector.cpp b/llvm/lib/Target/X86/X86InstructionSelector.cpp
index cc9fae540c7..0927c96abaa 100644
--- a/llvm/lib/Target/X86/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/X86InstructionSelector.cpp
@@ -39,7 +39,7 @@ using namespace llvm;
X86InstructionSelector::X86InstructionSelector(const X86Subtarget &STI,
const X86RegisterBankInfo &RBI)
- : InstructionSelector(), TII(*STI.getInstrInfo()),
+ : InstructionSelector(), STI(STI), TII(*STI.getInstrInfo()),
TRI(*STI.getRegisterInfo()), RBI(RBI) {}
// FIXME: This should be target-independent, inferred from the types declared
@@ -47,11 +47,23 @@ X86InstructionSelector::X86InstructionSelector(const X86Subtarget &STI,
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB) {
if (RB.getID() == X86::GPRRegBankID) {
- if (Ty.getSizeInBits() <= 32)
+ if (Ty.getSizeInBits() == 32)
return &X86::GR32RegClass;
if (Ty.getSizeInBits() == 64)
return &X86::GR64RegClass;
}
+ if (RB.getID() == X86::VECRRegBankID) {
+ if (Ty.getSizeInBits() == 32)
+ return &X86::FR32XRegClass;
+ if (Ty.getSizeInBits() == 64)
+ return &X86::FR64XRegClass;
+ if (Ty.getSizeInBits() == 128)
+ return &X86::VR128XRegClass;
+ if (Ty.getSizeInBits() == 256)
+ return &X86::VR256XRegClass;
+ if (Ty.getSizeInBits() == 512)
+ return &X86::VR512RegClass;
+ }
llvm_unreachable("Unknown RegBank!");
}
@@ -89,6 +101,9 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
assert((DstSize <= 64) && "GPRs cannot get more than 64-bit width values.");
RC = getRegClassForTypeOnBank(MRI.getType(DstReg), RegBank);
break;
+ case X86::VECRRegBankID:
+ RC = getRegClassForTypeOnBank(MRI.getType(DstReg), RegBank);
+ break;
default:
llvm_unreachable("Unknown RegBank!");
}
@@ -96,10 +111,13 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
// No need to constrain SrcReg. It will get constrained when
// we hit another of its use or its defs.
// Copies do not have constraints.
- if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
- DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
- << " operand\n");
- return false;
+ const TargetRegisterClass *OldRC = MRI.getRegClassOrNull(DstReg);
+ if (!OldRC || !RC->hasSubClassEq(OldRC)) {
+ if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
+ DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
+ << " operand\n");
+ return false;
+ }
}
I.setDesc(TII.get(X86::COPY));
return true;
@@ -127,5 +145,152 @@ bool X86InstructionSelector::select(MachineInstr &I) const {
assert(I.getNumOperands() == I.getNumExplicitOperands() &&
"Generic instruction has unexpected implicit operands\n");
+ // TODO: This should be implemented by tblgen, pattern with predicate not supported yet.
+ if (selectBinaryOp(I, MRI))
+ return true;
+
return selectImpl(I);
}
+
+unsigned X86InstructionSelector::getFAddOp(LLT &Ty,
+ const RegisterBank &RB) const {
+
+ if (X86::VECRRegBankID != RB.getID())
+ return TargetOpcode::G_FADD;
+
+ if (Ty == LLT::scalar(32)) {
+ if (STI.hasAVX512()) {
+ return X86::VADDSSZrr;
+ } else if (STI.hasAVX()) {
+ return X86::VADDSSrr;
+ } else if (STI.hasSSE1()) {
+ return X86::ADDSSrr;
+ }
+ } else if (Ty == LLT::scalar(64)) {
+ if (STI.hasAVX512()) {
+ return X86::VADDSDZrr;
+ } else if (STI.hasAVX()) {
+ return X86::VADDSDrr;
+ } else if (STI.hasSSE2()) {
+ return X86::ADDSDrr;
+ }
+ } else if (Ty == LLT::vector(4, 32)) {
+ if ((STI.hasAVX512()) && (STI.hasVLX())) {
+ return X86::VADDPSZ128rr;
+ } else if (STI.hasAVX()) {
+ return X86::VADDPSrr;
+ } else if (STI.hasSSE1()) {
+ return X86::ADDPSrr;
+ }
+ }
+
+ return TargetOpcode::G_FADD;
+}
+
+unsigned X86InstructionSelector::getFSubOp(LLT &Ty,
+ const RegisterBank &RB) const {
+
+ if (X86::VECRRegBankID != RB.getID())
+ return TargetOpcode::G_FSUB;
+
+ if (Ty == LLT::scalar(32)) {
+ if (STI.hasAVX512()) {
+ return X86::VSUBSSZrr;
+ } else if (STI.hasAVX()) {
+ return X86::VSUBSSrr;
+ } else if (STI.hasSSE1()) {
+ return X86::SUBSSrr;
+ }
+ } else if (Ty == LLT::scalar(64)) {
+ if (STI.hasAVX512()) {
+ return X86::VSUBSDZrr;
+ } else if (STI.hasAVX()) {
+ return X86::VSUBSDrr;
+ } else if (STI.hasSSE2()) {
+ return X86::SUBSDrr;
+ }
+ } else if (Ty == LLT::vector(4, 32)) {
+ if ((STI.hasAVX512()) && (STI.hasVLX())) {
+ return X86::VSUBPSZ128rr;
+ } else if (STI.hasAVX()) {
+ return X86::VSUBPSrr;
+ } else if (STI.hasSSE1()) {
+ return X86::SUBPSrr;
+ }
+ }
+
+ return TargetOpcode::G_FSUB;
+}
+
+unsigned X86InstructionSelector::getAddOp(LLT &Ty,
+ const RegisterBank &RB) const {
+
+ if (X86::VECRRegBankID != RB.getID())
+ return TargetOpcode::G_ADD;
+
+ if (Ty == LLT::vector(4, 32)) {
+ if (STI.hasAVX512() && STI.hasVLX()) {
+ return X86::VPADDDZ128rr;
+ } else if (STI.hasAVX()) {
+ return X86::VPADDDrr;
+ } else if (STI.hasSSE2()) {
+ return X86::PADDDrr;
+ }
+ }
+
+ return TargetOpcode::G_ADD;
+}
+
+unsigned X86InstructionSelector::getSubOp(LLT &Ty,
+ const RegisterBank &RB) const {
+
+ if (X86::VECRRegBankID != RB.getID())
+ return TargetOpcode::G_SUB;
+
+ if (Ty == LLT::vector(4, 32)) {
+ if (STI.hasAVX512() && STI.hasVLX()) {
+ return X86::VPSUBDZ128rr;
+ } else if (STI.hasAVX()) {
+ return X86::VPSUBDrr;
+ } else if (STI.hasSSE2()) {
+ return X86::PSUBDrr;
+ }
+ }
+
+ return TargetOpcode::G_SUB;
+}
+
+bool X86InstructionSelector::selectBinaryOp(MachineInstr &I,
+ MachineRegisterInfo &MRI) const {
+
+ LLT Ty = MRI.getType(I.getOperand(0).getReg());
+ const unsigned DefReg = I.getOperand(0).getReg();
+ const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
+
+ unsigned NewOpc = I.getOpcode();
+
+ switch (I.getOpcode()) {
+ case TargetOpcode::G_FADD:
+ NewOpc = getFAddOp(Ty, RB);
+ break;
+ case TargetOpcode::G_FSUB:
+ NewOpc = getFSubOp(Ty, RB);
+ break;
+ case TargetOpcode::G_ADD:
+ NewOpc = getAddOp(Ty, RB);
+ break;
+ case TargetOpcode::G_SUB:
+ NewOpc = getSubOp(Ty, RB);
+ break;
+ default:
+ break;
+ }
+
+ if (NewOpc == I.getOpcode())
+ return false;
+
+ I.setDesc(TII.get(NewOpc));
+
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
+
diff --git a/llvm/lib/Target/X86/X86InstructionSelector.h b/llvm/lib/Target/X86/X86InstructionSelector.h
index 774fce1efb5..19f34fc6168 100644
--- a/llvm/lib/Target/X86/X86InstructionSelector.h
+++ b/llvm/lib/Target/X86/X86InstructionSelector.h
@@ -22,6 +22,9 @@ class X86RegisterBankInfo;
class X86RegisterInfo;
class X86Subtarget;
class X86TargetMachine;
+class LLT;
+class RegisterBank;
+class MachineRegisterInfo;
class X86InstructionSelector : public InstructionSelector {
public:
@@ -35,6 +38,14 @@ private:
/// the patterns that don't require complex C++.
bool selectImpl(MachineInstr &I) const;
+ // TODO: remove after selectImpl support pattern with a predicate.
+ unsigned getFAddOp(LLT &Ty, const RegisterBank &RB) const;
+ unsigned getFSubOp(LLT &Ty, const RegisterBank &RB) const;
+ unsigned getAddOp(LLT &Ty, const RegisterBank &RB) const;
+ unsigned getSubOp(LLT &Ty, const RegisterBank &RB) const;
+ bool selectBinaryOp(MachineInstr &I, MachineRegisterInfo &MRI) const;
+
+ const X86Subtarget &STI;
const X86InstrInfo &TII;
const X86RegisterInfo &TRI;
const X86RegisterBankInfo &RBI;
diff --git a/llvm/lib/Target/X86/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/X86LegalizerInfo.cpp
index 5f63f8d4e98..06c11c84e35 100644
--- a/llvm/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/X86LegalizerInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/Target/TargetOpcodes.h"
using namespace llvm;
+using namespace TargetOpcode;
#ifndef LLVM_BUILD_GLOBAL_ISEL
#error "You shouldn't build this"
@@ -28,6 +29,8 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI) : Subtarget(STI) {
setLegalizerInfo32bit();
setLegalizerInfo64bit();
+ setLegalizerInfoSSE1();
+ setLegalizerInfoSSE2();
computeTables();
}
@@ -39,8 +42,8 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
const LLT s32 = LLT::scalar(32);
for (auto Ty : {s8, s16, s32}) {
- setAction({TargetOpcode::G_ADD, Ty}, Legal);
- setAction({TargetOpcode::G_SUB, Ty}, Legal);
+ setAction({G_ADD, Ty}, Legal);
+ setAction({G_SUB, Ty}, Legal);
}
}
@@ -51,6 +54,36 @@ void X86LegalizerInfo::setLegalizerInfo64bit() {
const LLT s64 = LLT::scalar(64);
- setAction({TargetOpcode::G_ADD, s64}, Legal);
- setAction({TargetOpcode::G_SUB, s64}, Legal);
+ setAction({G_ADD, s64}, Legal);
+ setAction({G_SUB, s64}, Legal);
+}
+
+void X86LegalizerInfo::setLegalizerInfoSSE1() {
+ if (!Subtarget.hasSSE1())
+ return;
+
+ const LLT s32 = LLT::scalar(32);
+ const LLT v4s32 = LLT::vector(4, 32);
+
+ for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV})
+ for (auto Ty : {s32, v4s32})
+ setAction({BinOp, Ty}, Legal);
+}
+
+void X86LegalizerInfo::setLegalizerInfoSSE2() {
+ if (!Subtarget.hasSSE2())
+ return;
+
+ const LLT s64 = LLT::scalar(64);
+ const LLT v4s32 = LLT::vector(4, 32);
+ const LLT v2s64 = LLT::vector(2, 64);
+
+ for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV})
+ for (auto Ty : {s64, v2s64})
+ setAction({BinOp, Ty}, Legal);
+
+ for (unsigned BinOp : {G_ADD, G_SUB})
+ for (auto Ty : {v4s32})
+ setAction({BinOp, Ty}, Legal);
+
}
diff --git a/llvm/lib/Target/X86/X86LegalizerInfo.h b/llvm/lib/Target/X86/X86LegalizerInfo.h
index 10eb047d4d3..b9cf42f8016 100644
--- a/llvm/lib/Target/X86/X86LegalizerInfo.h
+++ b/llvm/lib/Target/X86/X86LegalizerInfo.h
@@ -34,6 +34,8 @@ public:
private:
void setLegalizerInfo32bit();
void setLegalizerInfo64bit();
+ void setLegalizerInfoSSE1();
+ void setLegalizerInfoSSE2();
};
} // End llvm namespace.
#endif
diff --git a/llvm/lib/Target/X86/X86RegisterBankInfo.cpp b/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
index 626e7ef4bed..fd9f62480c5 100644
--- a/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
@@ -54,6 +54,13 @@ const RegisterBank &X86RegisterBankInfo::getRegBankFromRegClass(
X86::GR64RegClass.hasSubClassEq(&RC))
return getRegBank(X86::GPRRegBankID);
+ if (X86::FR32XRegClass.hasSubClassEq(&RC) ||
+ X86::FR64XRegClass.hasSubClassEq(&RC) ||
+ X86::VR128XRegClass.hasSubClassEq(&RC) ||
+ X86::VR256XRegClass.hasSubClassEq(&RC) ||
+ X86::VR512RegClass.hasSubClassEq(&RC))
+ return getRegBank(X86::VECRRegBankID);
+
llvm_unreachable("Unsupported register kind yet.");
}
@@ -71,26 +78,51 @@ X86RegisterBankInfo::getOperandsMapping(const MachineInstr &MI, bool isFP) {
llvm_unreachable("Unsupported operand maping yet.");
ValueMappingIdx ValMapIdx = VMI_None;
- if (!isFP) {
+
+ if (Ty.isScalar()) {
+ if (!isFP) {
+ switch (Ty.getSizeInBits()) {
+ case 8:
+ ValMapIdx = VMI_3OpsGpr8Idx;
+ break;
+ case 16:
+ ValMapIdx = VMI_3OpsGpr16Idx;
+ break;
+ case 32:
+ ValMapIdx = VMI_3OpsGpr32Idx;
+ break;
+ case 64:
+ ValMapIdx = VMI_3OpsGpr64Idx;
+ break;
+ default:
+ llvm_unreachable("Unsupported register size.");
+ }
+ } else {
+ switch (Ty.getSizeInBits()) {
+ case 32:
+ ValMapIdx = VMI_3OpsFp32Idx;
+ break;
+ case 64:
+ ValMapIdx = VMI_3OpsFp64Idx;
+ break;
+ default:
+ llvm_unreachable("Unsupported register size.");
+ }
+ }
+ } else {
switch (Ty.getSizeInBits()) {
- case 8:
- ValMapIdx = VMI_3OpsGpr8Idx;
+ case 128:
+ ValMapIdx = VMI_3OpsVec128Idx;
break;
- case 16:
- ValMapIdx = VMI_3OpsGpr16Idx;
+ case 256:
+ ValMapIdx = VMI_3OpsVec256Idx;
break;
- case 32:
- ValMapIdx = VMI_3OpsGpr32Idx;
- break;
- case 64:
- ValMapIdx = VMI_3OpsGpr64Idx;
+ case 512:
+ ValMapIdx = VMI_3OpsVec512Idx;
break;
default:
llvm_unreachable("Unsupported register size.");
- break;
}
- } else {
- llvm_unreachable("Floating point not supported yet.");
}
return InstructionMapping{DefaultMappingID, 1, &ValMappings[ValMapIdx],
@@ -114,6 +146,12 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case TargetOpcode::G_SUB:
return getOperandsMapping(MI, false);
break;
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
+ return getOperandsMapping(MI, true);
+ break;
default:
return InstructionMapping{};
}
diff --git a/llvm/lib/Target/X86/X86RegisterBanks.td b/llvm/lib/Target/X86/X86RegisterBanks.td
index b01bf528201..6d17cd53a0c 100644
--- a/llvm/lib/Target/X86/X86RegisterBanks.td
+++ b/llvm/lib/Target/X86/X86RegisterBanks.td
@@ -12,3 +12,6 @@
/// General Purpose Registers: RAX, RCX,...
def GPRRegBank : RegisterBank<"GPR", [GR64]>;
+
+/// Floating Point/Vector Registers
+def VECRRegBank : RegisterBank<"VECR", [VR512]>;
OpenPOWER on IntegriCloud