summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86InstrInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86/X86InstrInfo.cpp')
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.cpp191
1 files changed, 188 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index b8093641308..71464270d46 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -9694,8 +9694,6 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = {
{ X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrr, X86::VPBROADCASTQYrr},
{ X86::VBROADCASTSDYrm, X86::VBROADCASTSDYrm, X86::VPBROADCASTQYrm},
{ X86::VBROADCASTF128, X86::VBROADCASTF128, X86::VBROADCASTI128 },
- { X86::VBLENDPSrri, X86::VBLENDPSrri, X86::VPBLENDDrri },
- { X86::VBLENDPSrmi, X86::VBLENDPSrmi, X86::VPBLENDDrmi },
{ X86::VBLENDPSYrri, X86::VBLENDPSYrri, X86::VPBLENDDYrri },
{ X86::VBLENDPSYrmi, X86::VBLENDPSYrmi, X86::VPBLENDDYrmi },
{ X86::VPERMILPSYmi, X86::VPERMILPSYmi, X86::VPSHUFDYmi },
@@ -9949,6 +9947,24 @@ static const uint16_t ReplaceableInstrsAVX512DQMasked[][4] = {
X86::VPXORQZrmbkz, X86::VPXORDZrmbkz },
};
+// NOTE: These should only be used by the custom domain methods.
+static const uint16_t ReplaceableCustomInstrs[][3] = {
+ //PackedSingle PackedDouble PackedInt
+ { X86::BLENDPSrmi, X86::BLENDPDrmi, X86::PBLENDWrmi },
+ { X86::BLENDPSrri, X86::BLENDPDrri, X86::PBLENDWrri },
+ { X86::VBLENDPSrmi, X86::VBLENDPDrmi, X86::VPBLENDWrmi },
+ { X86::VBLENDPSrri, X86::VBLENDPDrri, X86::VPBLENDWrri },
+ { X86::VBLENDPSYrmi, X86::VBLENDPDYrmi, X86::VPBLENDWYrmi },
+ { X86::VBLENDPSYrri, X86::VBLENDPDYrri, X86::VPBLENDWYrri },
+};
+static const uint16_t ReplaceableCustomAVX2Instrs[][3] = {
+ //PackedSingle PackedDouble PackedInt
+ { X86::VBLENDPSrmi, X86::VBLENDPDrmi, X86::VPBLENDDrmi },
+ { X86::VBLENDPSrri, X86::VBLENDPDrri, X86::VPBLENDDrri },
+ { X86::VBLENDPSYrmi, X86::VBLENDPDYrmi, X86::VPBLENDDYrmi },
+ { X86::VBLENDPSYrri, X86::VBLENDPDYrri, X86::VPBLENDDYrri },
+};
+
// FIXME: Some shuffle and unpack instructions have equivalents in different
// domains, but they require a bit more work than just switching opcodes.
@@ -9969,13 +9985,177 @@ static const uint16_t *lookupAVX512(unsigned opcode, unsigned domain,
return nullptr;
}
+// Helper to attempt to widen/narrow blend masks.
+static bool AdjustBlendMask(unsigned OldMask, unsigned OldWidth,
+ unsigned NewWidth, unsigned *pNewMask = nullptr) {
+ assert(((OldWidth % NewWidth) == 0 || (NewWidth % OldWidth) == 0) &&
+ "Illegal blend mask scale");
+ unsigned NewMask = 0;
+
+ if ((OldWidth % NewWidth) == 0) {
+ unsigned Scale = OldWidth / NewWidth;
+ unsigned SubMask = (1u << Scale) - 1;
+ for (unsigned i = 0; i != NewWidth; ++i) {
+ unsigned Sub = (OldMask >> (i * Scale)) & SubMask;
+ if (Sub == SubMask)
+ NewMask |= (1u << i);
+ else if (Sub != 0x0)
+ return false;
+ }
+ } else {
+ unsigned Scale = NewWidth / OldWidth;
+ unsigned SubMask = (1u << Scale) - 1;
+ for (unsigned i = 0; i != OldWidth; ++i) {
+ if (OldMask & (1 << i)) {
+ NewMask |= (SubMask << (i * Scale));
+ }
+ }
+ }
+
+ if (pNewMask)
+ *pNewMask = NewMask;
+ return true;
+}
+
+uint16_t X86InstrInfo::getExecutionDomainCustom(const MachineInstr &MI) const {
+ unsigned Opcode = MI.getOpcode();
+ unsigned NumOperands = MI.getNumOperands();
+
+ auto GetBlendDomains = [&](unsigned ImmWidth, bool Is256) {
+ uint16_t validDomains = 0;
+ if (MI.getOperand(NumOperands - 1).isImm()) {
+ unsigned Imm = MI.getOperand(NumOperands - 1).getImm();
+ if (AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4))
+ validDomains |= 0x2; // PackedSingle
+ if (AdjustBlendMask(Imm, ImmWidth, Is256 ? 4 : 2))
+ validDomains |= 0x4; // PackedDouble
+ if (!Is256 || Subtarget.hasAVX2())
+ validDomains |= 0x8; // PackedInt
+ }
+ return validDomains;
+ };
+
+ switch (Opcode) {
+ case X86::BLENDPDrmi:
+ case X86::BLENDPDrri:
+ case X86::VBLENDPDrmi:
+ case X86::VBLENDPDrri:
+ return GetBlendDomains(2, false);
+ case X86::VBLENDPDYrmi:
+ case X86::VBLENDPDYrri:
+ return GetBlendDomains(4, true);
+ case X86::BLENDPSrmi:
+ case X86::BLENDPSrri:
+ case X86::VBLENDPSrmi:
+ case X86::VBLENDPSrri:
+ case X86::VPBLENDDrmi:
+ case X86::VPBLENDDrri:
+ return GetBlendDomains(4, false);
+ case X86::VBLENDPSYrmi:
+ case X86::VBLENDPSYrri:
+ case X86::VPBLENDDYrmi:
+ case X86::VPBLENDDYrri:
+ return GetBlendDomains(8, true);
+ case X86::PBLENDWrmi:
+ case X86::PBLENDWrri:
+ case X86::VPBLENDWrmi:
+ case X86::VPBLENDWrri:
+ // Treat VPBLENDWY as a 128-bit vector as it repeats the lo/hi masks.
+ case X86::VPBLENDWYrmi:
+ case X86::VPBLENDWYrri:
+ return GetBlendDomains(8, false);
+ }
+ return 0;
+}
+
+bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI,
+ unsigned Domain) const {
+ assert(Domain > 0 && Domain < 4 && "Invalid execution domain");
+ uint16_t dom = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
+ assert(dom && "Not an SSE instruction");
+
+ unsigned Opcode = MI.getOpcode();
+ unsigned NumOperands = MI.getNumOperands();
+
+ auto SetBlendDomain = [&](unsigned ImmWidth, bool Is256) {
+ if (MI.getOperand(NumOperands - 1).isImm()) {
+ unsigned Imm = MI.getOperand(NumOperands - 1).getImm() & 255;
+ Imm = (ImmWidth == 16 ? ((Imm << 8) | Imm) : Imm);
+ unsigned NewImm = Imm;
+
+ const uint16_t *table = lookup(Opcode, dom, ReplaceableCustomInstrs);
+ if (!table)
+ table = lookup(Opcode, dom, ReplaceableCustomAVX2Instrs);
+
+ if (Domain == 1) { // PackedSingle
+ AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm);
+ } else if (Domain == 2) { // PackedDouble
+ AdjustBlendMask(Imm, ImmWidth, Is256 ? 4 : 2, &NewImm);
+ } else if (Domain == 3) { // PackedInt
+ if (Subtarget.hasAVX2()) {
+ // If we are already VPBLENDW use that, else use VPBLENDD.
+ if ((ImmWidth / (Is256 ? 2 : 1)) != 8) {
+ table = lookup(Opcode, dom, ReplaceableCustomAVX2Instrs);
+ AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm);
+ }
+ } else {
+ assert(!Is256 && "128-bit vector expected");
+ AdjustBlendMask(Imm, ImmWidth, 8, &NewImm);
+ }
+ }
+
+ assert(table && table[Domain - 1] && "Unknown domain op");
+ MI.setDesc(get(table[Domain - 1]));
+ MI.getOperand(NumOperands - 1).setImm(NewImm & 255);
+ }
+ return true;
+ };
+
+ switch (Opcode) {
+ case X86::BLENDPDrmi:
+ case X86::BLENDPDrri:
+ case X86::VBLENDPDrmi:
+ case X86::VBLENDPDrri:
+ return SetBlendDomain(2, false);
+ case X86::VBLENDPDYrmi:
+ case X86::VBLENDPDYrri:
+ return SetBlendDomain(4, true);
+ case X86::BLENDPSrmi:
+ case X86::BLENDPSrri:
+ case X86::VBLENDPSrmi:
+ case X86::VBLENDPSrri:
+ case X86::VPBLENDDrmi:
+ case X86::VPBLENDDrri:
+ return SetBlendDomain(4, false);
+ case X86::VBLENDPSYrmi:
+ case X86::VBLENDPSYrri:
+ case X86::VPBLENDDYrmi:
+ case X86::VPBLENDDYrri:
+ return SetBlendDomain(8, true);
+ case X86::PBLENDWrmi:
+ case X86::PBLENDWrri:
+ case X86::VPBLENDWrmi:
+ case X86::VPBLENDWrri:
+ return SetBlendDomain(8, false);
+ case X86::VPBLENDWYrmi:
+ case X86::VPBLENDWYrri:
+ return SetBlendDomain(16, true);
+ }
+ return false;
+}
+
std::pair<uint16_t, uint16_t>
X86InstrInfo::getExecutionDomain(const MachineInstr &MI) const {
uint16_t domain = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
unsigned opcode = MI.getOpcode();
uint16_t validDomains = 0;
if (domain) {
- if (lookup(MI.getOpcode(), domain, ReplaceableInstrs)) {
+ // Attempt to match for custom instructions.
+ if (validDomains = getExecutionDomainCustom(MI)) {
+ return std::make_pair(domain, validDomains);
+ }
+
+ if (lookup(opcode, domain, ReplaceableInstrs)) {
validDomains = 0xe;
} else if (lookup(opcode, domain, ReplaceableInstrsAVX2)) {
validDomains = Subtarget.hasAVX2() ? 0xe : 0x6;
@@ -10007,6 +10187,11 @@ void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const {
assert(Domain>0 && Domain<4 && "Invalid execution domain");
uint16_t dom = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
assert(dom && "Not an SSE instruction");
+
+ // Attempt to match for custom instructions.
+ if (setExecutionDomainCustom(MI, Domain))
+ return;
+
const uint16_t *table = lookup(MI.getOpcode(), dom, ReplaceableInstrs);
if (!table) { // try the other table
assert((Subtarget.hasAVX2() || Domain < 3) &&
OpenPOWER on IntegriCloud