summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorHal Finkel <hfinkel@anl.gov>2015-01-03 01:16:37 +0000
committerHal Finkel <hfinkel@anl.gov>2015-01-03 01:16:37 +0000
commit4edc66b8de9672fcb0d8ef4059d2658dd351c6f0 (patch)
treec0dcc91c1c6d9c2ce38a2abcb0c60d3a4e9b9268 /llvm/lib/Target
parent4c059622d534eb4f14805985a4c3fe4a7580f8da (diff)
downloadbcm5719-llvm-4edc66b8de9672fcb0d8ef4059d2658dd351c6f0.tar.gz
bcm5719-llvm-4edc66b8de9672fcb0d8ef4059d2658dd351c6f0.zip
[PowerPC] Add support for the CMPB instruction
Newer POWER cores, and the A2, support the cmpb instruction. This instruction compares its operands, treating each of the 8 bytes in the GPRs separately, returning a 'mask' result of 0 (for false) or -1 (for true) in each byte. Code generation support is added, in the form of a PPCISelDAGToDAG DAG-preprocessing routine, that recognizes patterns close to what the instruction computes (either exactly, or related by a constant masking operation), and generates the cmpb instruction (along with any necessary constant masking operation). This can be expanded if use cases arise. llvm-svn: 225106
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/PowerPC/PPC.td15
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp252
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp1
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.h3
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstr64Bit.td5
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.td7
-rw-r--r--llvm/lib/Target/PowerPC/PPCSubtarget.cpp1
-rw-r--r--llvm/lib/Target/PowerPC/PPCSubtarget.h2
8 files changed, 278 insertions, 8 deletions
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index 4c9581927d7..a7fd62c0730 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -88,6 +88,8 @@ def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true",
"Enable the popcnt[dw] instructions">;
def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true",
"Enable the ldbrx instruction">;
+def FeatureCMPB : SubtargetFeature<"cmpb", "HasCMPB", "true", // "cmpb" feature; tied to the HasCMPB subtarget flag that combineToCMPB checks via hasCMPB()
+ "Enable the cmpb instruction">;
def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
"Enable Book E instructions">;
def FeatureMSYNC : SubtargetFeature<"msync", "HasOnlyMSYNC", "true",
@@ -116,7 +118,6 @@ def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
// Note: Future features to add when support is extended to more
// recent ISA levels:
//
-// CMPB p6, p6x, p7 cmpb
// DFP p6, p6x, p7 decimal floating-point instructions
// POPCNTB p5 through p7 popcntb and related instructions
@@ -258,7 +259,7 @@ def : ProcessorModel<"a2", PPCA2Model,
FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
- FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
+ FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit
/*, Feature64BitRegs */, DeprecatedMFTB]>;
def : ProcessorModel<"a2q", PPCA2Model,
[DirectiveA2, FeatureBookE, FeatureMFOCRF,
@@ -266,7 +267,7 @@ def : ProcessorModel<"a2q", PPCA2Model,
FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
- FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
+ FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit
/*, Feature64BitRegs */, FeatureQPX, DeprecatedMFTB]>;
def : ProcessorModel<"pwr3", G5Model,
[DirectivePwr3, FeatureAltivec,
@@ -292,14 +293,14 @@ def : ProcessorModel<"pwr6", G5Model,
[DirectivePwr6, FeatureAltivec,
FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
- FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB,
FeatureFPRND, Feature64Bit /*, Feature64BitRegs */,
DeprecatedMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr6x", G5Model,
[DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
- FeatureSTFIWX, FeatureLFIWAX,
+ FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB,
FeatureFPRND, Feature64Bit,
DeprecatedMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr7", P7Model,
@@ -308,7 +309,7 @@ def : ProcessorModel<"pwr7", P7Model,
FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
- FeaturePOPCNTD, FeatureLDBRX,
+ FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
Feature64Bit /*, Feature64BitRegs */,
DeprecatedMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr8", P8Model,
@@ -317,7 +318,7 @@ def : ProcessorModel<"pwr8", P8Model,
FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
- FeaturePOPCNTD, FeatureLDBRX,
+ FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
Feature64Bit /*, Feature64BitRegs */,
DeprecatedMFTB, DeprecatedDST]>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index efc0b7fa2f0..c6dc50143c0 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -83,6 +83,7 @@ namespace {
return true;
}
+ void PreprocessISelDAG() override;
void PostprocessISelDAG() override;
/// getI32Imm - Return a target constant with the specified value, of type
@@ -215,6 +216,8 @@ private:
void PeepholePPC64ZExt();
void PeepholeCROps();
+ SDValue combineToCMPB(SDNode *N);
+
bool AllUsersSelectZero(SDNode *N);
void SwapAllSelectUsers(SDNode *N);
};
@@ -684,7 +687,6 @@ static SDNode *SelectInt64(SelectionDAG *CurDAG, SDNode *N) {
return SelectInt64(CurDAG, dl, Imm);
}
-
namespace {
class BitPermutationSelector {
struct ValueBit {
@@ -2872,6 +2874,254 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return SelectCode(N);
}
+/// combineToCMPB - Try to rewrite the OR tree rooted at \p N as a single
+/// PPCISD::CMPB node followed by whatever constant masking is required.
+///
+/// The tree is accepted only if every leaf is a SELECT_CC that tests one byte
+/// of the same pair of values for equality and selects between two constants
+/// confined to that byte position. Returns a null SDValue when the subtarget
+/// lacks cmpb, the type is not i32/i64, or the tree does not match.
+///
+/// If the target supports the cmpb instruction, do the idiom recognition here.
+/// We don't do this as a DAG combine because we don't want to do it as nodes
+/// are being combined (because we might miss part of the eventual idiom). We
+/// don't want to do it during instruction selection because we want to reuse
+/// the logic for lowering the masking operations already part of the
+/// instruction selector.
+SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
+  SDLoc dl(N);
+
+  assert(N->getOpcode() == ISD::OR &&
+         "Only OR nodes are supported for CMPB");
+
+  SDValue Res;
+  if (!PPCSubTarget->hasCMPB())
+    return Res;
+
+  if (N->getValueType(0) != MVT::i32 &&
+      N->getValueType(0) != MVT::i64)
+    return Res;
+
+  EVT VT = N->getValueType(0);
+
+  // The pair of values being compared, which bytes of them have been matched,
+  // and the accumulated per-byte constants produced when a byte compares
+  // equal (Mask) or unequal (Alt).
+  SDValue RHS, LHS;
+  bool BytesFound[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+  uint64_t Mask = 0, Alt = 0;
+
+  // Match a single per-byte select. On success: b is the byte index, Mask and
+  // Alt have the select's true/false constants OR'ed in, and LHS/RHS are the
+  // two values whose byte b is being compared. Mask and Alt may already have
+  // been updated when the match fails later on, so callers must pass scratch
+  // variables and only commit them on success.
+  auto IsByteSelectCC = [this](SDValue O, unsigned &b,
+                               uint64_t &Mask, uint64_t &Alt,
+                               SDValue &LHS, SDValue &RHS) {
+    if (O.getOpcode() != ISD::SELECT_CC)
+      return false;
+    ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();
+
+    if (!isa<ConstantSDNode>(O.getOperand(2)) ||
+        !isa<ConstantSDNode>(O.getOperand(3)))
+      return false;
+
+    // Find the one byte position that contains every set bit of both select
+    // constants; the 'true' constant must be non-zero.
+    uint64_t PM = O.getConstantOperandVal(2);
+    uint64_t PAlt = O.getConstantOperandVal(3);
+    for (b = 0; b < 8; ++b) {
+      uint64_t ByteMask = UINT64_C(0xFF) << (8*b);
+      if (PM && (PM & ByteMask) == PM && (PAlt & ByteMask) == PAlt)
+        break;
+    }
+
+    if (b == 8)
+      return false;
+    Mask |= PM;
+    Alt |= PAlt;
+
+    // Forms where the comparison's right-hand side is not the constant zero.
+    if (!isa<ConstantSDNode>(O.getOperand(1)) ||
+        O.getConstantOperandVal(1) != 0) {
+      SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);
+      if (Op0.getOpcode() == ISD::TRUNCATE)
+        Op0 = Op0.getOperand(0);
+      if (Op1.getOpcode() == ISD::TRUNCATE)
+        Op1 = Op1.getOperand(0);
+
+      // (srl a, Bits-8) == (srl b, Bits-8): compares the top bytes directly.
+      // Truncates were peeled off each side independently, so insist that
+      // both shifts operate on the same type; otherwise the two sides would
+      // not be extracting the same byte.
+      if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
+          Op0.getValueType() == Op1.getValueType() &&
+          Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
+          isa<ConstantSDNode>(Op0.getOperand(1))) {
+
+        unsigned Bits = Op0.getValueType().getSizeInBits();
+        if (b != Bits/8-1)
+          return false;
+        if (Op0.getConstantOperandVal(1) != Bits-8)
+          return false;
+
+        LHS = Op0.getOperand(0);
+        RHS = Op1.getOperand(0);
+        return true;
+      }
+
+      // When we have small integers (i16 to be specific), the form present
+      // post-legalization uses SETULT in the SELECT_CC for the
+      // higher-order byte, depending on the fact that the
+      // even-higher-order bytes are known to all be zero, for example:
+      //   select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
+      // (so when the second byte is the same, because all higher-order
+      // bits from bytes 3 and 4 are known to be zero, the result of the
+      // xor can be at most 255)
+      if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
+          isa<ConstantSDNode>(O.getOperand(1))) {
+
+        uint64_t ULim = O.getConstantOperandVal(1);
+        if (ULim != (UINT64_C(1) << b*8))
+          return false;
+
+        // Now we need to make sure that the upper bytes are known to be
+        // zero.
+        unsigned Bits = Op0.getValueType().getSizeInBits();
+        if (!CurDAG->MaskedValueIsZero(Op0,
+              APInt::getHighBitsSet(Bits, Bits - (b+1)*8)))
+          return false;
+
+        LHS = Op0.getOperand(0);
+        RHS = Op0.getOperand(1);
+        return true;
+      }
+
+      return false;
+    }
+
+    // Remaining forms compare some function of (xor a, b) against zero.
+    if (CC != ISD::SETEQ)
+      return false;
+
+    SDValue Op = O.getOperand(0);
+    if (Op.getOpcode() == ISD::AND) {
+      // (and (xor a, b), 0xFF << 8*b) == 0
+      if (!isa<ConstantSDNode>(Op.getOperand(1)))
+        return false;
+      if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b)))
+        return false;
+
+      SDValue XOR = Op.getOperand(0);
+      if (XOR.getOpcode() == ISD::TRUNCATE)
+        XOR = XOR.getOperand(0);
+      if (XOR.getOpcode() != ISD::XOR)
+        return false;
+
+      LHS = XOR.getOperand(0);
+      RHS = XOR.getOperand(1);
+      return true;
+    } else if (Op.getOpcode() == ISD::SRL) {
+      // (srl (xor a, b), Bits-8) == 0: only valid for the top byte.
+      if (!isa<ConstantSDNode>(Op.getOperand(1)))
+        return false;
+      unsigned Bits = Op.getValueType().getSizeInBits();
+      if (b != Bits/8-1)
+        return false;
+      if (Op.getConstantOperandVal(1) != Bits-8)
+        return false;
+
+      SDValue XOR = Op.getOperand(0);
+      if (XOR.getOpcode() == ISD::TRUNCATE)
+        XOR = XOR.getOperand(0);
+      if (XOR.getOpcode() != ISD::XOR)
+        return false;
+
+      LHS = XOR.getOperand(0);
+      RHS = XOR.getOperand(1);
+      return true;
+    }
+
+    return false;
+  };
+
+  // Walk the OR tree. Nested ORs are expanded; every other operand must be a
+  // byte select on the same (unordered) pair of values, or we give up.
+  SmallVector<SDValue, 8> Queue(1, SDValue(N, 0));
+  while (!Queue.empty()) {
+    SDValue V = Queue.pop_back_val();
+
+    for (const SDValue &O : V.getNode()->ops()) {
+      unsigned b;
+      uint64_t M = 0, A = 0;
+      SDValue OLHS, ORHS;
+      if (O.getOpcode() == ISD::OR) {
+        Queue.push_back(O);
+      } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
+        if (!LHS) {
+          // The first matched leaf fixes the pair being compared.
+          LHS = OLHS;
+          RHS = ORHS;
+          BytesFound[b] = true;
+          Mask |= M;
+          Alt |= A;
+        } else if ((LHS == ORHS && RHS == OLHS) ||
+                   (RHS == ORHS && LHS == OLHS)) {
+          BytesFound[b] = true;
+          Mask |= M;
+          Alt |= A;
+        } else {
+          return Res;
+        }
+      } else {
+        return Res;
+      }
+    }
+  }
+
+  // Count the matched bytes and remember the highest matched index. This must
+  // test BytesFound[i]: indexing with LastB only ever inspects the contiguous
+  // run starting at byte 0, which rejects idioms that skip byte 0 and counts a
+  // lone byte 0 as two bytes.
+  unsigned LastB = 0, BCnt = 0;
+  for (unsigned i = 0; i < 8; ++i)
+    if (BytesFound[i]) {
+      ++BCnt;
+      LastB = i;
+    }
+
+  // Not worthwhile unless at least two distinct bytes are being compared.
+  if (!LastB || BCnt < 2)
+    return Res;
+
+  // Because we'll be zero-extending the output anyway if we don't have a
+  // specific value for each input byte (via the Mask), we can 'anyext' the
+  // inputs. Each operand is normalised on its own since truncates were looked
+  // through independently while matching.
+  if (LHS.getValueType() != VT)
+    LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);
+  if (RHS.getValueType() != VT)
+    RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);
+
+  Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);
+
+  bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
+  if (NonTrivialMask && !Alt) {
+    // Res = Mask & CMPB
+    Res = CurDAG->getNode(ISD::AND, dl, VT, Res, CurDAG->getConstant(Mask, VT));
+  } else if (Alt) {
+    // Res = (CMPB & Mask) | (~CMPB & Alt)
+    // Which, as suggested here:
+    //   https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
+    // can be written as:
+    // Res = Alt ^ ((Alt ^ Mask) & CMPB)
+    // useful because the (Alt ^ Mask) can be pre-computed.
+    Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
+            CurDAG->getConstant(Mask ^ Alt, VT));
+    Res = CurDAG->getNode(ISD::XOR, dl, VT, Res, CurDAG->getConstant(Alt, VT));
+  }
+
+  return Res;
+}
+
+/// PreprocessISelDAG - Scan the DAG before instruction selection and replace
+/// OR trees that combineToCMPB recognizes with the node it returns. Dead nodes
+/// are removed afterwards if anything was replaced.
+void PPCDAGToDAGISel::PreprocessISelDAG() {
+  // Begin one past the root and step backwards to the head of the node list.
+  SelectionDAG::allnodes_iterator Cursor(CurDAG->getRoot().getNode());
+  ++Cursor;
+
+  bool Changed = false;
+  while (Cursor != CurDAG->allnodes_begin()) {
+    SDNode *Node = --Cursor;
+    if (Node->use_empty())
+      continue;
+
+    // OR is the only opcode with a preprocessing combine at present.
+    SDValue Replacement;
+    if (Node->getOpcode() == ISD::OR)
+      Replacement = combineToCMPB(Node);
+
+    if (!Replacement)
+      continue;
+
+    DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld:    ");
+    DEBUG(Node->dump(CurDAG));
+    DEBUG(dbgs() << "\nNew: ");
+    DEBUG(Replacement.getNode()->dump(CurDAG));
+    DEBUG(dbgs() << "\n");
+
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, 0), Replacement);
+    Changed = true;
+  }
+
+  // Replaced nodes are left without users; sweep them once at the end.
+  if (Changed)
+    CurDAG->RemoveDeadNodes();
+}
+
/// PostprocessISelDAG - Perform some late peephole optimizations
/// on the DAG representation.
void PPCDAGToDAGISel::PostprocessISelDAG() {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index e430b07d143..32f958ebad7 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -759,6 +759,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
case PPCISD::VPERM: return "PPCISD::VPERM";
+ case PPCISD::CMPB: return "PPCISD::CMPB";
case PPCISD::Hi: return "PPCISD::Hi";
case PPCISD::Lo: return "PPCISD::Lo";
case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 089cfe73c47..b4b11d846e3 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -61,6 +61,9 @@ namespace llvm {
///
VPERM,
+ /// The CMPB instruction (takes two operands of i32 or i64).
+ CMPB,
+
/// Hi/Lo - These represent the high and low 16-bit parts of a global
/// address respectively. These nodes have two operands, the first of
/// which must be a TargetGlobalAddress, and the second of which must be a
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 7d1249c2ce7..75f4656d01b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -591,6 +591,11 @@ def POPCNTD : XForm_11<31, 506, (outs g8rc:$rA), (ins g8rc:$rS),
"popcntd $rA, $rS", IIC_IntGeneral,
[(set i64:$rA, (ctpop i64:$rS))]>;
+let isCodeGenOnly = 1, isCommutable = 1 in // 64-bit (g8rc) form of cmpb; its pattern matches PPCcmpb on i64
+def CMPB8 : XForm_6<31, 508, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "cmpb $rA, $rS, $rB", IIC_IntGeneral,
+ [(set i64:$rA, (PPCcmpb i64:$rS, i64:$rB))]>;
+
// popcntw also does a population count on the high 32 bits (storing the
// results in the high 32-bits of the output). We'll ignore that here (which is
// safe because we never separately use the high part of the 64-bit registers).
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 4c01b2dee0c..aec1385aaf5 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -118,6 +118,8 @@ def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
+def PPCcmpb : SDNode<"PPCISD::CMPB", SDTIntBinOp, []>; // DAG node for cmpb: integer binary op, no node properties
+
// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
// amounts. These nodes are generated by the multi-precision shift code.
def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>;
@@ -1865,6 +1867,11 @@ defm EXTSB : XForm_11r<31, 954, (outs gprc:$rA), (ins gprc:$rS),
defm EXTSH : XForm_11r<31, 922, (outs gprc:$rA), (ins gprc:$rS),
"extsh", "$rA, $rS", IIC_IntSimple,
[(set i32:$rA, (sext_inreg i32:$rS, i16))]>;
+
+let isCommutable = 1 in // 32-bit (gprc) form of cmpb; its pattern matches PPCcmpb on i32
+def CMPB : XForm_6<31, 508, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "cmpb $rA, $rS, $rB", IIC_IntGeneral,
+ [(set i32:$rA, (PPCcmpb i32:$rS, i32:$rB))]>;
}
let isCompare = 1, hasSideEffects = 0 in {
def CMPW : XForm_16_ext<31, 0, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB),
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 8f0e3a49ee9..fb47c910c2f 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -108,6 +108,7 @@ void PPCSubtarget::initializeEnvironment() {
HasFPCVT = false;
HasISEL = false;
HasPOPCNTD = false;
+ HasCMPB = false;
HasLDBRX = false;
IsBookE = false;
HasOnlyMSYNC = false;
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index 1df19c3e1eb..de5f92a971b 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -102,6 +102,7 @@ protected:
bool HasFPCVT;
bool HasISEL;
bool HasPOPCNTD;
+ bool HasCMPB;
bool HasLDBRX;
bool IsBookE;
bool HasOnlyMSYNC;
@@ -220,6 +221,7 @@ public:
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool hasPOPCNTD() const { return HasPOPCNTD; }
+ bool hasCMPB() const { return HasCMPB; }
bool hasLDBRX() const { return HasLDBRX; }
bool isBookE() const { return IsBookE; }
bool hasOnlyMSYNC() const { return HasOnlyMSYNC; }
OpenPOWER on IntegriCloud