summary refs log tree commit diff stats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp 48
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZISelLowering.h 4
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZInstrInfo.td 7
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZOperators.td 1
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZProcessors.td 13
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZSubtarget.cpp 3
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZSubtarget.h 4
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp 9
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h 2
9 files changed, 85 insertions, 6 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 18d4c02e779..e0cb376d11d 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -163,8 +163,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm,
// available, or if the operand is constant.
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
+ // Use POPCNT on z196 and above.
+ if (Subtarget.hasPopulationCount())
+ setOperationAction(ISD::CTPOP, VT, Custom);
+ else
+ setOperationAction(ISD::CTPOP, VT, Expand);
+
// No special instructions for these.
- setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
@@ -2304,6 +2309,45 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
MVT::i64, HighOp, Low32);
}
+// Lower an ISD::CTPOP node using the POPCNT instruction.  POPCNT produces a
+// separate population count for each byte of its 64-bit operand, so the
+// per-byte counts are summed with a shift-and-add tree that accumulates the
+// total in the highest byte under consideration, and the total is then
+// shifted down to bit 0.
+//
+// Op is the CTPOP node; the returned value has the same type as Op.
+SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
+                                          SelectionDAG &DAG) const {
+  EVT VT = Op.getValueType();
+  int64_t OrigBitSize = VT.getSizeInBits();
+  SDLoc DL(Op);
+
+  // Get the known-zero mask for the operand.
+  Op = Op.getOperand(0);
+  APInt KnownZero, KnownOne;
+  DAG.computeKnownBits(Op, KnownZero, KnownOne);
+
+  // If every bit of the operand is known to be zero, so is the count.
+  if ((~KnownZero).getActiveBits() == 0)
+    return DAG.getConstant(0, VT);
+  uint64_t Mask = ~KnownZero.getZExtValue();
+
+  // Skip known-zero high parts of the operand.  Stop halving at one byte:
+  // POPCNT already yields the final count for a single byte, and halving
+  // further would eventually test an empty bit range, which always compares
+  // equal to zero, so the loop would never terminate.
+  int64_t BitSize = OrigBitSize;
+  while (BitSize > 8) {
+    int64_t Half = BitSize / 2;
+    uint64_t HighHalf = (((uint64_t)1 << Half) - 1) << Half;
+    if ((Mask & HighHalf) != 0)
+      break;
+    BitSize = Half;
+  }
+
+  // The POPCNT instruction counts the number of bits in each byte.
+  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
+  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
+  Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
+
+  // Add up per-byte counts in a binary tree.  All bits of Op at
+  // position larger than BitSize remain zero throughout.
+  for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
+    SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, VT));
+    // When only a low part of the value is being summed, mask off whatever
+    // the shift pushed beyond BitSize so the upper bits stay zero.
+    if (BitSize != OrigBitSize)
+      Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
+                        DAG.getConstant(((uint64_t)1 << BitSize) - 1, VT));
+    Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
+  }
+
+  // Extract overall result from high byte.
+  if (BitSize > 8)
+    Op = DAG.getNode(ISD::SRL, DL, VT, Op, DAG.getConstant(BitSize - 8, VT));
+
+  return Op;
+}
+
+
// Op is an atomic load. Lower it into a normal volatile load.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
SelectionDAG &DAG) const {
@@ -2554,6 +2598,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerUDIVREM(Op, DAG);
case ISD::OR:
return lowerOR(Op, DAG);
+ case ISD::CTPOP:
+ return lowerCTPOP(Op, DAG);
case ISD::ATOMIC_SWAP:
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
case ISD::ATOMIC_STORE:
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 15add448d19..dda4f4544ed 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -87,6 +87,9 @@ enum {
// the number of the register.
EXTRACT_ACCESS,
+ // Count number of bits set in operand 0 per byte.
+ POPCNT,
+
// Wrappers around the ISD opcodes of the same name. The output and
// first input operands are GR128s. The trailing numbers are the
// widths of the second operand in bits.
@@ -304,6 +307,7 @@ private:
SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index a7f774791d4..31db301d95e 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1382,6 +1382,13 @@ let Defs = [CC] in {
def : Pat<(ctlz GR64:$src),
(EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>;
+// Population count. Counts bits set per byte.
+// The definition is only available when FeaturePopulationCount is enabled,
+// and the instruction is listed as defining CC.  It is selected for the
+// z_popcnt node on 64-bit registers.
+let Predicates = [FeaturePopulationCount], Defs = [CC] in {
+ def POPCNT : InstRRE<0xB9E1, (outs GR64:$R1), (ins GR64:$R2),
+ "popcnt\t$R1, $R2",
+ [(set GR64:$R1, (z_popcnt GR64:$R2))]>;
+}
+
// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext.
def : Pat<(i64 (anyext GR32:$src)),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>;
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index 51ac5daad54..0cb476d2122 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -121,6 +121,7 @@ def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask,
def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>;
def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS",
SDT_ZExtractAccess>;
+// Selection-DAG node for SystemZISD::POPCNT, declared with the
+// SDTIntUnaryOp type profile.
+def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>;
def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>;
def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>;
def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>;
diff --git a/llvm/lib/Target/SystemZ/SystemZProcessors.td b/llvm/lib/Target/SystemZ/SystemZProcessors.td
index 1594854ab2c..b8bbd5677e0 100644
--- a/llvm/lib/Target/SystemZ/SystemZProcessors.td
+++ b/llvm/lib/Target/SystemZ/SystemZProcessors.td
@@ -39,6 +39,11 @@ def FeatureFPExtension : SystemZFeature<
"Assume that the floating-point extension facility is installed"
>;
+// Subtarget feature "population-count".  It is used as the predicate on the
+// POPCNT instruction definition.
+def FeaturePopulationCount : SystemZFeature<
+ "population-count", "PopulationCount",
+ "Assume that the population-count facility is installed"
+>;
+
def FeatureFastSerialization : SystemZFeature<
"fast-serialization", "FastSerialization",
"Assume that the fast-serialization facility is installed"
@@ -54,9 +59,9 @@ def : Processor<"generic", NoItineraries, []>;
def : Processor<"z10", NoItineraries, []>;
def : Processor<"z196", NoItineraries,
[FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
- FeatureFPExtension, FeatureFastSerialization,
- FeatureInterlockedAccess1]>;
+ FeatureFPExtension, FeaturePopulationCount,
+ FeatureFastSerialization, FeatureInterlockedAccess1]>;
def : Processor<"zEC12", NoItineraries,
[FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
- FeatureFPExtension, FeatureFastSerialization,
- FeatureInterlockedAccess1]>;
+ FeatureFPExtension, FeaturePopulationCount,
+ FeatureFastSerialization, FeatureInterlockedAccess1]>;
diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
index 4b5c23cefc9..0999b45c9d6 100644
--- a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -38,7 +38,8 @@ SystemZSubtarget::SystemZSubtarget(const std::string &TT,
const TargetMachine &TM)
: SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false),
HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false),
- HasFastSerialization(false), HasInterlockedAccess1(false),
+ HasPopulationCount(false), HasFastSerialization(false),
+ HasInterlockedAccess1(false),
TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
TLInfo(TM, *this), TSInfo(*TM.getDataLayout()), FrameLowering() {}
diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/llvm/lib/Target/SystemZ/SystemZSubtarget.h
index 99cb1ad3045..b3e7a3512cd 100644
--- a/llvm/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.h
@@ -38,6 +38,7 @@ protected:
bool HasLoadStoreOnCond;
bool HasHighWord;
bool HasFPExtension;
+ bool HasPopulationCount;
bool HasFastSerialization;
bool HasInterlockedAccess1;
@@ -86,6 +87,9 @@ public:
// Return true if the target has the floating-point extension facility.
bool hasFPExtension() const { return HasFPExtension; }
+  // Reports whether the population-count facility is assumed to be
+  // installed on this subtarget.
+  bool hasPopulationCount() const {
+    return HasPopulationCount;
+  }
+
// Return true if the target has the fast-serialization facility.
bool hasFastSerialization() const { return HasFastSerialization; }
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 244bd0379a6..3337f6388bd 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -229,3 +229,12 @@ unsigned SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
}
return SystemZTTIImpl::getIntImmCost(Imm, Ty);
}
+
+// Report how well a population count of a TyWidth-bit integer is supported.
+// TyWidth must be a power of two.  Returns PSK_FastHardware when the
+// subtarget has the population-count facility and the type is at most
+// 64 bits wide, and PSK_Software otherwise.
+TargetTransformInfo::PopcntSupportKind
+SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) {
+  assert(isPowerOf2_32(TyWidth) && "Type width must be power of 2");
+
+  const bool UseHardware = ST->hasPopulationCount() && TyWidth <= 64;
+  return UseHardware ? TTI::PSK_FastHardware : TTI::PSK_Software;
+}
+
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index d96515adce4..d4989130679 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -60,6 +60,8 @@ public:
unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty);
+ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+
/// @}
};
OpenPOWER on IntegriCloud