diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-07-22 16:46:25 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-07-22 16:46:25 +0000 |
commit | 820f87a72d872987a95d1e5931f1dd7656f2ec0a (patch) | |
tree | 1d46ae62cb0fac56b5c103fd081e48c45f31423b /llvm/lib/CodeGen | |
parent | c27f1b7182b41a262b8415afc03d21aa20326317 (diff) | |
download | bcm5719-llvm-820f87a72d872987a95d1e5931f1dd7656f2ec0a.tar.gz bcm5719-llvm-820f87a72d872987a95d1e5931f1dd7656f2ec0a.zip |
[SelectionDAG] Optimization of BITREVERSE legalization for power-of-2 integer scalar/vector types
An extension of D19978, this patch replaces the default BITREVERSE evaluation of individual bit masks+shifts with block mask+shifts when we have integer elements of power-of-2 bits in size.
After calling BSWAP to reverse the order of the constituent bytes (which typically follows a similar approach), every neighbouring 4-bits, 2-bits and finally 1-bit pairs are masked off and swapped over with shifts.
In doing so we can significantly reduce the number of operations required.
Differential Revision: https://reviews.llvm.org/D21578
llvm-svn: 276432
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 49 |
1 files changed, 46 insertions, 3 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 81634096c1b..609ff86c8c4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2520,13 +2520,56 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); } -/// Open code the operations for BITREVERSE. +/// Legalize a BITREVERSE scalar/vector operation as a series of mask + shifts. SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) { EVT VT = Op.getValueType(); EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); unsigned Sz = VT.getScalarSizeInBits(); - SDValue Tmp, Tmp2; + SDValue Tmp, Tmp2, Tmp3; + + // If we can, perform BSWAP first and then the mask+swap the i4, then i2 + // and finally the i1 pairs. + // TODO: We can easily support i4/i2 legal types if any target ever does. + if (Sz >= 8 && isPowerOf2_32(Sz)) { + // Create the masks - repeating the pattern every byte. + APInt MaskHi4(Sz, 0), MaskHi2(Sz, 0), MaskHi1(Sz, 0); + APInt MaskLo4(Sz, 0), MaskLo2(Sz, 0), MaskLo1(Sz, 0); + for (unsigned J = 0; J != Sz; J += 8) { + MaskHi4 = MaskHi4.Or(APInt(Sz, 0xF0ull << J)); + MaskLo4 = MaskLo4.Or(APInt(Sz, 0x0Full << J)); + MaskHi2 = MaskHi2.Or(APInt(Sz, 0xCCull << J)); + MaskLo2 = MaskLo2.Or(APInt(Sz, 0x33ull << J)); + MaskHi1 = MaskHi1.Or(APInt(Sz, 0xAAull << J)); + MaskLo1 = MaskLo1.Or(APInt(Sz, 0x55ull << J)); + } + + // BSWAP if the type is wider than a single byte. + Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op); + + // swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4) + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, VT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, VT)); + Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); + + // swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2) + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, VT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, VT)); + Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); + + // swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1) + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, VT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, VT)); + Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); + return Tmp; + } + Tmp = DAG.getConstant(0, dl, VT); for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) { if (I < J) @@ -2550,7 +2593,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) { EVT VT = Op.getValueType(); EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.getSimpleVT().getScalarType().SimpleTy) { default: llvm_unreachable("Unhandled Expand type in BSWAP!"); case MVT::i16: Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); |