[PowerPC] handle ISD::TRUNCATE in BitPermutationSelector

This is the last one in a series of patches to support better code generation for bitfield insert. BitPermutationSelector already supports ISD::ZERO_EXTEND but not TRUNCATE. This patch adds support for ISD::TRUNCATE in BitPermutationSelector.

For example, for this test case,

    struct s64b {
      int a:4;
      int b:16;
      int c:24;
    };
    void bitfieldinsert64b(struct s64b *p, unsigned char v) {
      p->b = v;
    }

the selection DAG looks like:

    t14: i32,ch = load<(load 4 from %ir.0)> t0, t2, undef:i64
    t18: i32 = and t14, Constant:i32<-1048561>
    t4: i64,ch = CopyFromReg t0, Register:i64 %1
    t22: i64 = AssertZext t4, ValueType:ch:i8
    t23: i32 = truncate t22
    t16: i32 = shl nuw nsw t23, Constant:i32<4>
    t19: i32 = or t18, t16
    t20: ch = store<(store 4 into %ir.0)> t14:1, t19, t2, undef:i64

By handling truncate in the BitPermutationSelector, we can use information from AssertZext when selecting t19 and skip the mask operation corresponding to t18. So the generated sequences with and without this patch are

without this patch

    rlwinm 5, 5, 0, 28, 11 # corresponding to t18
    rlwimi 5, 4, 4, 20, 27

with this patch

    rlwimi 5, 4, 4, 12, 27

Differential Revision: https://reviews.llvm.org/D49076 llvm-svn: 350118
author: Hiroshi Inoue <inouehrs@jp.ibm.com> 2018-12-28 08:00:39 +0000
committer: Hiroshi Inoue <inouehrs@jp.ibm.com> 2018-12-28 08:00:39 +0000
commit: 1ea98f040ea4281878bdd6474af6aab3462735f9 (patch)
tree: 45f578f87636f392608002caabd1061cd4b3d5d8
parent: 530ff8f3ccc4bcf9acb27c0dff235e3f3616c675 (diff)
download: bcm5719-llvm-1ea98f040ea4281878bdd6474af6aab3462735f9.tar.gz
bcm5719-llvm-1ea98f040ea4281878bdd6474af6aab3462735f9.zip
2 files changed, 105 insertions, 8 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 2036dbd5460..31acd0ff870 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -1331,6 +1331,34 @@ class BitPermutationSelector {
 
       return std::make_pair(Interesting, &Bits);
     }
+    case ISD::TRUNCATE: {
+      EVT FromType = V.getOperand(0).getValueType();
+      EVT ToType = V.getValueType();
+      // We support only the case with truncate from i64 to i32.
+      if (FromType != MVT::i64 || ToType != MVT::i32)
+        break;
+      const unsigned NumAllBits = FromType.getSizeInBits();
+      SmallVector<ValueBit, 64> *InBits;
+      std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
+                                                    NumAllBits);
+      const unsigned NumValidBits = ToType.getSizeInBits();
+
+      // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
+      // So, we cannot include this truncate.
+      bool UseUpper32bit = false;
+      for (unsigned i = 0; i < NumValidBits; ++i)
+        if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
+          UseUpper32bit = true;
+          break;
+        }
+      if (UseUpper32bit)
+        break;
+
+      for (unsigned i = 0; i < NumValidBits; ++i)
+        Bits[i] = (*InBits)[i];
+
+      return std::make_pair(Interesting, &Bits);
+    }
     case ISD::AssertZext: {
       // For AssertZext, we look through the operand and
       // mark the bits known to be zero.
@@ -1676,6 +1704,17 @@ class BitPermutationSelector {
     return ExtVal;
   }
 
+  SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
+    if (V.getValueSizeInBits() == 32)
+      return V;
+
+    assert(V.getValueSizeInBits() == 64);
+    SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
+    SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
+                                                    MVT::i32, V, SubRegIdx), 0);
+    return SubVal;
+  }
+
   // Depending on the number of groups for a particular value, it might be
   // better to rotate, mask explicitly (using andi/andis), and then or the
   // result. Select this part of the result first.
@@ -1734,12 +1773,12 @@ class BitPermutationSelector {
       SDValue VRot;
       if (VRI.RLAmt) {
         SDValue Ops[] =
-          { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
-            getI32Imm(31, dl) };
+          { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
+            getI32Imm(0, dl), getI32Imm(31, dl) };
         VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
                                               Ops), 0);
       } else {
-        VRot = VRI.V;
+        VRot = TruncateToInt32(VRI.V, dl);
       }
 
       SDValue ANDIVal, ANDISVal;
@@ -1791,12 +1830,12 @@ class BitPermutationSelector {
       if (VRI.RLAmt) {
         if (InstCnt) *InstCnt += 1;
         SDValue Ops[] =
-          { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
-            getI32Imm(31, dl) };
+          { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
+            getI32Imm(0, dl), getI32Imm(31, dl) };
         Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
                       0);
       } else {
-        Res = VRI.V;
+        Res = TruncateToInt32(VRI.V, dl);
       }
 
       // Now, remove all groups with this underlying value and rotation factor.
@@ -1811,13 +1850,13 @@ class BitPermutationSelector {
     for (auto &BG : BitGroups) {
       if (!Res) {
         SDValue Ops[] =
-          { BG.V, getI32Imm(BG.RLAmt, dl),
+          { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
             getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
             getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
         Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
       } else {
         SDValue Ops[] =
-          { Res, BG.V, getI32Imm(BG.RLAmt, dl),
+          { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
               getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
             getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
         Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
diff --git a/llvm/test/CodeGen/PowerPC/bitfieldinsert.ll b/llvm/test/CodeGen/PowerPC/bitfieldinsert.ll
index 76a648b6f13..97d86f20b12 100644
--- a/llvm/test/CodeGen/PowerPC/bitfieldinsert.ll
+++ b/llvm/test/CodeGen/PowerPC/bitfieldinsert.ll
@@ -60,3 +60,61 @@ entry:
   ret void
 }
 
+; test cases which include ISD::TRUNCATE
+; equivalent C code
+;   struct s64b {
+;     int a:4;
+;     int b:16;
+;     int c:24;
+;   };
+;   void bitfieldinsert64b(struct s64b *p, unsigned char v) {
+;     p->b = v;
+;   }
+
+%struct.s64b = type { i24, i24 }
+
+define void @bitfieldinsert64b(%struct.s64b* nocapture %p, i8 zeroext %v) {
+; CHECK-LABEL: @bitfieldinsert64b
+; CHECK: lwz [[REG1:[0-9]+]], 0(3)
+; CHECK-NEXT: rlwimi [[REG1]], 4, 4, 12, 27
+; CHECK-NEXT: stw [[REG1]], 0(3)
+; CHECK-NEXT: blr
+entry:
+  %conv = zext i8 %v to i32
+  %0 = bitcast %struct.s64b* %p to i32*
+  %bf.load = load i32, i32* %0, align 4
+  %bf.shl = shl nuw nsw i32 %conv, 4
+  %bf.clear = and i32 %bf.load, -1048561
+  %bf.set = or i32 %bf.clear, %bf.shl
+  store i32 %bf.set, i32* %0, align 4
+  ret void
+}
+
+; equivalent C code
+;   struct s64c {
+;     int a:5;
+;     int b:16;
+;     long c:10;
+;   };
+;   void bitfieldinsert64c(struct s64c *p, unsigned short v) {
+;     p->b = v;
+;   }
+
+%struct.s64c = type { i32, [4 x i8] }
+
+define void @bitfieldinsert64c(%struct.s64c* nocapture %p, i16 zeroext %v) {
+; CHECK-LABEL: @bitfieldinsert64c
+; CHECK: lwz [[REG1:[0-9]+]], 0(3)
+; CHECK-NEXT: rlwimi [[REG1]], 4, 5, 11, 26
+; CHECK-NEXT: stw [[REG1]], 0(3)
+; CHECK-NEXT: blr
+entry:
+  %conv = zext i16 %v to i32
+  %0 = getelementptr inbounds %struct.s64c, %struct.s64c* %p, i64 0, i32 0
+  %bf.load = load i32, i32* %0, align 8
+  %bf.shl = shl nuw nsw i32 %conv, 5
+  %bf.clear = and i32 %bf.load, -2097121
+  %bf.set = or i32 %bf.clear, %bf.shl
+  store i32 %bf.set, i32* %0, align 8
+  ret void
+}
author	Hiroshi Inoue <inouehrs@jp.ibm.com>	2018-12-28 08:00:39 +0000
committer	Hiroshi Inoue <inouehrs@jp.ibm.com>	2018-12-28 08:00:39 +0000
commit	1ea98f040ea4281878bdd6474af6aab3462735f9 (patch)
tree	45f578f87636f392608002caabd1061cd4b3d5d8
parent	530ff8f3ccc4bcf9acb27c0dff235e3f3616c675 (diff)
download	bcm5719-llvm-1ea98f040ea4281878bdd6474af6aab3462735f9.tar.gz bcm5719-llvm-1ea98f040ea4281878bdd6474af6aab3462735f9.zip