author     Chad Rosier <mcrosier@codeaurora.org>   2016-05-26 19:41:33 +0000
committer  Chad Rosier <mcrosier@codeaurora.org>   2016-05-26 19:41:33 +0000
commit     14aa2ad1f466d54e275ba9f1327e79ef7c8c52ce (patch)
tree       3289b0c98dd257f76b3395f74d5aaca083762306
parent     e6dbd902c082f16337cd42dbbff0e62404f88fc2 (diff)
[AArch64] Generate rev16/rev32 from bswap + srl when upper bits are known zero.
Canonicalize (srl (bswap i32 x), 16) to (rotr (bswap i32 x), 16), if the high
16-bits of x are zero. Similarly, canonicalize (srl (bswap i64 x), 32) to
(rotr (bswap i64 x), 32), if the high 32-bits of x are zero.

test_rev_w_srl16:                    test_rev_w_srl16:
  and w8, w0, #0xffff                  and   w8, w0, #0xffff
  rev w8, w8                  --->     rev16 w0, w8
  lsr w0, w8, #16

test_rev_x_srl32:                    test_rev_x_srl32:
  rev x8, x8                  --->     rev32 x0, x8
  lsr x0, x8, #32

llvm-svn: 270896
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp  32
-rw-r--r--  llvm/test/CodeGen/AArch64/arm64-rev.ll            27
2 files changed, 58 insertions, 1 deletion
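
For context, a minimal source-level sketch (not part of the patch) of the kind of
input that produces the (srl (bswap x), c) DAG this combine targets; the function
names below are illustrative only. Because the argument is zero-extended, the high
bits feeding the bswap are known zero, so the shift can be folded into a rotate
and selected as a single rev16/rev32:

#include <cstdint>

// Byte-swap a 16-bit value via a 32-bit bswap.
// Lowers to zext i16 + llvm.bswap.i32 + lshr 16, the test_rev_w_srl16 pattern.
uint32_t swap16(uint16_t x) {
  return __builtin_bswap32(static_cast<uint32_t>(x)) >> 16;
}

// Byte-swap a 32-bit value via a 64-bit bswap.
// Lowers to zext i32 + llvm.bswap.i64 + lshr 32, the test_rev_x_srl32 pattern.
uint64_t swap32(uint32_t x) {
  return __builtin_bswap64(static_cast<uint64_t>(x)) >> 32;
}

With this patch, the i32 case should compile to an and plus rev16 and the i64
case to a single rev32 on AArch64, instead of a rev followed by a logical
shift right, as shown in the before/after assembly above.
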
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2c8a4183f17..cd02150c0a2 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -475,7 +475,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// Also, try to fold ADD into CSINC/CSINV..
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::SUB);
-
+ setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::XOR);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
@@ -8001,6 +8001,34 @@ static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
return SDValue();
}
+static SDValue performSRLCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ // Canonicalize (srl (bswap i32 x), 16) to (rotr (bswap i32 x), 16), if the
+ // high 16-bits of x are zero. Similarly, canonicalize (srl (bswap i64 x), 32)
+ // to (rotr (bswap i64 x), 32), if the high 32-bits of x are zero.
+ SDValue N0 = N->getOperand(0);
+ if (N0.getOpcode() == ISD::BSWAP) {
+ SDLoc DL(N);
+ SDValue N1 = N->getOperand(1);
+ SDValue N00 = N0.getOperand(0);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ uint64_t ShiftAmt = C->getZExtValue();
+ if (VT == MVT::i32 && ShiftAmt == 16 &&
+ DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(32, 16)))
+ return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
+ if (VT == MVT::i64 && ShiftAmt == 32 &&
+ DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(64, 32)))
+ return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
+ }
+ }
+ return SDValue();
+}
+
static SDValue performBitcastCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
@@ -9893,6 +9921,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performFDivCombine(N, DAG, Subtarget);
case ISD::OR:
return performORCombine(N, DCI, Subtarget);
+ case ISD::SRL:
+ return performSRLCombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN:
return performIntrinsicCombine(N, DCI, Subtarget);
case ISD::ANY_EXTEND:
diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index 74356d76d3c..4980d7e3b27 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -16,6 +16,33 @@ entry:
ret i64 %0
}
+; Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high 16-bits
+; of %a are zero. This optimizes rev + lsr 16 to rev16.
+define i32 @test_rev_w_srl16(i16 %a) {
+entry:
+; CHECK-LABEL: test_rev_w_srl16:
+; CHECK: and [[REG:w[0-9]+]], w0, #0xffff
+; CHECK: rev16 w0, [[REG]]
+; CHECK-NOT: lsr
+ %0 = zext i16 %a to i32
+ %1 = tail call i32 @llvm.bswap.i32(i32 %0)
+ %2 = lshr i32 %1, 16
+ ret i32 %2
+}
+
+; Canonicalize (srl (bswap x), 32) to (rotr (bswap x), 32) if the high 32-bits
+; of %a are zero. This optimizes rev + lsr 32 to rev32.
+define i64 @test_rev_x_srl32(i32 %a) {
+entry:
+; CHECK-LABEL: test_rev_x_srl32:
+; CHECK: rev32 x0, {{x[0-9]+}}
+; CHECK-NOT: lsr
+ %0 = zext i32 %a to i64
+ %1 = tail call i64 @llvm.bswap.i64(i64 %0)
+ %2 = lshr i64 %1, 32
+ ret i64 %2
+}
+
declare i32 @llvm.bswap.i32(i32) nounwind readnone
declare i64 @llvm.bswap.i64(i64) nounwind readnone