author | Sanjay Patel <spatel@rotateright.com> | 2018-08-17 13:23:44 +0000
---|---|---
committer | Sanjay Patel <spatel@rotateright.com> | 2018-08-17 13:23:44 +0000
commit | 411b86081e131e13ab4981e21e7752405fb9d49a (patch) |
tree | 344e8c12ce4f2a71a5600e0ef6d84b68d381fb8a |
parent | c9818ebc02b9ae0ef5cee50a9fb60a9626c6274e (diff) |
download | bcm5719-llvm-411b86081e131e13ab4981e21e7752405fb9d49a.tar.gz, bcm5719-llvm-411b86081e131e13ab4981e21e7752405fb9d49a.zip |
[ConstantFolding] add simplifications for funnel shift intrinsics
This is another step towards being able to canonicalize to the funnel shift
intrinsics in IR (see D49242 for the initial patch).
There should be no loss of simplification power in IR between these intrinsics
and the equivalent IR constructs.
Differential Revision: https://reviews.llvm.org/D50848
llvm-svn: 340022
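
For context, llvm.fshl and llvm.fshr conceptually concatenate their first two operands and extract a word-wide window, with the shift amount taken modulo the bit width. A minimal sketch of that semantics on 32-bit values (not part of this patch; the helper names are illustrative, not LLVM APIs):

```cpp
#include <cstdint>

// fshl(Hi, Lo, Sh): top 32 bits of the 64-bit value (Hi:Lo) << (Sh % 32).
// fshr(Hi, Lo, Sh): low 32 bits of the 64-bit value (Hi:Lo) >> (Sh % 32).
// A shift amount that is 0 modulo the width returns an operand directly,
// so the inverse shift below never reaches the (undefined) full width.
static uint32_t fshl32(uint32_t Hi, uint32_t Lo, uint32_t Sh) {
  unsigned ShAmt = Sh % 32;
  return ShAmt ? (Hi << ShAmt) | (Lo >> (32 - ShAmt)) : Hi;
}

static uint32_t fshr32(uint32_t Hi, uint32_t Lo, uint32_t Sh) {
  unsigned ShAmt = Sh % 32;
  return ShAmt ? (Hi << (32 - ShAmt)) | (Lo >> ShAmt) : Lo;
}
```

The patch below performs the same computation on APInt, so it applies to any bit width, including the i7 and vector cases covered by the tests.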
-rw-r--r-- | llvm/lib/Analysis/ConstantFolding.cpp | 25
-rw-r--r-- | llvm/test/Analysis/ConstantFolding/funnel-shift.ll | 18
2 files changed, 31 insertions, 12 deletions
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index f741f66d462..bbd812bf517 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1389,6 +1389,8 @@ bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
   case Intrinsic::ctpop:
   case Intrinsic::ctlz:
   case Intrinsic::cttz:
+  case Intrinsic::fshl:
+  case Intrinsic::fshr:
   case Intrinsic::fma:
   case Intrinsic::fmuladd:
   case Intrinsic::copysign:
@@ -2081,6 +2083,29 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
     }
   }
 
+  if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) {
+    auto *C0 = dyn_cast<ConstantInt>(Operands[0]);
+    auto *C1 = dyn_cast<ConstantInt>(Operands[1]);
+    auto *C2 = dyn_cast<ConstantInt>(Operands[2]);
+    if (!(C0 && C1 && C2))
+      return nullptr;
+
+    // The shift amount is interpreted as modulo the bitwidth. If the shift
+    // amount is effectively 0, avoid UB due to oversized inverse shift below.
+    unsigned BitWidth = C0->getBitWidth();
+    unsigned ShAmt = C2->getValue().urem(BitWidth);
+    bool IsRight = IntrinsicID == Intrinsic::fshr;
+    if (!ShAmt)
+      return IsRight ? C1 : C0;
+
+    // (X << ShlAmt) | (Y >> LshrAmt)
+    const APInt &X = C0->getValue();
+    const APInt &Y = C1->getValue();
+    unsigned LshrAmt = IsRight ? ShAmt : BitWidth - ShAmt;
+    unsigned ShlAmt = !IsRight ? ShAmt : BitWidth - ShAmt;
+    return ConstantInt::get(Ty->getContext(), X.shl(ShlAmt) | Y.lshr(LshrAmt));
+  }
+
   return nullptr;
 }
diff --git a/llvm/test/Analysis/ConstantFolding/funnel-shift.ll b/llvm/test/Analysis/ConstantFolding/funnel-shift.ll
index 86a4d62a06e..8ccc714ae53 100644
--- a/llvm/test/Analysis/ConstantFolding/funnel-shift.ll
+++ b/llvm/test/Analysis/ConstantFolding/funnel-shift.ll
@@ -12,8 +12,7 @@ declare <4 x i8> @llvm.fshr.v4i8(<4 x i8>, <4 x i8>, <4 x i8>)
 
 define i32 @fshl_i32() {
 ; CHECK-LABEL: @fshl_i32(
-; CHECK-NEXT:  [[F:%.*]] = call i32 @llvm.fshl.i32(i32 305419896, i32 -1412567295, i32 5)
-; CHECK-NEXT:  ret i32 [[F]]
+; CHECK-NEXT:  ret i32 1183502101
 ;
   %f = call i32 @llvm.fshl.i32(i32 305419896, i32 2882400001, i32 5)
   ret i32 %f
@@ -24,8 +23,7 @@ define i32 @fshl_i32() {
 
 define i32 @fshr_i32() {
 ; CHECK-LABEL: @fshr_i32(
-; CHECK-NEXT:  [[F:%.*]] = call i32 @llvm.fshr.i32(i32 305419896, i32 -1412567295, i32 37)
-; CHECK-NEXT:  ret i32 [[F]]
+; CHECK-NEXT:  ret i32 -983666824
 ;
   %f = call i32 @llvm.fshr.i32(i32 305419896, i32 2882400001, i32 37)
   ret i32 %f
@@ -38,8 +36,7 @@ define i32 @fshr_i32() {
 
 define i7 @fshl_i7() {
 ; CHECK-LABEL: @fshl_i7(
-; CHECK-NEXT:  [[F:%.*]] = call i7 @llvm.fshl.i7(i7 -16, i7 -1, i7 9)
-; CHECK-NEXT:  ret i7 [[F]]
+; CHECK-NEXT:  ret i7 -61
 ;
   %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 9)
   ret i7 %f
@@ -50,8 +47,7 @@ define i7 @fshl_i7() {
 
 define i7 @fshr_i7() {
 ; CHECK-LABEL: @fshr_i7(
-; CHECK-NEXT:  [[F:%.*]] = call i7 @llvm.fshr.i7(i7 -16, i7 -1, i7 16)
-; CHECK-NEXT:  ret i7 [[F]]
+; CHECK-NEXT:  ret i7 31
 ;
   %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 16)
   ret i7 %f
@@ -65,8 +61,7 @@ define i7 @fshr_i7() {
 
 define <4 x i8> @fshl_v4i8() {
 ; CHECK-LABEL: @fshl_v4i8(
-; CHECK-NEXT:  [[F:%.*]] = call <4 x i8> @llvm.fshl.v4i8(<4 x i8> <i8 0, i8 -1, i8 16, i8 17>, <4 x i8> <i8 -1, i8 0, i8 85, i8 -86>, <4 x i8> <i8 0, i8 8, i8 9, i8 10>)
-; CHECK-NEXT:  ret <4 x i8> [[F]]
+; CHECK-NEXT:  ret <4 x i8> <i8 0, i8 -1, i8 32, i8 70>
 ;
   %f = call <4 x i8> @llvm.fshl.v4i8(<4 x i8> <i8 0, i8 -1, i8 16, i8 17>, <4 x i8> <i8 -1, i8 0, i8 85, i8 170>, <4 x i8> <i8 0, i8 8, i8 9, i8 10>)
   ret <4 x i8> %f
@@ -80,8 +75,7 @@ define <4 x i8> @fshl_v4i8() {
 
 define <4 x i8> @fshr_v4i8() {
 ; CHECK-LABEL: @fshr_v4i8(
-; CHECK-NEXT:  [[F:%.*]] = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> <i8 0, i8 -1, i8 16, i8 17>, <4 x i8> <i8 -1, i8 0, i8 85, i8 -86>, <4 x i8> <i8 0, i8 8, i8 9, i8 10>)
-; CHECK-NEXT:  ret <4 x i8> [[F]]
+; CHECK-NEXT:  ret <4 x i8> <i8 -1, i8 0, i8 42, i8 106>
 ;
   %f = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> <i8 0, i8 -1, i8 16, i8 17>, <4 x i8> <i8 -1, i8 0, i8 85, i8 170>, <4 x i8> <i8 0, i8 8, i8 9, i8 10>)
   ret <4 x i8> %f
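
As a sanity check on the updated CHECK lines, the standalone sketch below (not part of the commit) replays the two i32 tests using the same modulo-bitwidth shift split as the new folder, but on plain uint32_t arithmetic; it reproduces the folded results 1183502101 and -983666824 (3311300472 when viewed as unsigned).

```cpp
#include <cassert>
#include <cstdint>

// Same (X << ShlAmt) | (Y >> LshrAmt) split as the folder above, but on
// uint32_t instead of APInt. Standalone sketch, not LLVM code.
static uint32_t foldFunnelShift32(bool IsRight, uint32_t C0, uint32_t C1,
                                  uint32_t C2) {
  const unsigned BitWidth = 32;
  unsigned ShAmt = C2 % BitWidth;
  if (!ShAmt)                        // effective shift of zero
    return IsRight ? C1 : C0;
  unsigned LshrAmt = IsRight ? ShAmt : BitWidth - ShAmt;
  unsigned ShlAmt = !IsRight ? ShAmt : BitWidth - ShAmt;
  return (C0 << ShlAmt) | (C1 >> LshrAmt);
}

int main() {
  // fshl(i32 305419896, i32 2882400001, i32 5) folds to 1183502101.
  assert(foldFunnelShift32(false, 305419896u, 2882400001u, 5u) == 1183502101u);
  // fshr(i32 305419896, i32 2882400001, i32 37) folds to -983666824,
  // i.e. 3311300472 as an unsigned 32-bit value.
  assert(foldFunnelShift32(true, 305419896u, 2882400001u, 37u) == 3311300472u);
  return 0;
}
```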