summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHuihui Zhang <huihuiz@quicinc.com>2019-09-24 00:30:09 +0000
committerHuihui Zhang <huihuiz@quicinc.com>2019-09-24 00:30:09 +0000
commita4dd98f2e90b2916fd347020c70ba804c5557db1 (patch)
tree501f6dc5ff9bf1eeff5aabde2ddc97c83bee5b8f
parentc41dba992957b89c77ff2ccb7909ff1254fa3f65 (diff)
downloadbcm5719-llvm-a4dd98f2e90b2916fd347020c70ba804c5557db1.tar.gz
bcm5719-llvm-a4dd98f2e90b2916fd347020c70ba804c5557db1.zip
[InstCombine] Fold a shifty implementation of clamp-to-allones.
Summary: Fold or(ashr(subNSW(Y, X), ScalarSizeInBits(Y)-1), X) into X s> Y ? -1 : X (proof: https://rise4fun.com/Alive/d8Ab). clamp255 is a common operation in image processing and can be implemented in a shifty way as "(255 - X) >> 31 | X & 255". Folding the shift into a select enables more optimization, e.g., vmin generation for the ARM target. Reviewers: lebedev.ri, efriedma, spatel, kparzysz, bcahoon. Reviewed By: lebedev.ri. Subscribers: kristof.beyls, hiraditya, llvm-commits. Tags: #llvm. Differential Revision: https://reviews.llvm.org/D67800. llvm-svn: 372678
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp15
-rw-r--r--llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll59
2 files changed, 39 insertions, 35 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 490faddaefb..cde4cc1e842 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2677,6 +2677,21 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
}
}
+ // or(ashr(subNSW(Y, X), ScalarSizeInBits(Y)-1), X) --> X s> Y ? -1 : X.
+ {
+ Value *X, *Y;
+ const APInt *ShAmt;
+ Type *Ty = I.getType();
+ if (match(&I, m_c_Or(m_OneUse(m_AShr(m_NSWSub(m_Value(Y), m_Value(X)),
+ m_APInt(ShAmt))),
+ m_Deferred(X))) &&
+ *ShAmt == Ty->getScalarSizeInBits() - 1) {
+ Value *NewICmpInst = Builder.CreateICmpSGT(X, Y);
+ return SelectInst::Create(NewICmpInst, ConstantInt::getAllOnesValue(Ty),
+ X);
+ }
+ }
+
return nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll b/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll
index 630a14fd242..937bff62dc7 100644
--- a/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll
+++ b/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll
@@ -12,9 +12,8 @@
define i32 @clamp255_i32(i32 %x) {
; CHECK-LABEL: @clamp255_i32(
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 255, [[X:%.*]]
-; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[SUB]], 31
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 255
+; CHECK-NEXT: [[OR:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 255
; CHECK-NEXT: [[AND:%.*]] = and i32 [[OR]], 255
; CHECK-NEXT: ret i32 [[AND]]
;
@@ -27,9 +26,8 @@ define i32 @clamp255_i32(i32 %x) {
define i8 @sub_ashr_or_i8(i8 %x, i8 %y) {
; CHECK-LABEL: @sub_ashr_or_i8(
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i8 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[SHR:%.*]] = ashr i8 [[SUB]], 7
-; CHECK-NEXT: [[OR:%.*]] = or i8 [[SHR]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = select i1 [[TMP1]], i8 -1, i8 [[X]]
; CHECK-NEXT: ret i8 [[OR]]
;
%sub = sub nsw i8 %y, %x
@@ -40,9 +38,8 @@ define i8 @sub_ashr_or_i8(i8 %x, i8 %y) {
define i16 @sub_ashr_or_i16(i16 %x, i16 %y) {
; CHECK-LABEL: @sub_ashr_or_i16(
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i16 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[SHR:%.*]] = ashr i16 [[SUB]], 15
-; CHECK-NEXT: [[OR:%.*]] = or i16 [[SHR]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i16 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = select i1 [[TMP1]], i16 -1, i16 [[X]]
; CHECK-NEXT: ret i16 [[OR]]
;
%sub = sub nsw i16 %y, %x
@@ -53,9 +50,8 @@ define i16 @sub_ashr_or_i16(i16 %x, i16 %y) {
define i32 @sub_ashr_or_i32(i32 %x, i32 %y) {
; CHECK-LABEL: @sub_ashr_or_i32(
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[SUB]], 31
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[X]]
; CHECK-NEXT: ret i32 [[OR]]
;
%sub = sub nsw i32 %y, %x
@@ -66,9 +62,8 @@ define i32 @sub_ashr_or_i32(i32 %x, i32 %y) {
define i64 @sub_ashr_or_i64(i64 %x, i64 %y) {
; CHECK-LABEL: @sub_ashr_or_i64(
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[SHR:%.*]] = ashr i64 [[SUB]], 63
-; CHECK-NEXT: [[OR:%.*]] = or i64 [[SHR]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = select i1 [[TMP1]], i64 -1, i64 [[X]]
; CHECK-NEXT: ret i64 [[OR]]
;
%sub = sub nsw i64 %y, %x
@@ -81,9 +76,8 @@ define i64 @sub_ashr_or_i64(i64 %x, i64 %y) {
define i32 @sub_ashr_or_i32_nuw_nsw(i32 %x, i32 %y) {
; CHECK-LABEL: @sub_ashr_or_i32_nuw_nsw(
-; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[SUB]], 31
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[X]]
; CHECK-NEXT: ret i32 [[OR]]
;
%sub = sub nuw nsw i32 %y, %x
@@ -96,9 +90,8 @@ define i32 @sub_ashr_or_i32_nuw_nsw(i32 %x, i32 %y) {
define i32 @sub_ashr_or_i32_commute(i32 %x, i32 %y) {
; CHECK-LABEL: @sub_ashr_or_i32_commute(
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[SUB]], 31
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[X]]
; CHECK-NEXT: ret i32 [[OR]]
;
%sub = sub nsw i32 %y, %x
@@ -111,9 +104,8 @@ define i32 @sub_ashr_or_i32_commute(i32 %x, i32 %y) {
define <4 x i32> @sub_ashr_or_i32_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @sub_ashr_or_i32_vec(
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw <4 x i32> [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[SHR:%.*]] = ashr <4 x i32> [[SUB]], <i32 31, i32 31, i32 31, i32 31>
-; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[SHR]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[X]]
; CHECK-NEXT: ret <4 x i32> [[OR]]
;
%sub = sub nsw <4 x i32> %y, %x
@@ -124,9 +116,8 @@ define <4 x i32> @sub_ashr_or_i32_vec(<4 x i32> %x, <4 x i32> %y) {
define <4 x i32> @sub_ashr_or_i32_vec_nuw_nsw(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @sub_ashr_or_i32_vec_nuw_nsw(
-; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw <4 x i32> [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[SHR:%.*]] = ashr <4 x i32> [[SUB]], <i32 31, i32 31, i32 31, i32 31>
-; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[SHR]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[X]]
; CHECK-NEXT: ret <4 x i32> [[OR]]
;
%sub = sub nuw nsw <4 x i32> %y, %x
@@ -137,9 +128,8 @@ define <4 x i32> @sub_ashr_or_i32_vec_nuw_nsw(<4 x i32> %x, <4 x i32> %y) {
define <4 x i32> @sub_ashr_or_i32_vec_commute(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @sub_ashr_or_i32_vec_commute(
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw <4 x i32> [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[SHR:%.*]] = ashr <4 x i32> [[SUB]], <i32 31, i32 31, i32 31, i32 31>
-; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[SHR]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[X]]
; CHECK-NEXT: ret <4 x i32> [[OR]]
;
%sub = sub nsw <4 x i32> %y, %x
@@ -154,8 +144,8 @@ define i32 @sub_ashr_or_i32_extra_use_sub(i32 %x, i32 %y, i32* %p) {
; CHECK-LABEL: @sub_ashr_or_i32_extra_use_sub(
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: store i32 [[SUB]], i32* [[P:%.*]], align 4
-; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[SUB]], 31
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[Y]], [[X]]
+; CHECK-NEXT: [[OR:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[X]]
; CHECK-NEXT: ret i32 [[OR]]
;
%sub = sub nsw i32 %y, %x
@@ -167,9 +157,8 @@ define i32 @sub_ashr_or_i32_extra_use_sub(i32 %x, i32 %y, i32* %p) {
define i32 @sub_ashr_or_i32_extra_use_or(i32 %x, i32 %y, i32* %p) {
; CHECK-LABEL: @sub_ashr_or_i32_extra_use_or(
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[SUB]], 31
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[X]]
; CHECK-NEXT: store i32 [[OR]], i32* [[P:%.*]], align 4
; CHECK-NEXT: ret i32 [[OR]]
;
OpenPOWER on IntegriCloud