summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2017-06-21 16:07:13 +0000
committerCraig Topper <craig.topper@intel.com>2017-06-21 16:07:13 +0000
commitae86cc725dfd33a534efec9eab249609ddd1980d (patch)
tree5a7543b0ece5fbf5fcb9e326f4046a47cc8ce780 /llvm
parentcbac691c4bbf6c98c35441fff0460de4235c483d (diff)
downloadbcm5719-llvm-ae86cc725dfd33a534efec9eab249609ddd1980d.tar.gz
bcm5719-llvm-ae86cc725dfd33a534efec9eab249609ddd1980d.zip
[InstCombine] Don't let folding (select (icmp eq (and X, C1), 0), Y, (or Y, C2)) create more instructions than it removes
Summary: Previously this folding had no checks to see if it was going to result in less instructions. This was pointed out during the review of D34184 This patch adds code to count how many instructions its going to create vs how many its going to remove so we can make a proper decision. Reviewers: spatel, majnemer Reviewed By: spatel Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D34437 llvm-svn: 305926
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp20
-rw-r--r--llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll59
2 files changed, 39 insertions, 40 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index b9674d85634..993c095dc6f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -303,7 +303,7 @@ Instruction *InstCombiner::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
/// We want to turn:
/// (select (icmp eq (and X, C1), 0), Y, (or Y, C2))
/// into:
-/// (or (shl (and X, C1), C3), y)
+/// (or (shl (and X, C1), C3), Y)
/// iff:
/// C1 and C2 are both powers of 2
/// where:
@@ -345,6 +345,20 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
unsigned C1Log = C1->logBase2();
unsigned C2Log = C2->logBase2();
+
+ ICmpInst::Predicate Pred = IC->getPredicate();
+ bool NeedXor = (Pred == ICmpInst::ICMP_NE && OrOnFalseVal) ||
+ (Pred == ICmpInst::ICMP_EQ && OrOnTrueVal);
+ bool NeedShift = C1Log != C2Log;
+ bool NeedZExtTrunc = Y->getType()->getIntegerBitWidth() !=
+ V->getType()->getIntegerBitWidth();
+
+ // Make sure we don't create more instructions than we save.
+ Value *Or = OrOnFalseVal ? FalseVal : TrueVal;
+ if ((NeedShift + NeedXor + NeedZExtTrunc) >
+ (IC->hasOneUse() + Or->hasOneUse()))
+ return nullptr;
+
if (C2Log > C1Log) {
V = Builder->CreateZExtOrTrunc(V, Y->getType());
V = Builder->CreateShl(V, C2Log - C1Log);
@@ -354,9 +368,7 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
} else
V = Builder->CreateZExtOrTrunc(V, Y->getType());
- ICmpInst::Predicate Pred = IC->getPredicate();
- if ((Pred == ICmpInst::ICMP_NE && OrOnFalseVal) ||
- (Pred == ICmpInst::ICMP_EQ && OrOnTrueVal))
+ if (NeedXor)
V = Builder->CreateXor(V, *C2);
return Builder->CreateOr(V, Y);
diff --git a/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll b/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll
index faeb4e046ac..7a1fc1be478 100644
--- a/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll
+++ b/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll
@@ -104,12 +104,11 @@ define i32 @select_icmp_ne_0_and_32_or_4096(i32 %x, i32 %y) {
define i8 @select_icmp_ne_0_and_1073741824_or_8(i32 %x, i8 %y) {
; CHECK-LABEL: @select_icmp_ne_0_and_1073741824_or_8(
-; CHECK-NEXT: [[AND:%.*]] = lshr i32 %x, 27
-; CHECK-NEXT: [[AND_TR:%.*]] = trunc i32 [[AND]] to i8
-; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[AND_TR]], 8
-; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[TMP1]], 8
-; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[TMP2]], %y
-; CHECK-NEXT: ret i8 [[TMP3]]
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1073741824
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[Y:%.*]], 8
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i8 [[Y]], i8 [[OR]]
+; CHECK-NEXT: ret i8 [[SELECT]]
;
%and = and i32 %x, 1073741824
%cmp = icmp ne i32 0, %and
@@ -120,12 +119,11 @@ define i8 @select_icmp_ne_0_and_1073741824_or_8(i32 %x, i8 %y) {
define i32 @select_icmp_ne_0_and_8_or_1073741824(i8 %x, i32 %y) {
; CHECK-LABEL: @select_icmp_ne_0_and_8_or_1073741824(
-; CHECK-NEXT: [[AND:%.*]] = and i8 %x, 8
-; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[AND]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[TMP1]], 27
-; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 1073741824
-; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], %y
-; CHECK-NEXT: ret i32 [[TMP4]]
+; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], 8
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[AND]], 0
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 1073741824
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]]
+; CHECK-NEXT: ret i32 [[SELECT]]
;
%and = and i8 %x, 8
%cmp = icmp ne i8 0, %and
@@ -377,15 +375,13 @@ define i32 @no_shift_xor_multiuse_or(i32 %x, i32 %y) {
ret i32 %res
}
-; TODO this increased the number of instructions
define i32 @shift_xor_multiuse_or(i32 %x, i32 %y) {
; CHECK-LABEL: @shift_xor_multiuse_or(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2048
-; CHECK-NEXT: [[AND:%.*]] = lshr i32 [[X:%.*]], 1
-; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[AND]], 2048
-; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 2048
-; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP2]], [[Y]]
-; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP3]], [[OR]]
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[OR]]
; CHECK-NEXT: ret i32 [[RES]]
;
%and = and i32 %x, 4096
@@ -452,16 +448,14 @@ define i32 @no_shift_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) {
ret i32 %res
}
-; TODO this increased the number of instructions
define i32 @shift_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) {
; CHECK-LABEL: @shift_xor_multiuse_cmp(
; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[AND]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 2048
-; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2048
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]]
; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
-; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP3]], [[SELECT2]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
; CHECK-NEXT: ret i32 [[RES]]
;
%and = and i32 %x, 4096
@@ -473,16 +467,14 @@ define i32 @shift_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) {
ret i32 %res
}
-; TODO this increased the number of instructions
define i32 @shift_no_xor_multiuse_cmp_or(i32 %x, i32 %y, i32 %z, i32 %w) {
; CHECK-LABEL: @shift_no_xor_multiuse_cmp_or(
; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[AND]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y]]
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]]
; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
-; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP2]], [[SELECT2]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[OR]]
; CHECK-NEXT: ret i32 [[RES2]]
;
@@ -517,16 +509,14 @@ define i32 @no_shift_no_xor_multiuse_cmp_or(i32 %x, i32 %y, i32 %z, i32 %w) {
ret i32 %res2
}
-; TODO this increased the number of instructions
define i32 @no_shift_xor_multiuse_cmp_or(i32 %x, i32 %y, i32 %z, i32 %w) {
; CHECK-LABEL: @no_shift_xor_multiuse_cmp_or(
; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 4096
-; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[AND]], 4096
-; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y]]
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]]
; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
-; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP2]], [[SELECT2]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[OR]]
; CHECK-NEXT: ret i32 [[RES2]]
;
@@ -540,17 +530,14 @@ define i32 @no_shift_xor_multiuse_cmp_or(i32 %x, i32 %y, i32 %z, i32 %w) {
ret i32 %res2
}
-; TODO this increased the number of instructions
define i32 @shift_xor_multiuse_cmp_or(i32 %x, i32 %y, i32 %z, i32 %w) {
; CHECK-LABEL: @shift_xor_multiuse_cmp_or(
; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2048
-; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[AND]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 2048
-; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP2]], [[Y]]
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]]
; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
-; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP3]], [[SELECT2]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[OR]]
; CHECK-NEXT: ret i32 [[RES2]]
;
OpenPOWER on IntegriCloud