-rw-r--r--  llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp              |  5
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp              | 77
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.h                |  3
-rw-r--r--  llvm/test/CodeGen/ARM/Windows/alloca.ll              |  2
-rw-r--r--  llvm/test/CodeGen/ARM/Windows/vla.ll                 |  4
-rw-r--r--  llvm/test/CodeGen/ARM/and-cmpz.ll                    |  9
-rw-r--r--  llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll  |  4
-rw-r--r--  llvm/test/CodeGen/ARM/select_const.ll                |  5
-rw-r--r--  llvm/test/CodeGen/Thumb/bic_imm.ll                   | 67
-rw-r--r--  llvm/test/CodeGen/Thumb/shift-and.ll                 | 24
10 files changed, 172 insertions(+), 28 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 81196555535..665e67beddb 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -2309,6 +2309,11 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
                                    Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

+        // Mask off the unnecessary bits of the AND immediate; normally
+        // DAGCombine will do this, but that might not happen if
+        // targetShrinkDemandedConstant chooses a different immediate.
+        And_imm &= -1U >> Srl_imm;
+
        // Note: The width operand is encoded as width-1.
        unsigned Width = countTrailingOnes(And_imm) - 1;
        unsigned LSB = Srl_imm;
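
For reference, the trim is pure bit arithmetic: -1U >> Srl_imm is a mask of the bit positions that can survive a logical right shift by Srl_imm, so anything above them is irrelevant to the extracted field. A standalone sketch of just this step, with hypothetical values and a hypothetical helper name (countTrailingOnes is the real llvm::countTrailingOnes from MathExtras.h):

    #include <cassert>
    #include <cstdio>

    // Keep only the bits of the AND immediate that a following SRL by
    // SrlImm can deliver; the rest are shifted out and must not inflate
    // the field width later computed with countTrailingOnes.
    static unsigned trimAndImm(unsigned AndImm, unsigned SrlImm) {
      assert(SrlImm > 0 && SrlImm < 32 && "bad amount in shift node!");
      return AndImm & (-1U >> SrlImm);
    }

    int main() {
      // With SrlImm == 16, only the low 16 bits of the immediate matter:
      printf("0x%x\n", trimAndImm(0x00FF00FFu, 16)); // prints 0xff
      return 0;
    }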
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index da99058b91a..f1801c07a23 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -13602,6 +13602,83 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
  }
}

+bool
+ARMTargetLowering::targetShrinkDemandedConstant(SDValue Op,
+                                                const APInt &DemandedAPInt,
+                                                TargetLoweringOpt &TLO) const {
+  // Delay optimization, so we don't have to deal with illegal types, or block
+  // optimizations.
+  if (!TLO.LegalOps)
+    return false;
+
+  // Only optimize AND for now.
+  if (Op.getOpcode() != ISD::AND)
+    return false;
+
+  EVT VT = Op.getValueType();
+
+  // Ignore vectors.
+  if (VT.isVector())
+    return false;
+
+  assert(VT == MVT::i32 && "Unexpected integer type");
+
+  // Make sure the RHS really is a constant.
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+  if (!C)
+    return false;
+
+  unsigned Mask = C->getZExtValue();
+
+  // If mask is zero, nothing to do.
+  if (!Mask)
+    return false;
+
+  unsigned Demanded = DemandedAPInt.getZExtValue();
+  unsigned ShrunkMask = Mask & Demanded;
+  unsigned ExpandedMask = Mask | ~Demanded;
+
+  auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
+    return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
+  };
+  auto UseMask = [this, Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
+    if (NewMask == Mask)
+      return true;
+    SDLoc DL(Op);
+    SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
+    SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
+    return TLO.CombineTo(Op, NewOp);
+  };
+
+  // Prefer uxtb mask.
+  if (IsLegalMask(0xFF))
+    return UseMask(0xFF);
+
+  // Prefer uxth mask.
+  if (IsLegalMask(0xFFFF))
+    return UseMask(0xFFFF);
+
+  // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
+  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
+  if (ShrunkMask < 256)
+    return UseMask(ShrunkMask);
+
+  // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
+  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
+  if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
+    return UseMask(ExpandedMask);
+
+  // Potential improvements:
+  //
+  // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
+  // We could try to prefer Thumb1 immediates which can be lowered to a
+  // two-instruction sequence.
+  // We could try to recognize more legal ARM/Thumb2 immediates here.
+
+  return false;
+}
+
+
//===----------------------------------------------------------------------===//
// ARM Inline Assembly Support
//===----------------------------------------------------------------------===//
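
The heart of the new hook is the ShrunkMask/ExpandedMask pair: a replacement constant is interchangeable with the original mask whenever it keeps every demanded set bit (covers ShrunkMask) and sets nothing beyond Mask | ~Demanded (stays inside ExpandedMask). A standalone model of that test, not LLVM code, using the numbers of the truncated test in bic_imm.ll below (and i16 %a, -129 feeding an i16 store, so Mask = 0xFFFFFF7F and Demanded = 0xFFFF):

    #include <cstdio>

    // Candidate can replace Mask iff it covers ShrunkMask and adds no
    // bits outside ExpandedMask; this mirrors IsLegalMask above.
    static bool isLegalMask(unsigned ShrunkMask, unsigned ExpandedMask,
                            unsigned Candidate) {
      return (ShrunkMask & Candidate) == ShrunkMask &&
             (~ExpandedMask & Candidate) == 0;
    }

    int main() {
      unsigned Mask = 0xFFFFFF7Fu, Demanded = 0xFFFFu;
      unsigned ShrunkMask = Mask & Demanded;    // 0xFF7F: too big for movs+ands
      unsigned ExpandedMask = Mask | ~Demanded; // 0xFFFFFF7F, i.e. -129
      printf("uxtb: %d\n", isLegalMask(ShrunkMask, ExpandedMask, 0xFF));   // 0
      printf("uxth: %d\n", isLegalMask(ShrunkMask, ExpandedMask, 0xFFFF)); // 0
      printf("expanded: %d\n", (int)ExpandedMask);                         // -129
      return 0;
    }

Neither extension mask is legal and ShrunkMask does not fit in [1, 255], but ExpandedMask lands in [-256, -2], so UseMask(-129) rewrites the AND and isel emits movs #128 plus bics (Thumb1) or bic #128 (Thumb2), exactly the change visible in bic_imm.ll below.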
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 50b4c2977fb..47b20aa4a6a 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -389,6 +389,9 @@ class VectorType;
                                       const SelectionDAG &DAG,
                                       unsigned Depth) const override;

+    bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
+                                      TargetLoweringOpt &TLO) const override;
+
     bool ExpandInlineAsm(CallInst *CI) const override;
diff --git a/llvm/test/CodeGen/ARM/Windows/alloca.ll b/llvm/test/CodeGen/ARM/Windows/alloca.ll
index ea893fa9163..203333e71a8 100644
--- a/llvm/test/CodeGen/ARM/Windows/alloca.ll
+++ b/llvm/test/CodeGen/ARM/Windows/alloca.ll
@@ -19,7 +19,7 @@ entry:
; because we do not have the kill flag on R0.
; CHECK: mov.w [[R1:lr]], #7
; CHECK: add.w [[R0:r[0-9]+]], [[R1]], [[R0]], lsl #2
-; CHECK: bic [[R0]], [[R0]], #7
+; CHECK: bic [[R0]], [[R0]], #4
; CHECK: lsrs r4, [[R0]], #2
; CHECK: bl __chkstk
; CHECK: sub.w sp, sp, r4
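
The relaxed immediate is sound because the rounded byte count is immediately shifted right by two: bits 0-1 of the AND result are never demanded, so of the original ~7 mask only bit 2 still needs clearing, and ExpandedMask = ~7 | 3 = 0xFFFFFFFB = -5 falls in the [-256, -2] rule above, which is why bic #7 becomes bic #4. A brute-force check of the equivalence, assuming the AND result feeds only the lsrs #2:

    #include <cassert>

    int main() {
      // Stack rounding: ((4*n + 7) & ~7u) >> 2, with the AND mask relaxed
      // from ~7u (bic #7) to 0xFFFFFFFBu (bic #4); bits 0-1 die in the shift.
      for (unsigned v = 0; v < (1u << 20); ++v)
        assert(((v & ~7u) >> 2) == ((v & 0xFFFFFFFBu) >> 2));
      return 0;
    }

The vla.ll change below is the same pattern.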
diff --git a/llvm/test/CodeGen/ARM/Windows/vla.ll b/llvm/test/CodeGen/ARM/Windows/vla.ll
index 03709758048..f095197f3d6 100644
--- a/llvm/test/CodeGen/ARM/Windows/vla.ll
+++ b/llvm/test/CodeGen/ARM/Windows/vla.ll
@@ -14,13 +14,13 @@ entry:
}
; CHECK-SMALL-CODE: adds [[R4:r[0-9]+]], #7
-; CHECK-SMALL-CODE: bic [[R4]], [[R4]], #7
+; CHECK-SMALL-CODE: bic [[R4]], [[R4]], #4
; CHECK-SMALL-CODE: lsrs r4, [[R4]], #2
; CHECK-SMALL-CODE: bl __chkstk
; CHECK-SMALL-CODE: sub.w sp, sp, r4
; CHECK-LARGE-CODE: adds [[R4:r[0-9]+]], #7
-; CHECK-LARGE-CODE: bic [[R4]], [[R4]], #7
+; CHECK-LARGE-CODE: bic [[R4]], [[R4]], #4
; CHECK-LARGE-CODE: lsrs r4, [[R4]], #2
; CHECK-LARGE-CODE: movw [[IP:r[0-9]+]], :lower16:__chkstk
; CHECK-LARGE-CODE: movt [[IP]], :upper16:__chkstk
diff --git a/llvm/test/CodeGen/ARM/and-cmpz.ll b/llvm/test/CodeGen/ARM/and-cmpz.ll
index fb5f8fe280f..b327f04ba06 100644
--- a/llvm/test/CodeGen/ARM/and-cmpz.ll
+++ b/llvm/test/CodeGen/ARM/and-cmpz.ll
@@ -89,11 +89,10 @@ false:
}
; CHECK-LABEL: i16_cmpz:
-; T1: movs r2, #127
-; T1-NEXT: lsls r2, r2, #9
-; T1-NEXT: ands r2, r0
-; T1-NEXT: lsrs r0, r2, #9
-; T2: and r0, r0, #65024
+; T1: uxth r0, r0
+; T1-NEXT: lsrs r0, r0, #9
+; T1-NEXT: bne
+; T2: uxth r0, r0
; T2-NEXT: movs r2, #0
; T2-NEXT: cmp.w r2, r0, lsr #9
define void @i16_cmpz(i16 %x, void (i32)* %foo) {
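
Here the AND result feeds a right shift by 9, so only bits 9 and up are demanded (Demanded = 0xFFFFFE00 is an inference from the output; SimplifyDemandedBits computes the real value). With Mask = 65024 = 0xFE00, ExpandedMask becomes 0xFE00 | 0x1FF = 0xFFFF, the uxth test passes, and both Thumb1 and Thumb2 replace the materialized 0xFE00 constant with a single uxth. A quick standalone check:

    #include <cassert>

    int main() {
      // i16_cmpz: only bits 9..31 of the AND result reach the lsr #9.
      unsigned Mask = 0xFE00u, Demanded = 0xFFFFFE00u;
      unsigned ShrunkMask = Mask & Demanded;    // 0xFE00
      unsigned ExpandedMask = Mask | ~Demanded; // 0xFFFF
      // 0xFFFF covers ShrunkMask and adds nothing outside ExpandedMask,
      // so "and #0xFFFF", i.e. uxth, is interchangeable with the original.
      assert((ShrunkMask & 0xFFFFu) == ShrunkMask);
      assert((~ExpandedMask & 0xFFFFu) == 0);
      return 0;
    }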
diff --git a/llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll b/llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll
index 6f1e18ffdfc..93ba3fbc853 100644
--- a/llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll
+++ b/llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll
@@ -30,10 +30,8 @@ define void @i24_and_or(i24* %a) {
; LE-LABEL: i24_and_or:
; LE: @ %bb.0:
; LE-NEXT: ldrh r1, [r0]
-; LE-NEXT: mov r2, #16256
-; LE-NEXT: orr r2, r2, #49152
; LE-NEXT: orr r1, r1, #384
-; LE-NEXT: and r1, r1, r2
+; LE-NEXT: bic r1, r1, #127
; LE-NEXT: strh r1, [r0]
; LE-NEXT: mov pc, lr
;
diff --git a/llvm/test/CodeGen/ARM/select_const.ll b/llvm/test/CodeGen/ARM/select_const.ll
index 7cce0b08203..81b0db3b313 100644
--- a/llvm/test/CodeGen/ARM/select_const.ll
+++ b/llvm/test/CodeGen/ARM/select_const.ll
@@ -314,9 +314,8 @@ define i64 @opaque_constant2(i1 %cond, i64 %x) {
; CHECK-NEXT: mov r1, #1
; CHECK-NEXT: tst r0, #1
; CHECK-NEXT: orr r1, r1, #65536
-; CHECK-NEXT: mov r0, r1
-; CHECK-NEXT: moveq r0, #23
-; CHECK-NEXT: and r0, r0, r1
+; CHECK-NEXT: moveq r1, #23
+; CHECK-NEXT: bic r0, r1, #22
; CHECK-NEXT: mov r1, #0
; CHECK-NEXT: mov pc, lr
%sel = select i1 %cond, i64 65537, i64 23
diff --git a/llvm/test/CodeGen/Thumb/bic_imm.ll b/llvm/test/CodeGen/Thumb/bic_imm.ll
index 38a144f04e7..0426551486b 100644
--- a/llvm/test/CodeGen/Thumb/bic_imm.ll
+++ b/llvm/test/CodeGen/Thumb/bic_imm.ll
@@ -39,22 +39,77 @@ entry:
define void @truncated(i16 %a, i16* %p) {
; CHECK-T1-LABEL: truncated:
; CHECK-T1: @ %bb.0:
-; CHECK-T1-NEXT: ldr r2, .LCPI2_0
+; CHECK-T1-NEXT: movs r2, #128
+; CHECK-T1-NEXT: bics r0, r2
+; CHECK-T1-NEXT: strh r0, [r1]
+; CHECK-T1-NEXT: bx lr
+;
+; CHECK-T2-LABEL: truncated:
+; CHECK-T2: @ %bb.0:
+; CHECK-T2-NEXT: bic r0, r0, #128
+; CHECK-T2-NEXT: strh r0, [r1]
+; CHECK-T2-NEXT: bx lr
+ %and = and i16 %a, -129
+ store i16 %and, i16* %p
+ ret void
+}
+
+define void @truncated_neg2(i16 %a, i16* %p) {
+; CHECK-T1-LABEL: truncated_neg2:
+; CHECK-T1: @ %bb.0:
+; CHECK-T1-NEXT: movs r2, #1
+; CHECK-T1-NEXT: bics r0, r2
+; CHECK-T1-NEXT: strh r0, [r1]
+; CHECK-T1-NEXT: bx lr
+;
+; CHECK-T2-LABEL: truncated_neg2:
+; CHECK-T2: @ %bb.0:
+; CHECK-T2-NEXT: bic r0, r0, #1
+; CHECK-T2-NEXT: strh r0, [r1]
+; CHECK-T2-NEXT: bx lr
+ %and = and i16 %a, -2
+ store i16 %and, i16* %p
+ ret void
+}
+
+define void @truncated_neg256(i16 %a, i16* %p) {
+; CHECK-T1-LABEL: truncated_neg256:
+; CHECK-T1: @ %bb.0:
+; CHECK-T1-NEXT: movs r2, #255
+; CHECK-T1-NEXT: bics r0, r2
+; CHECK-T1-NEXT: strh r0, [r1]
+; CHECK-T1-NEXT: bx lr
+;
+; CHECK-T2-LABEL: truncated_neg256:
+; CHECK-T2: @ %bb.0:
+; CHECK-T2-NEXT: bic r0, r0, #255
+; CHECK-T2-NEXT: strh r0, [r1]
+; CHECK-T2-NEXT: bx lr
+ %and = and i16 %a, -256
+ store i16 %and, i16* %p
+ ret void
+}
+
+; FIXME: Thumb2 supports "bic r0, r0, #510"
+define void @truncated_neg511(i16 %a, i16* %p) {
+; CHECK-T1-LABEL: truncated_neg511:
+; CHECK-T1: @ %bb.0:
+; CHECK-T1-NEXT: ldr r2, .LCPI5_0
; CHECK-T1-NEXT: ands r2, r0
; CHECK-T1-NEXT: strh r2, [r1]
; CHECK-T1-NEXT: bx lr
; CHECK-T1-NEXT: .p2align 2
; CHECK-T1-NEXT: @ %bb.1:
-; CHECK-T1-NEXT: .LCPI2_0:
-; CHECK-T1-NEXT: .long 65407 @ 0xff7f
+; CHECK-T1-NEXT: .LCPI5_0:
+; CHECK-T1-NEXT: .long 65025 @ 0xfe01
;
-; CHECK-T2-LABEL: truncated:
+; CHECK-T2-LABEL: truncated_neg511:
; CHECK-T2: @ %bb.0:
-; CHECK-T2-NEXT: movw r2, #65407
+; CHECK-T2-NEXT: movw r2, #65025
; CHECK-T2-NEXT: ands r0, r2
; CHECK-T2-NEXT: strh r0, [r1]
; CHECK-T2-NEXT: bx lr
- %and = and i16 %a, -129
+ %and = and i16 %a, -511
store i16 %and, i16* %p
ret void
}
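
truncated_neg511 documents the case the new hook deliberately leaves alone: with Mask = 0xFE01 and Demanded = 0xFFFF, neither extension mask is legal, ShrunkMask = 65025 is too big for movs+ands, and ExpandedMask = -511 is below -256, so targetShrinkDemandedConstant returns false and Thumb1 keeps the constant-pool load. The FIXME above notes that Thumb2 could still encode the expanded form as bic #510 (since -511 == ~510). The arithmetic, as a standalone check:

    #include <cassert>

    int main() {
      // truncated_neg511: and i16 %a, -511, stored back as i16.
      unsigned Mask = 0xFE01u, Demanded = 0xFFFFu;
      unsigned ShrunkMask = Mask & Demanded;    // 65025: not in [1, 255]
      unsigned ExpandedMask = Mask | ~Demanded; // 0xFFFFFE01 == -511
      assert(ShrunkMask >= 256);
      assert((int)ExpandedMask < -256); // misses the [-256, -2] rule
      return 0;
    }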
diff --git a/llvm/test/CodeGen/Thumb/shift-and.ll b/llvm/test/CodeGen/Thumb/shift-and.ll
index fa6ee1db5e2..01f6b84d0c3 100644
--- a/llvm/test/CodeGen/Thumb/shift-and.ll
+++ b/llvm/test/CodeGen/Thumb/shift-and.ll
@@ -46,7 +46,7 @@ define i32 @test4(i32 %x) {
; CHECK-LABEL: test4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: lsls r0, r0, #4
-; CHECK-NEXT: movs r1, #127
+; CHECK-NEXT: movs r1, #112
; CHECK-NEXT: bics r0, r1
; CHECK-NEXT: bx lr
entry:
@@ -110,16 +110,24 @@ entry:
define i32 @test9(i32 %x) {
; CHECK-LABEL: test9:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: lsrs r1, r0, #2
-; CHECK-NEXT: ldr r0, .LCPI8_0
-; CHECK-NEXT: ands r0, r1
+; CHECK-NEXT: lsrs r0, r0, #2
+; CHECK-NEXT: movs r1, #1
+; CHECK-NEXT: bics r0, r1
; CHECK-NEXT: bx lr
-; CHECK-NEXT: .p2align 2
-; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: .LCPI8_0:
-; CHECK-NEXT: .long 1073741822 @ 0x3ffffffe
entry:
%and = lshr i32 %x, 2
%shr = and i32 %and, 1073741822
ret i32 %shr
}
+
+define i32 @test10(i32 %x) {
+; CHECK-LABEL: test10:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: lsls r0, r0, #2
+; CHECK-NEXT: uxtb r0, r0
+; CHECK-NEXT: bx lr
+entry:
+ %0 = shl i32 %x, 2
+ %shr = and i32 %0, 255
+ ret i32 %shr
+}
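
test10 exercises the early "NewMask == Mask" return in UseMask: the lsls #2 makes bits 0-1 of the left-hand side known zero, so (by inference) they are not demanded from the constant and ShrunkMask drops to 0xFC, yet 0xFF still covers it and adds nothing outside ExpandedMask = 0xFF. IsLegalMask(0xFF) therefore keeps the constant exactly as written, and instruction selection folds the AND into uxtb:

    #include <cassert>

    int main() {
      // test10: (x << 2) & 255; bits 0-1 of the shifted value are zero.
      unsigned Mask = 0xFFu, Demanded = ~3u;    // inferred demanded bits
      unsigned ShrunkMask = Mask & Demanded;    // 0xFC
      unsigned ExpandedMask = Mask | ~Demanded; // 0xFF
      assert((ShrunkMask & 0xFFu) == ShrunkMask);
      assert((~ExpandedMask & 0xFFu) == 0); // 0xFF stays legal, uxtb wins
      return 0;
    }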