Diffstat (limited to 'llvm')
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  38
 llvm/test/CodeGen/ARM/shift-combine.ll        | 119
 llvm/test/CodeGen/PowerPC/trunc-srl-load.ll   |  18
 llvm/test/CodeGen/X86/h-registers-1.ll        |  30
 4 files changed, 179 insertions, 26 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d918c962c8f..dc87e899455 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3831,6 +3831,17 @@ bool DAGCombiner::isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
   if (LoadN->getNumValues() > 2)
     return false;
 
+  // Only allow byte offsets.
+  if (ShAmt % 8)
+    return false;
+
+  // Ensure that this isn't going to produce an unsupported unaligned access.
+  if (ShAmt && !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+                                       ExtVT, LoadN->getAddressSpace(),
+                                       ShAmt / 8))
+    return false;
+
+
   // If the load that we're shrinking is an extload and we're not just
   // discarding the extension we can't simply shrink the load. Bail.
   // TODO: It would be possible to merge the extensions in some cases.
@@ -8434,8 +8445,9 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
 
   unsigned ShAmt = 0;
   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
-    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
-      ShAmt = N01->getZExtValue();
+    SDValue SRL = N0;
+    if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
+      ShAmt = ConstShift->getZExtValue();
       unsigned EVTBits = ExtVT.getSizeInBits();
       // Is the shift amount a multiple of size of VT?
       if ((ShAmt & (EVTBits-1)) == 0) {
@@ -8448,17 +8460,35 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
 
         // At this point, we must have a load or else we can't do the transform.
         if (!isa<LoadSDNode>(N0)) return SDValue();
 
+        auto *LN0 = cast<LoadSDNode>(N0);
+
         // Because a SRL must be assumed to *need* to zero-extend the high bits
         // (as opposed to anyext the high bits), we can't combine the zextload
         // lowering of SRL and an sextload.
-        if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
+        if (LN0->getExtensionType() == ISD::SEXTLOAD)
           return SDValue();
 
         // If the shift amount is larger than the input type then we're not
         // accessing any of the loaded bytes. If the load was a zextload/extload
         // then the result of the shift+trunc is zero/undef (handled elsewhere).
-        if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
+        if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
           return SDValue();
+
+        // If the SRL is only used by a masking AND, we may be able to adjust
+        // the ExtVT to make the AND redundant.
+        SDNode *Mask = *(SRL->use_begin());
+        if (Mask->getOpcode() == ISD::AND &&
+            isa<ConstantSDNode>(Mask->getOperand(1))) {
+          const APInt &ShiftMask =
+            cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
+          if (ShiftMask.isMask()) {
+            EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
+                                             ShiftMask.countTrailingOnes());
+            // Recompute the type.
+            if (TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
+              ExtVT = MaskedVT;
+          }
+        }
       }
     }
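The hunks above narrow ExtVT when the SRL feeds a low-bit masking AND. A minimal standalone sketch of that decision in plain C++ -- the helper name computeNarrowedLoad is invented for illustration, and the real code additionally consults TLI.isLoadExtLegal and TLI.allowsMemoryAccess before narrowing:

// Sketch: (x >> ShAmt) & Mask over a loaded value can become a narrow load
// when the shift is byte-aligned and the mask is a low-bit mask.
#include <cassert>
#include <cstdint>

struct NarrowedLoad {
  unsigned Bits;   // width of the narrowed load in bits (the MaskedVT width)
  unsigned Offset; // byte offset from the base address (little-endian)
};

static bool computeNarrowedLoad(unsigned ShAmt, uint64_t Mask,
                                NarrowedLoad &Out) {
  // Only allow byte offsets (mirrors the ShAmt % 8 bail-out).
  if (ShAmt % 8)
    return false;
  // APInt::isMask equivalent: Mask must have the form (1 << N) - 1, N > 0.
  if (Mask == 0 || (Mask & (Mask + 1)) != 0)
    return false;
  // APInt::countTrailingOnes equivalent.
  unsigned N = 0;
  for (uint64_t M = Mask; M & 1; M >>= 1)
    ++N;
  Out.Bits = N;           // narrowing to N bits makes the AND redundant
  Out.Offset = ShAmt / 8;
  return true;
}

int main() {
  NarrowedLoad NL;
  // trunc_i64_mask_srl below: (i64 >> 32) & 0xFFFF -> i16 load at offset 4.
  assert(computeNarrowedLoad(32, 0xFFFF, NL) && NL.Bits == 16 && NL.Offset == 4);
  // test_shift7_mask8 below: a 7-bit shift is rejected, the ubfx stays.
  assert(!computeNarrowedLoad(7, 0xFF, NL));
  return 0;
}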
diff --git a/llvm/test/CodeGen/ARM/shift-combine.ll b/llvm/test/CodeGen/ARM/shift-combine.ll
index f6892f36a43..82aa28b9155 100644
--- a/llvm/test/CodeGen/ARM/shift-combine.ll
+++ b/llvm/test/CodeGen/ARM/shift-combine.ll
@@ -217,10 +217,23 @@ entry:
   ret i32 %conv
 }
 
-; CHECK-LABEL: test_shift8_mask8
+; CHECK-LABEL: test_shift7_mask8
 ; CHECK-BE: ldr r1, [r0]
 ; CHECK-COMMON: ldr r1, [r0]
-; CHECK-COMMON: ubfx r1, r1, #8, #8
+; CHECK-COMMON: ubfx r1, r1, #7, #8
+; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift7_mask8(i32* nocapture %p) {
+entry:
+  %0 = load i32, i32* %p, align 4
+  %shl = lshr i32 %0, 7
+  %and = and i32 %shl, 255
+  store i32 %and, i32* %p, align 4
+  ret void
+}
+
+; CHECK-LABEL: test_shift8_mask8
+; CHECK-BE: ldrb r1, [r0, #2]
+; CHECK-COMMON: ldrb r1, [r0, #1]
 ; CHECK-COMMON: str r1, [r0]
 define arm_aapcscc void @test_shift8_mask8(i32* nocapture %p) {
 entry:
@@ -231,10 +244,40 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_shift8_mask16
+; CHECK-LABEL: test_shift8_mask7
+; CHECK-BE: ldr r1, [r0]
+; CHECK-COMMON: ldr r1, [r0]
+; CHECK-COMMON: ubfx r1, r1, #8, #7
+; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift8_mask7(i32* nocapture %p) {
+entry:
+  %0 = load i32, i32* %p, align 4
+  %shl = lshr i32 %0, 8
+  %and = and i32 %shl, 127
+  store i32 %and, i32* %p, align 4
+  ret void
+}
+
+; CHECK-LABEL: test_shift9_mask8
 ; CHECK-BE: ldr r1, [r0]
 ; CHECK-COMMON: ldr r1, [r0]
-; CHECK-COMMON: ubfx r1, r1, #8, #16
+; CHECK-COMMON: ubfx r1, r1, #9, #8
+; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift9_mask8(i32* nocapture %p) {
+entry:
+  %0 = load i32, i32* %p, align 4
+  %shl = lshr i32 %0, 9
+  %and = and i32 %shl, 255
+  store i32 %and, i32* %p, align 4
+  ret void
+}
+
+; CHECK-LABEL: test_shift8_mask16
+; CHECK-ALIGN: ldr r1, [r0]
+; CHECK-ALIGN: ubfx r1, r1, #8, #16
+; CHECK-BE: ldrh r1, [r0, #1]
+; CHECK-ARM: ldrh r1, [r0, #1]
+; CHECK-THUMB: ldrh.w r1, [r0, #1]
 ; CHECK-COMMON: str r1, [r0]
 define arm_aapcscc void @test_shift8_mask16(i32* nocapture %p) {
 entry:
@@ -245,6 +288,61 @@ entry:
   ret void
 }
 
+; CHECK-LABEL: test_shift15_mask16
+; CHECK-COMMON: ldr r1, [r0]
+; CHECK-COMMON: ubfx r1, r1, #15, #16
+; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift15_mask16(i32* nocapture %p) {
+entry:
+  %0 = load i32, i32* %p, align 4
+  %shl = lshr i32 %0, 15
+  %and = and i32 %shl, 65535
+  store i32 %and, i32* %p, align 4
+  ret void
+}
+
+; CHECK-LABEL: test_shift16_mask15
+; CHECK-BE: ldrh r1, [r0]
+; CHECK-COMMON: ldrh r1, [r0, #2]
+; CHECK-COMMON: bfc r1, #15, #17
+; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift16_mask15(i32* nocapture %p) {
+entry:
+  %0 = load i32, i32* %p, align 4
+  %shl = lshr i32 %0, 16
+  %and = and i32 %shl, 32767
+  store i32 %and, i32* %p, align 4
+  ret void
+}
+
+; CHECK-LABEL: test_shift8_mask24
+; CHECK-BE: ldr r1, [r0]
+; CHECK-COMMON: ldr r1, [r0]
+; CHECK-ARM: lsr r1, r1, #8
+; CHECK-THUMB: lsrs r1, r1, #8
+; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift8_mask24(i32* nocapture %p) {
+entry:
+  %0 = load i32, i32* %p, align 4
+  %shl = lshr i32 %0, 8
+  %and = and i32 %shl, 16777215
+  store i32 %and, i32* %p, align 4
+  ret void
+}
+
+; CHECK-LABEL: test_shift24_mask16
+; CHECK-BE: ldrb r1, [r0]
+; CHECK-COMMON: ldrb r1, [r0, #3]
+; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift24_mask16(i32* nocapture %p) {
+entry:
+  %0 = load i32, i32* %p, align 4
+  %shl = lshr i32 %0, 24
+  %and = and i32 %shl, 65535
+  store i32 %and, i32* %p, align 4
+  ret void
+}
+
 ; CHECK-LABEL: test_sext_shift8_mask8
 ; CHECK-BE: ldrb r0, [r0]
 ; CHECK-COMMON: ldrb r0, [r0, #1]
@@ -274,3 +372,16 @@ entry:
   store i32 %and, i32* %q, align 4
   ret void
 }
+
+; CHECK-LABEL: trunc_i64_mask_srl
+; CHECK-ARM: ldrh r2, [r1, #4]
+; CHECK-BE: ldrh r2, [r1, #2]
+define i1 @trunc_i64_mask_srl(i32 zeroext %AttrArgNo, i64* %ptr) {
+entry:
+  %bf.load.i = load i64, i64* %ptr, align 8
+  %bf.lshr.i = lshr i64 %bf.load.i, 32
+  %0 = trunc i64 %bf.lshr.i to i32
+  %bf.cast.i = and i32 %0, 65535
+  %cmp.i = icmp ugt i32 %bf.cast.i, %AttrArgNo
+  ret i1 %cmp.i
+}
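The ARM checks above and the PowerPC test below pin down the narrowed load's byte offset, which mirrors with endianness. A worked sketch of that offset computation in plain C++ -- narrowedLoadOffset is a hypothetical helper, not a SelectionDAG API:

// For a LoadBytes-wide load narrowed to NarrowBytes after a right shift of
// ShAmtBits, the wanted bytes sit at ShAmtBits/8 from the base address on
// little-endian targets and at the mirrored offset on big-endian targets.
#include <cassert>

static unsigned narrowedLoadOffset(bool BigEndian, unsigned LoadBytes,
                                   unsigned NarrowBytes, unsigned ShAmtBits) {
  unsigned ShiftBytes = ShAmtBits / 8;
  return BigEndian ? LoadBytes - NarrowBytes - ShiftBytes : ShiftBytes;
}

int main() {
  // test_shift8_mask8: i32 load, i8 result, shift 8.
  assert(narrowedLoadOffset(false, 4, 1, 8) == 1); // ldrb r1, [r0, #1]
  assert(narrowedLoadOffset(true, 4, 1, 8) == 2);  // CHECK-BE: ldrb r1, [r0, #2]
  // trunc_srl_load on big-endian PPC64: i64 load, i16 result, shift 32.
  assert(narrowedLoadOffset(true, 8, 2, 32) == 2); // lhz 4, 2(0)
  return 0;
}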
diff --git a/llvm/test/CodeGen/PowerPC/trunc-srl-load.ll b/llvm/test/CodeGen/PowerPC/trunc-srl-load.ll
new file mode 100644
index 00000000000..4cb557f031a
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/trunc-srl-load.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=powerpc64-unknown-unknown %s -o - | FileCheck %s
+
+; CHECK-LABEL: trunc_srl_load
+; CHECK-NOT: lhz 4, 4(0)
+; CHECK: lhz 4, 2(0)
+define dso_local fastcc void @trunc_srl_load(i32 zeroext %AttrArgNo) {
+entry:
+  %bf.load.i = load i64, i64* null, align 8
+  %bf.lshr.i = lshr i64 %bf.load.i, 32
+  %0 = trunc i64 %bf.lshr.i to i32
+  %bf.cast.i = and i32 %0, 65535
+  %cmp.i = icmp ugt i32 %bf.cast.i, %AttrArgNo
+  br i1 %cmp.i, label %exit, label %cond.false
+exit:        ; preds = %entry
+  unreachable
+cond.false:  ; preds = %entry
+  unreachable
+}
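The X86 h-registers-1.ll churn below is fallout from the same combine: a 32-bit stack load followed by a movzbl from %ah now becomes a single byte-sized movzbl from memory, which in turn shuffles the register allocation. A source-level pattern that should exercise this on x86-64 (function name invented for illustration; exact codegen depends on target options):

#include <cstdint>

// (*p >> 8) & 0xff extracts the second-lowest byte; with the narrowing
// combine this should lower to one byte load, e.g. movzbl 1(%rdi), %eax,
// instead of a full 32-bit load plus an %ah extract.
uint32_t second_byte(const uint32_t *p) {
  return (*p >> 8) & 0xff;
}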
diff --git a/llvm/test/CodeGen/X86/h-registers-1.ll b/llvm/test/CodeGen/X86/h-registers-1.ll
index 2900475be7f..fe00672b414 100644
--- a/llvm/test/CodeGen/X86/h-registers-1.ll
+++ b/llvm/test/CodeGen/X86/h-registers-1.ll
@@ -22,20 +22,17 @@ define i64 @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h)
 ; CHECK-NEXT: movzbl %ah, %eax
 ; CHECK-NEXT: movq %rax, %r10
 ; CHECK-NEXT: movzbl %dh, %edx
-; CHECK-NEXT: movzbl %ch, %eax
-; CHECK-NEXT: movq %rax, %r11
+; CHECK-NEXT: movzbl %ch, %ebp
 ; CHECK-NEXT: movq %r8, %rax
 ; CHECK-NEXT: movzbl %ah, %ecx
 ; CHECK-NEXT: movq %r9, %rax
-; CHECK-NEXT: movzbl %ah, %ebp
-; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: movzbl %ah, %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ebx
-; CHECK-NEXT: movzbl %bh, %edi
+; CHECK-NEXT: movzbl %ah, %ebx
+; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
 ; CHECK-NEXT: addq %r10, %rsi
-; CHECK-NEXT: addq %r11, %rdx
+; CHECK-NEXT: addq %rbp, %rdx
 ; CHECK-NEXT: addq %rsi, %rdx
-; CHECK-NEXT: addq %rbp, %rcx
+; CHECK-NEXT: addq %rbx, %rcx
 ; CHECK-NEXT: addq %rdi, %rax
 ; CHECK-NEXT: addq %rcx, %rax
 ; CHECK-NEXT: addq %rdx, %rax
@@ -57,20 +54,17 @@ define i64 @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h)
 ; GNUX32-NEXT: movzbl %ah, %eax
 ; GNUX32-NEXT: movq %rax, %r10
 ; GNUX32-NEXT: movzbl %dh, %edx
-; GNUX32-NEXT: movzbl %ch, %eax
-; GNUX32-NEXT: movq %rax, %r11
+; GNUX32-NEXT: movzbl %ch, %ebp
 ; GNUX32-NEXT: movq %r8, %rax
 ; GNUX32-NEXT: movzbl %ah, %ecx
 ; GNUX32-NEXT: movq %r9, %rax
-; GNUX32-NEXT: movzbl %ah, %ebp
-; GNUX32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GNUX32-NEXT: movzbl %ah, %eax
-; GNUX32-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; GNUX32-NEXT: movzbl %bh, %edi
+; GNUX32-NEXT: movzbl %ah, %ebx
+; GNUX32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; GNUX32-NEXT: movzbl {{[0-9]+}}(%esp), %edi
 ; GNUX32-NEXT: addq %r10, %rsi
-; GNUX32-NEXT: addq %r11, %rdx
+; GNUX32-NEXT: addq %rbp, %rdx
 ; GNUX32-NEXT: addq %rsi, %rdx
-; GNUX32-NEXT: addq %rbp, %rcx
+; GNUX32-NEXT: addq %rbx, %rcx
 ; GNUX32-NEXT: addq %rdi, %rax
 ; GNUX32-NEXT: addq %rcx, %rax
 ; GNUX32-NEXT: addq %rdx, %rax