From 7c842fadf100b2ed160986e40a9a68a0613df256 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Wed, 5 Jun 2019 02:36:40 +0000 Subject: [PowerPC] Collapse RLDICL/RLDICR into RLDIC when possible Generally speaking, we lower to an optimal rotate sequence for nodes visible in the SDAG. However, there are instances where the two rotates are not visible at ISEL time - most notably those in a very common sequence when lowering switch statements to jump tables. A common situation is a switch on a 32-bit integer. This value has to have the upper 32 bits cleared and because jump table offsets are word offsets, the value needs to be shifted left by 2 bits. We currently emit the clear and the left shift as two separate instructions, but this is not needed as we can lower it to a single RLDIC. This patch just cleans that up. Differential revision: https://reviews.llvm.org/D60402 llvm-svn: 362576 --- llvm/test/CodeGen/PowerPC/collapse-rotates.mir | 65 +++++++++++ .../CodeGen/PowerPC/jump-tables-collapse-rotate.ll | 122 +++++++++++++++++++++ 2 files changed, 187 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/collapse-rotates.mir create mode 100644 llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate.ll (limited to 'llvm/test/CodeGen') diff --git a/llvm/test/CodeGen/PowerPC/collapse-rotates.mir b/llvm/test/CodeGen/PowerPC/collapse-rotates.mir new file mode 100644 index 00000000000..116f74bd46e --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/collapse-rotates.mir @@ -0,0 +1,65 @@ +# RUN: llc -mtriple=powerpc64le--linux-gnu -start-before ppc-mi-peepholes %s -o - -verify-machineinstrs | FileCheck %s + +--- | + ; ModuleID = 'b.ll' + source_filename = "b.ll" + target datalayout = "e-m:e-i64:64-n32:64" + + define dso_local i64 @test(i64 %l) { + entry: + %shl = shl i64 %l, 3 + ret i64 %shl + } + +... +--- +name: test +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: g8rc, preferred-register: '' } + - { id: 1, class: g8rc, preferred-register: '' } + - { id: 2, class: g8rc, preferred-register: '' } +liveins: + - { reg: '$x3', virtual-reg: '%0' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x3 + + %0:g8rc = COPY $x3 + %1:g8rc = RLDICL %0, 2, 32 + %2:g8rc = RLDICR %1, 3, 58 + $x3 = COPY %2 + BLR8 implicit $lr8, implicit $rm, implicit $x3 + +... +# CHECK: rldic 3, 3, 5, 29 diff --git a/llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate.ll b/llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate.ll new file mode 100644 index 00000000000..1fa49fdbfee --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -o - \ +; RUN: -ppc-asm-full-reg-names -verify-machineinstrs %s | FileCheck %s + +; Function Attrs: nounwind +define dso_local zeroext i32 @test(i32 signext %l) { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: addi r3, r3, -1 +; CHECK-NEXT: cmplwi r3, 5 +; CHECK-NEXT: bgt cr0, .LBB0_3 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: addis r4, r2, .LC0@toc@ha +; CHECK-NEXT: rldic r3, r3, 2, 30 +; CHECK-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-NEXT: lwax r3, r3, r4 +; CHECK-NEXT: add r3, r3, r4 +; CHECK-NEXT: mtctr r3 +; CHECK-NEXT: bctr +; CHECK-NEXT: .LBB0_2: # %sw.bb +; CHECK-NEXT: li r3, 2 +; CHECK-NEXT: bl test1 +; CHECK-NEXT: nop +; CHECK-NEXT: b .LBB0_10 +; CHECK-NEXT: .LBB0_3: # %sw.default +; CHECK-NEXT: li r3, 1 +; CHECK-NEXT: bl test1 +; CHECK-NEXT: nop +; CHECK-NEXT: bl test3 +; CHECK-NEXT: nop +; CHECK-NEXT: b .LBB0_10 +; CHECK-NEXT: .LBB0_4: # %sw.bb3 +; CHECK-NEXT: li r3, 3 +; CHECK-NEXT: b .LBB0_9 +; CHECK-NEXT: .LBB0_5: # %sw.bb5 +; CHECK-NEXT: li r3, 4 +; CHECK-NEXT: bl test2 +; CHECK-NEXT: nop +; CHECK-NEXT: bl test3 +; CHECK-NEXT: nop +; CHECK-NEXT: b .LBB0_10 +; CHECK-NEXT: .LBB0_6: # %sw.bb8 +; CHECK-NEXT: li r3, 5 +; CHECK-NEXT: bl test4 +; CHECK-NEXT: nop +; CHECK-NEXT: b .LBB0_10 +; CHECK-NEXT: .LBB0_7: # %sw.bb10 +; CHECK-NEXT: li r3, 66 +; CHECK-NEXT: bl test4 +; CHECK-NEXT: nop +; CHECK-NEXT: bl test1 +; CHECK-NEXT: nop +; CHECK-NEXT: b .LBB0_10 +; CHECK-NEXT: .LBB0_8: # %sw.bb13 +; CHECK-NEXT: li r3, 66 +; CHECK-NEXT: .LBB0_9: # %return +; CHECK-NEXT: bl test2 +; CHECK-NEXT: nop +; CHECK-NEXT: .LBB0_10: # %return +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +entry: + switch i32 %l, label %sw.default [ + i32 1, label %sw.bb + i32 2, label %sw.bb3 + i32 3, label %sw.bb5 + i32 4, label %sw.bb8 + i32 5, label %sw.bb10 + i32 6, label %sw.bb13 + ] + +sw.default: ; preds = %entry + %call = tail call signext i32 @test1(i32 signext 1) + %call1 = tail call signext i32 @test3(i32 signext %call) + br label %return + +sw.bb: ; preds = %entry + %call2 = tail call signext i32 @test1(i32 signext 2) + br label %return + +sw.bb3: ; preds = %entry + %call4 = tail call signext i32 @test2(i32 signext 3) + br label %return + +sw.bb5: ; preds = %entry + %call6 = tail call signext i32 @test2(i32 signext 4) + %call7 = tail call signext i32 @test3(i32 signext %call6) + br label %return + +sw.bb8: ; preds = %entry + %call9 = tail call signext i32 @test4(i32 signext 5) + br label %return + +sw.bb10: ; preds = %entry + %call11 = tail call signext i32 @test4(i32 signext 66) + %call12 = tail call signext i32 @test1(i32 signext %call11) + br label %return + +sw.bb13: ; preds = %entry + %call14 = tail call signext i32 @test2(i32 signext 66) + br label %return + +return: ; preds = %sw.bb13, %sw.bb10, %sw.bb8, %sw.bb5, %sw.bb3, %sw.bb, %sw.default + %retval.0 = phi i32 [ %call1, %sw.default ], [ %call14, %sw.bb13 ], [ %call12, %sw.bb10 ], [ %call9, %sw.bb8 ], [ %call7, %sw.bb5 ], [ %call4, %sw.bb3 ], [ %call2, %sw.bb ] + ret i32 %retval.0 +} + +declare signext i32 @test3(i32 signext) + +declare signext i32 @test1(i32 signext) + +declare signext i32 @test2(i32 signext) + +declare signext i32 @test4(i32 signext) -- cgit v1.2.3