diff options
author | Hiroshi Inoue <inouehrs@jp.ibm.com> | 2018-04-06 05:41:16 +0000 |
---|---|---|
committer | Hiroshi Inoue <inouehrs@jp.ibm.com> | 2018-04-06 05:41:16 +0000 |
commit | a2eefb6d9a76e456552fd96fad3dae6c0266cbed (patch) | |
tree | 2041568918741a73177ed63defeceb4a464423c6 | |
parent | 248148db0032b2f9061449b3dac0a5436ca16a3e (diff) | |
download | bcm5719-llvm-a2eefb6d9a76e456552fd96fad3dae6c0266cbed.tar.gz bcm5719-llvm-a2eefb6d9a76e456552fd96fad3dae6c0266cbed.zip |
[PowerPC] allow D-form VSX load/store when accessing FrameIndex without offset
VSX D-form load/store instructions on POWER9 require the offset to be a multiple of 16, and the helper `isOffsetMultipleOf` is used to check this.
So far, the helper has handled the FrameIndex + offset case, but not the FrameIndex-without-offset case. As a result, we miss opportunities to use D-form instructions when accessing an object or array allocated on the stack.
For example, an X-form store (stxvx) is used for `int a[4] = {0};` instead of a D-form store (stxv). For larger arrays, a D-form instruction is not used when accessing the first 16 bytes. Using D-form instructions reduces register pressure as well as the number of instructions.
Differential Revision: https://reviews.llvm.org/D45079
llvm-svn: 329377
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 24 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/vsx-p9.ll | 38 |
2 files changed, 54 insertions, 8 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index ec822aa53ca..bb9d5244f0e 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -3937,20 +3937,28 @@ bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const { else if (STN) AddrOp = STN->getOperand(2); + // If the address points a frame object or a frame object with an offset, + // we need to check the object alignment. short Imm = 0; - if (AddrOp.getOpcode() == ISD::ADD) { + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>( + AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) : + AddrOp)) { // If op0 is a frame index that is under aligned, we can't do it either, // because it is translated to r31 or r1 + slot + offset. We won't know the // slot number until the stack frame is finalized. - if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(AddrOp.getOperand(0))) { - const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo(); - unsigned SlotAlign = MFI.getObjectAlignment(FI->getIndex()); - if ((SlotAlign % Val) != 0) - return false; - } - return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val); + const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo(); + unsigned SlotAlign = MFI.getObjectAlignment(FI->getIndex()); + if ((SlotAlign % Val) != 0) + return false; + + // If we have an offset, we need further check on the offset. + if (AddrOp.getOpcode() != ISD::ADD) + return true; } + if (AddrOp.getOpcode() == ISD::ADD) + return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val); + // If the address comes from the outside, the offset will be zero. 
return AddrOp.getOpcode() == ISD::CopyFromReg; } diff --git a/llvm/test/CodeGen/PowerPC/vsx-p9.ll b/llvm/test/CodeGen/PowerPC/vsx-p9.ll index 1ca679f474c..d7bea34f754 100644 --- a/llvm/test/CodeGen/PowerPC/vsx-p9.ll +++ b/llvm/test/CodeGen/PowerPC/vsx-p9.ll @@ -411,3 +411,41 @@ entry: } declare void @sink(...) + +; stack object should be accessed using D-form load/store instead of X-form +define signext i32 @func1() { +; CHECK-LABEL: @func1 +; CHECK-NOT: stxvx +; CHECK: stxv {{[0-9]+}}, {{[0-9]+}}(1) +; CHECK-NOT: stxvx +; CHECK: blr +entry: + %a = alloca [4 x i32], align 4 + %0 = bitcast [4 x i32]* %a to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 4 %0, i8 0, i64 16, i1 false) + %arraydecay = getelementptr inbounds [4 x i32], [4 x i32]* %a, i64 0, i64 0 + %call = call signext i32 @callee(i32* nonnull %arraydecay) #3 + ret i32 %call +} + +; stack object should be accessed using D-form load/store instead of X-form +define signext i32 @func2() { +; CHECK-LABEL: @func2 +; CHECK-NOT: stxvx +; CHECK: stxv [[ZEROREG:[0-9]+]], {{[0-9]+}}(1) +; CHECK: stxv [[ZEROREG]], {{[0-9]+}}(1) +; CHECK: stxv [[ZEROREG]], {{[0-9]+}}(1) +; CHECK: stxv [[ZEROREG]], {{[0-9]+}}(1) +; CHECK-NOT: stxvx +; CHECK: blr +entry: + %a = alloca [16 x i32], align 4 + %0 = bitcast [16 x i32]* %a to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 4 %0, i8 0, i64 64, i1 false) + %arraydecay = getelementptr inbounds [16 x i32], [16 x i32]* %a, i64 0, i64 0 + %call = call signext i32 @callee(i32* nonnull %arraydecay) #3 + ret i32 %call +} + +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1 +declare signext i32 @callee(i32*) local_unnamed_addr #2 |