summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/Mips/MipsISelLowering.cpp51
-rw-r--r--llvm/test/CodeGen/Mips/cconv/byval.ll430
2 files changed, 473 insertions, 8 deletions
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 7e6ce442625..73c305042c7 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -2949,12 +2949,44 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext(),
MipsCCState::getSpecialCallingConvForCallee(Callee.getNode(), Subtarget));
+ const ExternalSymbolSDNode *ES =
+ dyn_cast_or_null<const ExternalSymbolSDNode>(Callee.getNode());
+
+ // There is one case where CALLSEQ_START..CALLSEQ_END can be nested, which
+ // is during the lowering of a call with a byval argument which produces
+ // a call to memcpy. For the O32 case, this causes the caller to allocate
+ // stack space for the reserved argument area for the callee, then recursively
+ // again for the memcpy call. In the NEWABI case, this doesn't occur as those
+ // ABIs mandate that the callee allocates the reserved argument area. We do
+ // still produce nested CALLSEQ_START..CALLSEQ_END with zero space though.
+ //
+ // If the callee has a byval argument and memcpy is used, we are mandated
+ // to already have produced a reserved argument area for the callee for O32.
+ // Therefore, the reserved argument area can be reused for both calls.
+ //
+ // Other cases of calling memcpy cannot have a chain with a CALLSEQ_START
+ // present, as we have yet to hook that node onto the chain.
+ //
+ // Hence, the CALLSEQ_START and CALLSEQ_END nodes can be eliminated in this
+ // case. GCC does a similar trick, in that wherever possible, it calculates
+ // the maximum outgoing argument area (including the reserved area), and
+ // preallocates the stack space on entrance to the caller.
+ //
+ // FIXME: We should do the same for efficiency and space.
+
+ // Note: The check on the calling convention below must match
+ // MipsABIInfo::GetCalleeAllocdArgSizeInBytes().
+ bool MemcpyInByVal = ES &&
+ StringRef(ES->getSymbol()) == StringRef("memcpy") &&
+ CallConv != CallingConv::Fast &&
+ Chain.getOpcode() == ISD::CALLSEQ_START;
+
// Allocate the reserved argument area. It seems strange to do this from the
// caller side but removing it breaks the frame size calculation.
- CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1);
+ unsigned ReservedArgArea =
+ MemcpyInByVal ? 0 : ABI.GetCalleeAllocdArgSizeInBytes(CallConv);
+ CCInfo.AllocateStack(ReservedArgArea, 1);
- const ExternalSymbolSDNode *ES =
- dyn_cast_or_null<const ExternalSymbolSDNode>(Callee.getNode());
CCInfo.AnalyzeCallOperands(Outs, CC_Mips, CLI.getArgs(),
ES ? ES->getSymbol() : nullptr);
@@ -2989,7 +3021,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
NextStackOffset = alignTo(NextStackOffset, StackAlignment);
SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, DL, true);
- if (!IsTailCall)
+ if (!(IsTailCall || MemcpyInByVal))
Chain = DAG.getCALLSEQ_START(Chain, NextStackOffset, 0, DL);
SDValue StackPtr =
@@ -3197,10 +3229,13 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getNode(MipsISD::JmpLink, DL, NodeTys, Ops);
SDValue InFlag = Chain.getValue(1);
- // Create the CALLSEQ_END node.
- Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal,
- DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
- InFlag = Chain.getValue(1);
+ // Create the CALLSEQ_END node, unless this is the memcpy call emitted for a
+ // byval argument (no matching CALLSEQ_START was created for it above).
+ if (!(MemcpyInByVal)) {
+ Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal,
+ DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
+ InFlag = Chain.getValue(1);
+ }
// Handle result values, copying them out of physregs into vregs that we
// return.
diff --git a/llvm/test/CodeGen/Mips/cconv/byval.ll b/llvm/test/CodeGen/Mips/cconv/byval.ll
new file mode 100644
index 00000000000..e3e7eccd262
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/cconv/byval.ll
@@ -0,0 +1,430 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: The SelectionDAG checks have been added by hand.
+
+; RUN: llc < %s -mtriple=mips-linux-gnu -verify-machineinstrs \
+; RUN: | FileCheck %s --check-prefix=O32
+; RUN: llc < %s -mtriple=mips64-linux-gnu -target-abi n32 -verify-machineinstrs \
+; RUN: | FileCheck %s --check-prefix=N32
+; RUN: llc < %s -mtriple=mips64-linux-gnu -target-abi n64 -verify-machineinstrs \
+; RUN: | FileCheck %s --check-prefix=N64
+
+; RUN: llc < %s -mtriple=mips-linux-gnu -verify-machineinstrs -debug 2>&1 \
+; RUN: | FileCheck %s --check-prefix=O32-SDAG
+; RUN: llc < %s -mtriple=mips64-linux-gnu -target-abi n32 -verify-machineinstrs \
+; RUN: -debug 2>&1 | FileCheck %s --check-prefix=N32-SDAG
+; RUN: llc < %s -mtriple=mips64-linux-gnu -target-abi n64 -verify-machineinstrs \
+; RUN: -debug 2>&1 | FileCheck %s --check-prefix=N64-SDAG
+
+; REQUIRES: asserts
+
+; Test that reserved argument area is shared between the memcpy call and the
+; call to f2. This eliminates the nested call sequence nodes.
+
+; Also, test that a basic call to memcpy reserves its outgoing argument area.
+
+; FIXME: We should also be explicit about testing that the loads for the
+; arguments are scheduled after the memcpy, but that wasn't enforced in
+; this patch.
+
+%struct.S1 = type { [65520 x i8] }
+
+; O32-SDAG-LABEL: Initial selection DAG: %bb.0 'g:entry'
+; O32-SDAG: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i32<{{.*}}>
+; O32-SDAG-NOT: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i32<{{.*}}>
+; O32-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetExternalSymbol:i32'memcpy'
+; O32-SDAG-NOT: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i32<{{.*}}>
+; O32-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetGlobalAddress:i32<void (%struct.S1*)* @f2>
+; O32-SDAG: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i32<{{.*}}>
+
+; N32-SDAG-LABEL: Initial selection DAG: %bb.0 'g:entry'
+; N32-SDAG: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i32<{{.*}}>
+; N32-SDAG-NOT: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i32<{{.*}}>
+; N32-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetExternalSymbol:i32'memcpy'
+; N32-SDAG-NOT: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i32<{{.*}}>
+; N32-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetGlobalAddress:i32<void (%struct.S1*)* @f2>
+; N32-SDAG: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i32<{{.*}}>
+
+; N64-SDAG-LABEL: Initial selection DAG: %bb.0 'g:entry'
+; N64-SDAG: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i64<{{.*}}>
+; N64-SDAG-NOT: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i64<{{.*}}>
+; N64-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetExternalSymbol:i64'memcpy'
+; N64-SDAG-NOT: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i64<{{.*}}>
+; N64-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetGlobalAddress:i64<void (%struct.S1*)* @f2>
+; N64-SDAG: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i64<{{.*}}>
+
+define dso_local void @g() #0 {
+; O32-LABEL: g:
+; O32: # %bb.0: # %entry
+; O32-NEXT: lui $1, 1
+; O32-NEXT: subu $sp, $sp, $1
+; O32-NEXT: .cfi_def_cfa_offset 65536
+; O32-NEXT: lui $1, 1
+; O32-NEXT: addu $1, $sp, $1
+; O32-NEXT: sw $ra, -4($1) # 4-byte Folded Spill
+; O32-NEXT: .cfi_offset 31, -4
+; O32-NEXT: ori $1, $zero, 65520
+; O32-NEXT: subu $sp, $sp, $1
+; O32-NEXT: addiu $1, $sp, 8
+; O32-NEXT: addiu $5, $1, 16
+; O32-NEXT: addiu $4, $sp, 16
+; O32-NEXT: jal memcpy
+; O32-NEXT: ori $6, $zero, 65504
+; O32-NEXT: lw $7, 20($sp)
+; O32-NEXT: lw $6, 16($sp)
+; O32-NEXT: lw $5, 12($sp)
+; O32-NEXT: jal f2
+; O32-NEXT: lw $4, 8($sp)
+; O32-NEXT: ori $1, $zero, 65520
+; O32-NEXT: addu $sp, $sp, $1
+; O32-NEXT: lui $1, 1
+; O32-NEXT: addu $1, $sp, $1
+; O32-NEXT: lw $ra, -4($1) # 4-byte Folded Reload
+; O32-NEXT: lui $1, 1
+; O32-NEXT: jr $ra
+; O32-NEXT: addu $sp, $sp, $1
+;
+; N32-LABEL: g:
+; N32: # %bb.0: # %entry
+; N32-NEXT: lui $1, 1
+; N32-NEXT: subu $sp, $sp, $1
+; N32-NEXT: .cfi_def_cfa_offset 65536
+; N32-NEXT: lui $1, 1
+; N32-NEXT: addu $1, $sp, $1
+; N32-NEXT: sd $ra, -8($1) # 8-byte Folded Spill
+; N32-NEXT: .cfi_offset 31, -8
+; N32-NEXT: ori $1, $zero, 65456
+; N32-NEXT: subu $sp, $sp, $1
+; N32-NEXT: addiu $1, $sp, 8
+; N32-NEXT: addiu $5, $1, 64
+; N32-NEXT: ori $6, $zero, 65456
+; N32-NEXT: jal memcpy
+; N32-NEXT: move $4, $sp
+; N32-NEXT: ld $11, 64($sp)
+; N32-NEXT: ld $10, 56($sp)
+; N32-NEXT: ld $9, 48($sp)
+; N32-NEXT: ld $8, 40($sp)
+; N32-NEXT: ld $7, 32($sp)
+; N32-NEXT: ld $6, 24($sp)
+; N32-NEXT: ld $5, 16($sp)
+; N32-NEXT: jal f2
+; N32-NEXT: ld $4, 8($sp)
+; N32-NEXT: ori $1, $zero, 65456
+; N32-NEXT: addu $sp, $sp, $1
+; N32-NEXT: lui $1, 1
+; N32-NEXT: addu $1, $sp, $1
+; N32-NEXT: ld $ra, -8($1) # 8-byte Folded Reload
+; N32-NEXT: lui $1, 1
+; N32-NEXT: jr $ra
+; N32-NEXT: addu $sp, $sp, $1
+;
+; N64-LABEL: g:
+; N64: # %bb.0: # %entry
+; N64-NEXT: lui $1, 1
+; N64-NEXT: dsubu $sp, $sp, $1
+; N64-NEXT: .cfi_def_cfa_offset 65536
+; N64-NEXT: lui $1, 1
+; N64-NEXT: daddu $1, $sp, $1
+; N64-NEXT: sd $ra, -8($1) # 8-byte Folded Spill
+; N64-NEXT: .cfi_offset 31, -8
+; N64-NEXT: ori $1, $zero, 65456
+; N64-NEXT: dsubu $sp, $sp, $1
+; N64-NEXT: daddiu $1, $sp, 8
+; N64-NEXT: daddiu $5, $1, 64
+; N64-NEXT: ori $6, $zero, 65456
+; N64-NEXT: jal memcpy
+; N64-NEXT: move $4, $sp
+; N64-NEXT: ld $11, 64($sp)
+; N64-NEXT: ld $10, 56($sp)
+; N64-NEXT: ld $9, 48($sp)
+; N64-NEXT: ld $8, 40($sp)
+; N64-NEXT: ld $7, 32($sp)
+; N64-NEXT: ld $6, 24($sp)
+; N64-NEXT: ld $5, 16($sp)
+; N64-NEXT: jal f2
+; N64-NEXT: ld $4, 8($sp)
+; N64-NEXT: ori $1, $zero, 65456
+; N64-NEXT: daddu $sp, $sp, $1
+; N64-NEXT: lui $1, 1
+; N64-NEXT: daddu $1, $sp, $1
+; N64-NEXT: ld $ra, -8($1) # 8-byte Folded Reload
+; N64-NEXT: lui $1, 1
+; N64-NEXT: jr $ra
+; N64-NEXT: daddu $sp, $sp, $1
+entry:
+ %a = alloca %struct.S1, align 4
+ call void @f2(%struct.S1* byval align 4 %a)
+ ret void
+}
+
+declare dso_local void @f2(%struct.S1* byval align 4) #1
+
+; O32-SDAG-LABEL: Initial selection DAG: %bb.0 'g2:entry'
+; O32-SDAG: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i32<{{.*}}>
+; O32-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetExternalSymbol:i32'memcpy'
+; O32-SDAG: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i32<{{.*}}>
+; O32-SDAG: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i32<{{.*}}>
+; O32-SDAG-NOT: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i32<{{.*}}>
+; O32-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetExternalSymbol:i32'memcpy'
+; O32-SDAG-NOT: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i32<{{.*}}>
+; O32-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetGlobalAddress:i32<void (%struct.S1*)* @f2>
+; O32-SDAG: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i32<{{.*}}>
+
+; N32-SDAG-LABEL: Initial selection DAG: %bb.0 'g2:entry'
+; N32-SDAG: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i32<{{.*}}>
+; N32-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetExternalSymbol:i32'memcpy'
+; N32-SDAG: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i32<{{.*}}>
+; N32-SDAG: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i32<{{.*}}>
+; N32-SDAG-NOT: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i32<{{.*}}>
+; N32-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetExternalSymbol:i32'memcpy'
+; N32-SDAG-NOT: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i32<{{.*}}>
+; N32-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetGlobalAddress:i32<void (%struct.S1*)* @f2>
+; N32-SDAG: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i32<{{.*}}>
+
+; N64-SDAG-LABEL: Initial selection DAG: %bb.0 'g2:entry'
+; N64-SDAG: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i64<{{.*}}>
+; N64-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetExternalSymbol:i64'memcpy'
+; N64-SDAG: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i64<{{.*}}>
+; N64-SDAG: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i64<{{.*}}>
+; N64-SDAG-NOT: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i64<{{.*}}>
+; N64-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetExternalSymbol:i64'memcpy'
+; N64-SDAG-NOT: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i64<{{.*}}>
+; N64-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetGlobalAddress:i64<void (%struct.S1*)* @f2>
+; N64-SDAG: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i64<{{.*}}>
+
+define dso_local void @g2(%struct.S1* %a) {
+; O32-LABEL: g2:
+; O32: # %bb.0: # %entry
+; O32-NEXT: lui $1, 1
+; O32-NEXT: addiu $1, $1, 8
+; O32-NEXT: subu $sp, $sp, $1
+; O32-NEXT: .cfi_def_cfa_offset 65544
+; O32-NEXT: lui $1, 1
+; O32-NEXT: addu $1, $sp, $1
+; O32-NEXT: sw $ra, 4($1) # 4-byte Folded Spill
+; O32-NEXT: lui $1, 1
+; O32-NEXT: addu $1, $sp, $1
+; O32-NEXT: sw $16, 0($1) # 4-byte Folded Spill
+; O32-NEXT: .cfi_offset 31, -4
+; O32-NEXT: .cfi_offset 16, -8
+; O32-NEXT: move $5, $4
+; O32-NEXT: lui $1, 1
+; O32-NEXT: addu $1, $sp, $1
+; O32-NEXT: sw $4, -4($1)
+; O32-NEXT: addiu $sp, $sp, -16
+; O32-NEXT: addiu $16, $sp, 8
+; O32-NEXT: ori $6, $zero, 65520
+; O32-NEXT: jal memcpy
+; O32-NEXT: move $4, $16
+; O32-NEXT: addiu $sp, $sp, 16
+; O32-NEXT: ori $1, $zero, 65520
+; O32-NEXT: subu $sp, $sp, $1
+; O32-NEXT: addiu $5, $16, 16
+; O32-NEXT: addiu $4, $sp, 16
+; O32-NEXT: jal memcpy
+; O32-NEXT: ori $6, $zero, 65504
+; O32-NEXT: lw $7, 20($sp)
+; O32-NEXT: lw $6, 16($sp)
+; O32-NEXT: lw $5, 12($sp)
+; O32-NEXT: jal f2
+; O32-NEXT: lw $4, 8($sp)
+; O32-NEXT: ori $1, $zero, 65520
+; O32-NEXT: addu $sp, $sp, $1
+; O32-NEXT: lui $1, 1
+; O32-NEXT: addu $1, $sp, $1
+; O32-NEXT: lw $16, 0($1) # 4-byte Folded Reload
+; O32-NEXT: lui $1, 1
+; O32-NEXT: addu $1, $sp, $1
+; O32-NEXT: lw $ra, 4($1) # 4-byte Folded Reload
+; O32-NEXT: lui $1, 1
+; O32-NEXT: addiu $1, $1, 8
+; O32-NEXT: jr $ra
+; O32-NEXT: addu $sp, $sp, $1
+;
+; N32-LABEL: g2:
+; N32: # %bb.0: # %entry
+; N32-NEXT: lui $1, 1
+; N32-NEXT: addiu $1, $1, 16
+; N32-NEXT: subu $sp, $sp, $1
+; N32-NEXT: .cfi_def_cfa_offset 65552
+; N32-NEXT: lui $1, 1
+; N32-NEXT: addu $1, $sp, $1
+; N32-NEXT: sd $ra, 8($1) # 8-byte Folded Spill
+; N32-NEXT: lui $1, 1
+; N32-NEXT: addu $1, $sp, $1
+; N32-NEXT: sd $16, 0($1) # 8-byte Folded Spill
+; N32-NEXT: .cfi_offset 31, -8
+; N32-NEXT: .cfi_offset 16, -16
+; N32-NEXT: move $5, $4
+; N32-NEXT: sll $1, $5, 0
+; N32-NEXT: lui $2, 1
+; N32-NEXT: addu $2, $sp, $2
+; N32-NEXT: sw $1, -4($2)
+; N32-NEXT: addiu $16, $sp, 8
+; N32-NEXT: ori $6, $zero, 65520
+; N32-NEXT: jal memcpy
+; N32-NEXT: move $4, $16
+; N32-NEXT: addiu $5, $16, 64
+; N32-NEXT: ori $1, $zero, 65456
+; N32-NEXT: subu $sp, $sp, $1
+; N32-NEXT: ori $6, $zero, 65456
+; N32-NEXT: jal memcpy
+; N32-NEXT: move $4, $sp
+; N32-NEXT: ld $11, 64($sp)
+; N32-NEXT: ld $10, 56($sp)
+; N32-NEXT: ld $9, 48($sp)
+; N32-NEXT: ld $8, 40($sp)
+; N32-NEXT: ld $7, 32($sp)
+; N32-NEXT: ld $6, 24($sp)
+; N32-NEXT: ld $5, 16($sp)
+; N32-NEXT: jal f2
+; N32-NEXT: ld $4, 8($sp)
+; N32-NEXT: ori $1, $zero, 65456
+; N32-NEXT: addu $sp, $sp, $1
+; N32-NEXT: lui $1, 1
+; N32-NEXT: addu $1, $sp, $1
+; N32-NEXT: ld $16, 0($1) # 8-byte Folded Reload
+; N32-NEXT: lui $1, 1
+; N32-NEXT: addu $1, $sp, $1
+; N32-NEXT: ld $ra, 8($1) # 8-byte Folded Reload
+; N32-NEXT: lui $1, 1
+; N32-NEXT: addiu $1, $1, 16
+; N32-NEXT: jr $ra
+; N32-NEXT: addu $sp, $sp, $1
+;
+; N64-LABEL: g2:
+; N64: # %bb.0: # %entry
+; N64-NEXT: lui $1, 1
+; N64-NEXT: daddiu $1, $1, 16
+; N64-NEXT: dsubu $sp, $sp, $1
+; N64-NEXT: .cfi_def_cfa_offset 65552
+; N64-NEXT: lui $1, 1
+; N64-NEXT: daddu $1, $sp, $1
+; N64-NEXT: sd $ra, 8($1) # 8-byte Folded Spill
+; N64-NEXT: lui $1, 1
+; N64-NEXT: daddu $1, $sp, $1
+; N64-NEXT: sd $16, 0($1) # 8-byte Folded Spill
+; N64-NEXT: .cfi_offset 31, -8
+; N64-NEXT: .cfi_offset 16, -16
+; N64-NEXT: move $5, $4
+; N64-NEXT: lui $1, 1
+; N64-NEXT: daddu $1, $sp, $1
+; N64-NEXT: sd $4, -8($1)
+; N64-NEXT: daddiu $16, $sp, 8
+; N64-NEXT: ori $6, $zero, 65520
+; N64-NEXT: jal memcpy
+; N64-NEXT: move $4, $16
+; N64-NEXT: ori $1, $zero, 65456
+; N64-NEXT: dsubu $sp, $sp, $1
+; N64-NEXT: daddiu $5, $16, 64
+; N64-NEXT: ori $6, $zero, 65456
+; N64-NEXT: jal memcpy
+; N64-NEXT: move $4, $sp
+; N64-NEXT: ld $11, 64($sp)
+; N64-NEXT: ld $10, 56($sp)
+; N64-NEXT: ld $9, 48($sp)
+; N64-NEXT: ld $8, 40($sp)
+; N64-NEXT: ld $7, 32($sp)
+; N64-NEXT: ld $6, 24($sp)
+; N64-NEXT: ld $5, 16($sp)
+; N64-NEXT: jal f2
+; N64-NEXT: ld $4, 8($sp)
+; N64-NEXT: ori $1, $zero, 65456
+; N64-NEXT: daddu $sp, $sp, $1
+; N64-NEXT: lui $1, 1
+; N64-NEXT: daddu $1, $sp, $1
+; N64-NEXT: ld $16, 0($1) # 8-byte Folded Reload
+; N64-NEXT: lui $1, 1
+; N64-NEXT: daddu $1, $sp, $1
+; N64-NEXT: ld $ra, 8($1) # 8-byte Folded Reload
+; N64-NEXT: lui $1, 1
+; N64-NEXT: daddiu $1, $1, 16
+; N64-NEXT: jr $ra
+; N64-NEXT: daddu $sp, $sp, $1
+entry:
+ %a.addr = alloca %struct.S1*, align 4
+ %byval-temp = alloca %struct.S1, align 4
+ store %struct.S1* %a, %struct.S1** %a.addr, align 4
+ %0 = load %struct.S1*, %struct.S1** %a.addr, align 4
+ %1 = bitcast %struct.S1* %byval-temp to i8*
+ %2 = bitcast %struct.S1* %0 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %1, i8* align 1 %2, i32 65520, i1 false)
+ call void @f2(%struct.S1* byval align 4 %byval-temp)
+ ret void
+}
+
+; O32-SDAG-LABEL: Initial selection DAG: %bb.0 'g3:entry'
+; O32-SDAG: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i32<16>
+; O32-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetExternalSymbol:i32'memcpy'
+; O32-SDAG: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i32<16>
+
+; N32-SDAG-LABEL: Initial selection DAG: %bb.0 'g3:entry'
+; N32-SDAG: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i32<0>
+; N32-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetExternalSymbol:i32'memcpy'
+; N32-SDAG: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i32<0>
+
+; N64-SDAG-LABEL: Initial selection DAG: %bb.0 'g3:entry'
+; N64-SDAG: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i64<0>
+; N64-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetExternalSymbol:i64'memcpy'
+; N64-SDAG: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i64<0>
+
+define dso_local i32 @g3(%struct.S1* %a, %struct.S1* %b) #0 {
+; O32-LABEL: g3:
+; O32: # %bb.0: # %entry
+; O32-NEXT: addiu $sp, $sp, -32
+; O32-NEXT: .cfi_def_cfa_offset 32
+; O32-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill
+; O32-NEXT: .cfi_offset 31, -4
+; O32-NEXT: sw $5, 20($sp)
+; O32-NEXT: sw $4, 24($sp)
+; O32-NEXT: jal memcpy
+; O32-NEXT: ori $6, $zero, 65520
+; O32-NEXT: addiu $2, $zero, 4
+; O32-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload
+; O32-NEXT: jr $ra
+; O32-NEXT: addiu $sp, $sp, 32
+;
+; N32-LABEL: g3:
+; N32: # %bb.0: # %entry
+; N32-NEXT: addiu $sp, $sp, -16
+; N32-NEXT: .cfi_def_cfa_offset 16
+; N32-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; N32-NEXT: .cfi_offset 31, -8
+; N32-NEXT: sll $1, $5, 0
+; N32-NEXT: sw $1, 0($sp)
+; N32-NEXT: sll $1, $4, 0
+; N32-NEXT: sw $1, 4($sp)
+; N32-NEXT: jal memcpy
+; N32-NEXT: ori $6, $zero, 65520
+; N32-NEXT: addiu $2, $zero, 4
+; N32-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; N32-NEXT: jr $ra
+; N32-NEXT: addiu $sp, $sp, 16
+;
+; N64-LABEL: g3:
+; N64: # %bb.0: # %entry
+; N64-NEXT: daddiu $sp, $sp, -32
+; N64-NEXT: .cfi_def_cfa_offset 32
+; N64-NEXT: sd $ra, 24($sp) # 8-byte Folded Spill
+; N64-NEXT: .cfi_offset 31, -8
+; N64-NEXT: sd $5, 8($sp)
+; N64-NEXT: sd $4, 16($sp)
+; N64-NEXT: jal memcpy
+; N64-NEXT: ori $6, $zero, 65520
+; N64-NEXT: addiu $2, $zero, 4
+; N64-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload
+; N64-NEXT: jr $ra
+; N64-NEXT: daddiu $sp, $sp, 32
+entry:
+ %a.addr = alloca %struct.S1*, align 4
+ %b.addr = alloca %struct.S1*, align 4
+ store %struct.S1* %a, %struct.S1** %a.addr, align 4
+ store %struct.S1* %b, %struct.S1** %b.addr, align 4
+ %0 = load %struct.S1*, %struct.S1** %a.addr, align 4
+ %1 = bitcast %struct.S1* %0 to i8*
+ %2 = load %struct.S1*, %struct.S1** %b.addr, align 4
+ %3 = bitcast %struct.S1* %2 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %1, i8* align 1 %3, i32 65520, i1 false)
+ ret i32 4
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) #2
OpenPOWER on IntegriCloud