Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/Target/SystemZ/README.txt                |   4
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp  |   3
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZISelLowering.cpp   |  85
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZISelLowering.h     |   5
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZInstrInfo.td       |  10
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZOperators.td       |   3
-rw-r--r--  llvm/test/CodeGen/SystemZ/call-03.ll              | 125
-rw-r--r--  llvm/test/CodeGen/SystemZ/int-conv-09.ll          |  77
-rw-r--r--  llvm/test/CodeGen/SystemZ/int-conv-10.ll          |  77
9 files changed, 214 insertions, 175 deletions
diff --git a/llvm/lib/Target/SystemZ/README.txt b/llvm/lib/Target/SystemZ/README.txt
index eebc4e4572f..93e29b8a0fa 100644
--- a/llvm/lib/Target/SystemZ/README.txt
+++ b/llvm/lib/Target/SystemZ/README.txt
@@ -35,10 +35,6 @@ performance measurements.
 
 --
 
-We don't support tail calls at present.
-
---
-
 We don't support prefetching yet.
 
 --
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index a58da901720..ed75e28cf93 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -420,8 +420,7 @@ void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
   SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
 
   // Skip the return instruction.
-  assert(MBBI->getOpcode() == SystemZ::RET &&
-         "Can only insert epilogue into returning blocks");
+  assert(MBBI->isReturn() && "Can only insert epilogue into returning blocks");
 
   uint64_t StackSize = getAllocatedStackSize(MF);
   if (ZFI->getLowSavedGPR()) {
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 788fc2e7058..0000485f2d0 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -305,6 +305,22 @@ bool SystemZTargetLowering::isLegalAddressingMode(const AddrMode &AM,
   return AM.Scale == 0 || AM.Scale == 1;
 }
 
+bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
+  if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
+    return false;
+  unsigned FromBits = FromType->getPrimitiveSizeInBits();
+  unsigned ToBits = ToType->getPrimitiveSizeInBits();
+  return FromBits > ToBits;
+}
+
+bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
+  if (!FromVT.isInteger() || !ToVT.isInteger())
+    return false;
+  unsigned FromBits = FromVT.getSizeInBits();
+  unsigned ToBits = ToVT.getSizeInBits();
+  return FromBits > ToBits;
+}
+
 //===----------------------------------------------------------------------===//
 // Inline asm support
 //===----------------------------------------------------------------------===//
@@ -527,6 +543,17 @@ LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
 
 #include "SystemZGenCallingConv.inc"
 
+bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
+                                                     Type *ToType) const {
+  return isTruncateFree(FromType, ToType);
+}
+
+bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+  if (!CI->isTailCall())
+    return false;
+  return true;
+}
+
 // Value is a value that has been passed to us in the location described by VA
 // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
 // any loads onto Chain.
@@ -689,6 +716,23 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
   return Chain;
 }
 
+static bool canUseSiblingCall(CCState ArgCCInfo,
+                              SmallVectorImpl<CCValAssign> &ArgLocs) {
+  // Punt if there are any indirect or stack arguments, or if the call
+  // needs the call-saved argument register R6.
+  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+    CCValAssign &VA = ArgLocs[I];
+    if (VA.getLocInfo() == CCValAssign::Indirect)
+      return false;
+    if (!VA.isRegLoc())
+      return false;
+    unsigned Reg = VA.getLocReg();
+    if (Reg == SystemZ::R6W || Reg == SystemZ::R6D)
+      return false;
+  }
+  return true;
+}
+
 SDValue
 SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                  SmallVectorImpl<SDValue> &InVals) const {
@@ -699,26 +743,29 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
   SDValue Chain = CLI.Chain;
   SDValue Callee = CLI.Callee;
-  bool &isTailCall = CLI.IsTailCall;
+  bool &IsTailCall = CLI.IsTailCall;
   CallingConv::ID CallConv = CLI.CallConv;
   bool IsVarArg = CLI.IsVarArg;
   MachineFunction &MF = DAG.getMachineFunction();
   EVT PtrVT = getPointerTy();
 
-  // SystemZ target does not yet support tail call optimization.
-  isTailCall = false;
-
   // Analyze the operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState ArgCCInfo(CallConv, IsVarArg, MF, TM, ArgLocs, *DAG.getContext());
   ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
 
+  // We don't support GuaranteedTailCallOpt, only automatically-detected
+  // sibling calls.
+  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs))
+    IsTailCall = false;
+
   // Get a count of how many bytes are to be pushed on the stack.
   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
 
   // Mark the start of the call.
-  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, PtrVT, true),
-                               DL);
+  if (!IsTailCall)
+    Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, PtrVT, true),
+                                 DL);
 
   // Copy argument values to their designated locations.
   SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
@@ -767,22 +814,27 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                         &MemOpChains[0], MemOpChains.size());
 
-  // Build a sequence of copy-to-reg nodes, chained and glued together.
-  SDValue Glue;
-  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
-    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
-                             RegsToPass[I].second, Glue);
-    Glue = Chain.getValue(1);
-  }
-
   // Accept direct calls by converting symbolic call addresses to the
-  // associated Target* opcodes.
+  // associated Target* opcodes. Force %r1 to be used for indirect
+  // tail calls.
+  SDValue Glue;
   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
     Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
   } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
     Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
     Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
+  } else if (IsTailCall) {
+    Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
+    Glue = Chain.getValue(1);
+    Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
+  }
+
+  // Build a sequence of copy-to-reg nodes, chained and glued together.
+  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
+    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
+                             RegsToPass[I].second, Glue);
+    Glue = Chain.getValue(1);
   }
 
   // The first call operand is the chain and the second is the target address.
@@ -802,6 +854,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
 
   // Emit the call.
   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+  if (IsTailCall)
+    return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, &Ops[0], Ops.size());
   Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, &Ops[0], Ops.size());
   Glue = Chain.getValue(1);
 
@@ -1689,6 +1743,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch (Opcode) {
     OPCODE(RET_FLAG);
     OPCODE(CALL);
+    OPCODE(SIBCALL);
    OPCODE(PCREL_WRAPPER);
     OPCODE(CMP);
     OPCODE(UCMP);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 21677a0e77c..3692e1e053b 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -32,6 +32,7 @@ namespace SystemZISD {
     // is the target address. The arguments start at operand 2.
     // There is an optional glue operand at the end.
     CALL,
+    SIBCALL,
 
     // Wraps a TargetGlobalAddress that should be loaded using PC-relative
     // accesses (LARL). Operand 0 is the address.
@@ -155,6 +156,8 @@ public:
     LLVM_OVERRIDE;
   virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const
     LLVM_OVERRIDE;
+  virtual bool isTruncateFree(Type *, Type *) const LLVM_OVERRIDE;
+  virtual bool isTruncateFree(EVT, EVT) const LLVM_OVERRIDE;
   virtual const char *getTargetNodeName(unsigned Opcode) const LLVM_OVERRIDE;
   virtual std::pair<unsigned, const TargetRegisterClass *>
     getRegForInlineAsmConstraint(const std::string &Constraint,
@@ -174,6 +177,8 @@ public:
                                 MachineBasicBlock *BB) const LLVM_OVERRIDE;
   virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const
     LLVM_OVERRIDE;
+  virtual bool allowTruncateForTailCall(Type *, Type *) const LLVM_OVERRIDE;
+  virtual bool mayBeEmittedAsTailCall(CallInst *CI) const LLVM_OVERRIDE;
   virtual SDValue
     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                          bool isVarArg,
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 7789d614972..876b48b50b6 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -212,6 +212,16 @@ let isCall = 1, Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D,
              "basr\t%r14, $R2", [(z_call ADDR64:$R2)]>;
 }
 
+// Sibling calls. Indirect sibling calls must be via R1, since R2 upwards
+// are argument registers and since branching to R0 is a no-op.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+    isCodeGenOnly = 1, R1 = 15 in {
+  def CallJG : InstRIL<0xC04, (outs), (ins pcrel32call:$I2),
+                       "jg\t$I2", [(z_sibcall pcrel32call:$I2)]>;
+  let R2 = 1, Uses = [R1D] in
+    def CallBR : InstRR<0x07, (outs), (ins), "br\t%r1", [(z_sibcall R1D)]>;
+}
+
 // Define the general form of the call instructions for the asm parser.
 // These instructions don't hard-code %r14 as the return address register.
 def AsmBRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16:$I2),
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index 1a3d45efe99..8d6c6198b3c 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -82,6 +82,9 @@ def z_retflag           : SDNode<"SystemZISD::RET_FLAG", SDTNone,
 def z_call              : SDNode<"SystemZISD::CALL", SDT_ZCall,
                                  [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
                                   SDNPVariadic]>;
+def z_sibcall           : SDNode<"SystemZISD::SIBCALL", SDT_ZCall,
+                                 [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
+                                  SDNPVariadic]>;
 def z_pcrel_wrapper     : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>;
 def z_cmp               : SDNode<"SystemZISD::CMP", SDT_ZCmp, [SDNPOutGlue]>;
 def z_ucmp              : SDNode<"SystemZISD::UCMP", SDT_ZCmp, [SDNPOutGlue]>;
diff --git a/llvm/test/CodeGen/SystemZ/call-03.ll b/llvm/test/CodeGen/SystemZ/call-03.ll
new file mode 100644
index 00000000000..1f314eae58c
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/call-03.ll
@@ -0,0 +1,125 @@
+; Test sibling calls.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare void @ok(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2,
+                 float %f4, double %f6)
+declare void @uses_r6(i8 %r2, i16 %r3, i32 %r4, i64 %r5, i64 %r6)
+declare void @uses_indirect(fp128 %r2)
+declare void @uses_stack(float %f0, float %f2, float %f4, float %f6,
+                         float %stack)
+declare i32 @returns_i32()
+declare i64 @returns_i64()
+
+; Check the maximum number of arguments that we can pass and still use
+; a sibling call.
+define void @f1() {
+; CHECK-LABEL: f1:
+; CHECK-DAG: lzer %f0
+; CHECK-DAG: lzdr %f2
+; CHECK-DAG: lhi %r2, 1
+; CHECK-DAG: lhi %r3, 2
+; CHECK-DAG: lhi %r4, 3
+; CHECK-DAG: lghi %r5, 4
+; CHECK-DAG: {{ler %f4, %f0|lzer %f4}}
+; CHECK-DAG: {{ldr %f6, %f2|lzdr %f6}}
+; CHECK: jg ok@PLT
+  tail call void @ok(i8 1, i16 2, i32 3, i64 4, float 0.0, double 0.0,
+                     float 0.0, double 0.0)
+  ret void
+}
+
+; Check a call that uses %r6 to pass an argument. At the moment we don't
+; use sibling calls in that case.
+define void @f2() {
+; CHECK-LABEL: f2:
+; CHECK: brasl %r14, uses_r6@PLT
+; CHECK: br %r14
+  tail call void @uses_r6(i8 1, i16 2, i32 3, i64 4, i64 5)
+  ret void
+}
+
+; Check a call that passes indirect arguments. We can't use sibling
+; calls in that case.
+define void @f3() {
+; CHECK-LABEL: f3:
+; CHECK: brasl %r14, uses_indirect@PLT
+; CHECK: br %r14
+  tail call void @uses_indirect(fp128 0xL00000000000000000000000000000000)
+  ret void
+}
+
+; Check a call that uses direct stack arguments, which again prevents
+; sibling calls
+define void @f4() {
+; CHECK-LABEL: f4:
+; CHECK: brasl %r14, uses_stack@PLT
+; CHECK: br %r14
+  tail call void @uses_stack(float 0.0, float 0.0, float 0.0, float 0.0,
+                             float 0.0)
+  ret void
+}
+
+; Check an indirect call. In this case the only acceptable choice for
+; the target register is %r1.
+define void @f5(void(i32, i32, i32, i32) *%foo) {
+; CHECK-LABEL: f5:
+; CHECK: lgr %r1, %r2
+; CHECK-DAG: lhi %r2, 1
+; CHECK-DAG: lhi %r3, 2
+; CHECK-DAG: lhi %r4, 3
+; CHECK-DAG: lhi %r5, 4
+; CHECK: br %r1
+  tail call void %foo(i32 1, i32 2, i32 3, i32 4)
+  ret void
+}
+
+; Check an indirect call that will be forced into a call-saved GPR
+; (which should be %r13, the highest GPR not used for anything else).
+define void @f6(void(i32) *%foo) {
+; CHECK-LABEL: f6:
+; CHECK: stmg %r13, %r15, 104(%r15)
+; CHECK: lgr %r13, %r2
+; CHECK: brasl %r14, returns_i32
+; CHECK: lgr %r1, %r13
+; CHECK: lmg %r13, %r15, 264(%r15)
+; CHECK: br %r1
+  %arg = call i32 @returns_i32()
+  tail call void %foo(i32 %arg)
+  ret void
+}
+
+; Test a function that returns a value.
+define i64 @f7() {
+; CHECK-LABEL: f7:
+; CHECK: jg returns_i64@PLT
+  %res = tail call i64 @returns_i64()
+  ret i64 %res
+}
+
+; Test a function that returns a value truncated from i64 to i32.
+define i32 @f8() {
+; CHECK-LABEL: f8:
+; CHECK: jg returns_i64@PLT
+  %res = tail call i64 @returns_i64()
+  %trunc = trunc i64 %res to i32
+  ret i32 %trunc
+}
+
+; Test a function that returns a value truncated from i64 to i7.
+define i7 @f9() {
+; CHECK-LABEL: f9:
+; CHECK: jg returns_i64@PLT
+  %res = tail call i64 @returns_i64()
+  %trunc = trunc i64 %res to i7
+  ret i7 %trunc
+}
+
+; Test a function that returns a value truncated from i32 to i8.
+define i8 @f10() {
+; CHECK-LABEL: f10:
+; CHECK: jg returns_i32@PLT
+  %res = tail call i32 @returns_i32()
+  %trunc = trunc i32 %res to i8
+  ret i8 %trunc
+}
diff --git a/llvm/test/CodeGen/SystemZ/int-conv-09.ll b/llvm/test/CodeGen/SystemZ/int-conv-09.ll
index db4c333a30b..b9c508917d4 100644
--- a/llvm/test/CodeGen/SystemZ/int-conv-09.ll
+++ b/llvm/test/CodeGen/SystemZ/int-conv-09.ll
@@ -102,80 +102,3 @@ define i64 @f9(i64 %src, i64 %index) {
   %ext = sext i32 %word to i64
   ret i64 %ext
 }
-
-; Test a case where we spill the source of at least one LGFR. We want
-; to use LGF if possible.
-define void @f10(i64 *%ptr1, i32 *%ptr2) {
-; CHECK-LABEL: f10:
-; CHECK: lgf {{%r[0-9]+}}, 16{{[04]}}(%r15)
-; CHECK: br %r14
-  %val0 = load volatile i32 *%ptr2
-  %val1 = load volatile i32 *%ptr2
-  %val2 = load volatile i32 *%ptr2
-  %val3 = load volatile i32 *%ptr2
-  %val4 = load volatile i32 *%ptr2
-  %val5 = load volatile i32 *%ptr2
-  %val6 = load volatile i32 *%ptr2
-  %val7 = load volatile i32 *%ptr2
-  %val8 = load volatile i32 *%ptr2
-  %val9 = load volatile i32 *%ptr2
-  %val10 = load volatile i32 *%ptr2
-  %val11 = load volatile i32 *%ptr2
-  %val12 = load volatile i32 *%ptr2
-  %val13 = load volatile i32 *%ptr2
-  %val14 = load volatile i32 *%ptr2
-  %val15 = load volatile i32 *%ptr2
-
-  %ext0 = sext i32 %val0 to i64
-  %ext1 = sext i32 %val1 to i64
-  %ext2 = sext i32 %val2 to i64
-  %ext3 = sext i32 %val3 to i64
-  %ext4 = sext i32 %val4 to i64
-  %ext5 = sext i32 %val5 to i64
-  %ext6 = sext i32 %val6 to i64
-  %ext7 = sext i32 %val7 to i64
-  %ext8 = sext i32 %val8 to i64
-  %ext9 = sext i32 %val9 to i64
-  %ext10 = sext i32 %val10 to i64
-  %ext11 = sext i32 %val11 to i64
-  %ext12 = sext i32 %val12 to i64
-  %ext13 = sext i32 %val13 to i64
-  %ext14 = sext i32 %val14 to i64
-  %ext15 = sext i32 %val15 to i64
-
-  store volatile i32 %val0, i32 *%ptr2
-  store volatile i32 %val1, i32 *%ptr2
-  store volatile i32 %val2, i32 *%ptr2
-  store volatile i32 %val3, i32 *%ptr2
-  store volatile i32 %val4, i32 *%ptr2
-  store volatile i32 %val5, i32 *%ptr2
-  store volatile i32 %val6, i32 *%ptr2
-  store volatile i32 %val7, i32 *%ptr2
-  store volatile i32 %val8, i32 *%ptr2
-  store volatile i32 %val9, i32 *%ptr2
-  store volatile i32 %val10, i32 *%ptr2
-  store volatile i32 %val11, i32 *%ptr2
-  store volatile i32 %val12, i32 *%ptr2
-  store volatile i32 %val13, i32 *%ptr2
-  store volatile i32 %val14, i32 *%ptr2
-  store volatile i32 %val15, i32 *%ptr2
-
-  store volatile i64 %ext0, i64 *%ptr1
-  store volatile i64 %ext1, i64 *%ptr1
-  store volatile i64 %ext2, i64 *%ptr1
-  store volatile i64 %ext3, i64 *%ptr1
-  store volatile i64 %ext4, i64 *%ptr1
-  store volatile i64 %ext5, i64 *%ptr1
-  store volatile i64 %ext6, i64 *%ptr1
-  store volatile i64 %ext7, i64 *%ptr1
-  store volatile i64 %ext8, i64 *%ptr1
-  store volatile i64 %ext9, i64 *%ptr1
-  store volatile i64 %ext10, i64 *%ptr1
-  store volatile i64 %ext11, i64 *%ptr1
-  store volatile i64 %ext12, i64 *%ptr1
-  store volatile i64 %ext13, i64 *%ptr1
-  store volatile i64 %ext14, i64 *%ptr1
-  store volatile i64 %ext15, i64 *%ptr1
-
-  ret void
-}
diff --git a/llvm/test/CodeGen/SystemZ/int-conv-10.ll b/llvm/test/CodeGen/SystemZ/int-conv-10.ll
index f2f71d90dce..781c74c7fa2 100644
--- a/llvm/test/CodeGen/SystemZ/int-conv-10.ll
+++ b/llvm/test/CodeGen/SystemZ/int-conv-10.ll
@@ -111,80 +111,3 @@ define i64 @f10(i64 %src, i64 %index) {
   %ext = zext i32 %word to i64
   ret i64 %ext
 }
-
-; Test a case where we spill the source of at least one LLGFR. We want
-; to use LLGF if possible.
-define void @f11(i64 *%ptr1, i32 *%ptr2) {
-; CHECK-LABEL: f11:
-; CHECK: llgf {{%r[0-9]+}}, 16{{[04]}}(%r15)
-; CHECK: br %r14
-  %val0 = load volatile i32 *%ptr2
-  %val1 = load volatile i32 *%ptr2
-  %val2 = load volatile i32 *%ptr2
-  %val3 = load volatile i32 *%ptr2
-  %val4 = load volatile i32 *%ptr2
-  %val5 = load volatile i32 *%ptr2
-  %val6 = load volatile i32 *%ptr2
-  %val7 = load volatile i32 *%ptr2
-  %val8 = load volatile i32 *%ptr2
-  %val9 = load volatile i32 *%ptr2
-  %val10 = load volatile i32 *%ptr2
-  %val11 = load volatile i32 *%ptr2
-  %val12 = load volatile i32 *%ptr2
-  %val13 = load volatile i32 *%ptr2
-  %val14 = load volatile i32 *%ptr2
-  %val15 = load volatile i32 *%ptr2
-
-  %ext0 = zext i32 %val0 to i64
-  %ext1 = zext i32 %val1 to i64
-  %ext2 = zext i32 %val2 to i64
-  %ext3 = zext i32 %val3 to i64
-  %ext4 = zext i32 %val4 to i64
-  %ext5 = zext i32 %val5 to i64
-  %ext6 = zext i32 %val6 to i64
-  %ext7 = zext i32 %val7 to i64
-  %ext8 = zext i32 %val8 to i64
-  %ext9 = zext i32 %val9 to i64
-  %ext10 = zext i32 %val10 to i64
-  %ext11 = zext i32 %val11 to i64
-  %ext12 = zext i32 %val12 to i64
-  %ext13 = zext i32 %val13 to i64
-  %ext14 = zext i32 %val14 to i64
-  %ext15 = zext i32 %val15 to i64
-
-  store volatile i32 %val0, i32 *%ptr2
-  store volatile i32 %val1, i32 *%ptr2
-  store volatile i32 %val2, i32 *%ptr2
-  store volatile i32 %val3, i32 *%ptr2
-  store volatile i32 %val4, i32 *%ptr2
-  store volatile i32 %val5, i32 *%ptr2
-  store volatile i32 %val6, i32 *%ptr2
-  store volatile i32 %val7, i32 *%ptr2
-  store volatile i32 %val8, i32 *%ptr2
-  store volatile i32 %val9, i32 *%ptr2
-  store volatile i32 %val10, i32 *%ptr2
-  store volatile i32 %val11, i32 *%ptr2
-  store volatile i32 %val12, i32 *%ptr2
-  store volatile i32 %val13, i32 *%ptr2
-  store volatile i32 %val14, i32 *%ptr2
-  store volatile i32 %val15, i32 *%ptr2
-
-  store volatile i64 %ext0, i64 *%ptr1
-  store volatile i64 %ext1, i64 *%ptr1
-  store volatile i64 %ext2, i64 *%ptr1
-  store volatile i64 %ext3, i64 *%ptr1
-  store volatile i64 %ext4, i64 *%ptr1
-  store volatile i64 %ext5, i64 *%ptr1
-  store volatile i64 %ext6, i64 *%ptr1
-  store volatile i64 %ext7, i64 *%ptr1
-  store volatile i64 %ext8, i64 *%ptr1
-  store volatile i64 %ext9, i64 *%ptr1
-  store volatile i64 %ext10, i64 *%ptr1
-  store volatile i64 %ext11, i64 *%ptr1
-  store volatile i64 %ext12, i64 *%ptr1
-  store volatile i64 %ext13, i64 *%ptr1
-  store volatile i64 %ext14, i64 *%ptr1
-  store volatile i64 %ext15, i64 *%ptr1
-
-  ret void
-}
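
As a user-level illustration of what this patch enables (a hypothetical example, not part of the commit): with a compiler built from this tree targeting s390x, a C call in tail position whose arguments all fit in the argument registers %r2-%r5 should become a jg sibling call, while a call that needs the call-saved register %r6 for its fifth integer argument should keep the usual brasl/br %r14 sequence, matching the canUseSiblingCall rules above. The function names and compile command below are assumptions for illustration only.

/* sibcall_demo.c: hypothetical demo; compile with something like
 *   clang --target=s390x-linux-gnu -O2 -S sibcall_demo.c
 * and inspect the generated assembly.
 */
extern long four(long a, long b, long c, long d);
extern long five(long a, long b, long c, long d, long e);

/* All four arguments fit in %r2-%r5, so the tail call can become a
 * sibling call: expect "jg four@PLT" in place of brasl/br %r14. */
long calls_four(long a, long b, long c, long d) {
  return four(a, b, c, d);
}

/* The fifth integer argument is passed in call-saved %r6, which
 * canUseSiblingCall punts on: expect "brasl %r14, five@PLT". */
long calls_five(long a, long b, long c, long d, long e) {
  return five(a, b, c, d, e);
}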