-rw-r--r--  llvm/lib/Target/BPF/BPF.h                    |   2
-rw-r--r--  llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp      | 169
-rw-r--r--  llvm/lib/Target/BPF/BPFMIPeephole.cpp        | 189
-rw-r--r--  llvm/lib/Target/BPF/BPFTargetMachine.cpp     |  12
-rw-r--r--  llvm/test/CodeGen/BPF/remove_truncate_6.ll   |  80
5 files changed, 285 insertions, 167 deletions
diff --git a/llvm/lib/Target/BPF/BPF.h b/llvm/lib/Target/BPF/BPF.h
index ba21503d868..6e4f35f4c5d 100644
--- a/llvm/lib/Target/BPF/BPF.h
+++ b/llvm/lib/Target/BPF/BPF.h
@@ -20,12 +20,14 @@ ModulePass *createBPFAbstractMemberAccess(BPFTargetMachine *TM);
FunctionPass *createBPFISelDag(BPFTargetMachine &TM);
FunctionPass *createBPFMISimplifyPatchablePass();
FunctionPass *createBPFMIPeepholePass();
+FunctionPass *createBPFMIPeepholeTruncElimPass();
FunctionPass *createBPFMIPreEmitPeepholePass();
FunctionPass *createBPFMIPreEmitCheckingPass();
void initializeBPFAbstractMemberAccessPass(PassRegistry&);
void initializeBPFMISimplifyPatchablePass(PassRegistry&);
void initializeBPFMIPeepholePass(PassRegistry&);
+void initializeBPFMIPeepholeTruncElimPass(PassRegistry&);
void initializeBPFMIPreEmitPeepholePass(PassRegistry&);
void initializeBPFMIPreEmitCheckingPass(PassRegistry&);
}
diff --git a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
index 85fa1f2a6be..f2be0ff070d 100644
--- a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -45,9 +45,7 @@ class BPFDAGToDAGISel : public SelectionDAGISel {
public:
explicit BPFDAGToDAGISel(BPFTargetMachine &TM)
- : SelectionDAGISel(TM), Subtarget(nullptr) {
- curr_func_ = nullptr;
- }
+ : SelectionDAGISel(TM), Subtarget(nullptr) {}
StringRef getPassName() const override {
return "BPF DAG->DAG Pattern Instruction Selection";
@@ -92,14 +90,8 @@ private:
val_vec_type &Vals, int Offset);
bool getConstantFieldValue(const GlobalAddressSDNode *Node, uint64_t Offset,
uint64_t Size, unsigned char *ByteSeq);
- bool checkLoadDef(unsigned DefReg, unsigned match_load_op);
-
// Mapping from ConstantStruct global value to corresponding byte-list values
std::map<const void *, val_vec_type> cs_vals_;
- // Mapping from vreg to load memory opcode
- std::map<unsigned, unsigned> load_to_vreg_;
- // Current function
- const Function *curr_func_;
};
} // namespace
@@ -325,32 +317,13 @@ void BPFDAGToDAGISel::PreprocessLoad(SDNode *Node,
}
void BPFDAGToDAGISel::PreprocessISelDAG() {
- // Iterate through all nodes, interested in the following cases:
+ // Iterate through all nodes, interested in the following case:
//
// . loads from ConstantStruct or ConstantArray of constructs
// which can be turns into constant itself, with this we can
// avoid reading from read-only section at runtime.
//
- // . reg truncating is often the result of 8/16/32bit->64bit or
- // 8/16bit->32bit conversion. If the reg value is loaded with
- // masked byte width, the AND operation can be removed since
- // BPF LOAD already has zero extension.
- //
- // This also solved a correctness issue.
- // In BPF socket-related program, e.g., __sk_buff->{data, data_end}
- // are 32-bit registers, but later on, kernel verifier will rewrite
- // it with 64-bit value. Therefore, truncating the value after the
- // load will result in incorrect code.
-
- // clear the load_to_vreg_ map so that we have a clean start
- // for this function.
- if (!curr_func_) {
- curr_func_ = FuncInfo->Fn;
- } else if (curr_func_ != FuncInfo->Fn) {
- load_to_vreg_.clear();
- curr_func_ = FuncInfo->Fn;
- }
-
+ // . Removing redundant AND for intrinsic narrow loads.
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end();
I != E;) {
@@ -358,8 +331,6 @@ void BPFDAGToDAGISel::PreprocessISelDAG() {
unsigned Opcode = Node->getOpcode();
if (Opcode == ISD::LOAD)
PreprocessLoad(Node, I);
- else if (Opcode == ISD::CopyToReg)
- PreprocessCopyToReg(Node);
else if (Opcode == ISD::AND)
PreprocessTrunc(Node, I);
}
@@ -491,36 +462,6 @@ bool BPFDAGToDAGISel::fillConstantStruct(const DataLayout &DL,
return true;
}
-void BPFDAGToDAGISel::PreprocessCopyToReg(SDNode *Node) {
- const RegisterSDNode *RegN = dyn_cast<RegisterSDNode>(Node->getOperand(1));
- if (!RegN || !Register::isVirtualRegister(RegN->getReg()))
- return;
-
- const LoadSDNode *LD = dyn_cast<LoadSDNode>(Node->getOperand(2));
- if (!LD)
- return;
-
- // Assign a load value to a virtual register. record its load width
- unsigned mem_load_op = 0;
- switch (LD->getMemOperand()->getSize()) {
- default:
- return;
- case 4:
- mem_load_op = BPF::LDW;
- break;
- case 2:
- mem_load_op = BPF::LDH;
- break;
- case 1:
- mem_load_op = BPF::LDB;
- break;
- }
-
- LLVM_DEBUG(dbgs() << "Find Load Value to VReg "
- << Register::virtReg2Index(RegN->getReg()) << '\n');
- load_to_vreg_[RegN->getReg()] = mem_load_op;
-}
-
void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node,
SelectionDAG::allnodes_iterator &I) {
ConstantSDNode *MaskN = dyn_cast<ConstantSDNode>(Node->getOperand(1));
@@ -534,112 +475,26 @@ void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node,
// which the generic optimizer doesn't understand their results are
// zero extended.
SDValue BaseV = Node->getOperand(0);
- if (BaseV.getOpcode() == ISD::INTRINSIC_W_CHAIN) {
- unsigned IntNo = cast<ConstantSDNode>(BaseV->getOperand(1))->getZExtValue();
- uint64_t MaskV = MaskN->getZExtValue();
-
- if (!((IntNo == Intrinsic::bpf_load_byte && MaskV == 0xFF) ||
- (IntNo == Intrinsic::bpf_load_half && MaskV == 0xFFFF) ||
- (IntNo == Intrinsic::bpf_load_word && MaskV == 0xFFFFFFFF)))
- return;
-
- LLVM_DEBUG(dbgs() << "Remove the redundant AND operation in: ";
- Node->dump(); dbgs() << '\n');
-
- I--;
- CurDAG->ReplaceAllUsesWith(SDValue(Node, 0), BaseV);
- I++;
- CurDAG->DeleteNode(Node);
-
- return;
- }
-
- // Multiple basic blocks case.
- if (BaseV.getOpcode() != ISD::CopyFromReg)
+ if (BaseV.getOpcode() != ISD::INTRINSIC_W_CHAIN)
return;
- unsigned match_load_op = 0;
- switch (MaskN->getZExtValue()) {
- default:
- return;
- case 0xFFFFFFFF:
- match_load_op = BPF::LDW;
- break;
- case 0xFFFF:
- match_load_op = BPF::LDH;
- break;
- case 0xFF:
- match_load_op = BPF::LDB;
- break;
- }
+ unsigned IntNo = cast<ConstantSDNode>(BaseV->getOperand(1))->getZExtValue();
+ uint64_t MaskV = MaskN->getZExtValue();
- const RegisterSDNode *RegN =
- dyn_cast<RegisterSDNode>(BaseV.getNode()->getOperand(1));
- if (!RegN || !Register::isVirtualRegister(RegN->getReg()))
+ if (!((IntNo == Intrinsic::bpf_load_byte && MaskV == 0xFF) ||
+ (IntNo == Intrinsic::bpf_load_half && MaskV == 0xFFFF) ||
+ (IntNo == Intrinsic::bpf_load_word && MaskV == 0xFFFFFFFF)))
return;
- unsigned AndOpReg = RegN->getReg();
- LLVM_DEBUG(dbgs() << "Examine " << printReg(AndOpReg) << '\n');
-
- // Examine the PHI insns in the MachineBasicBlock to found out the
- // definitions of this virtual register. At this stage (DAG2DAG
- // transformation), only PHI machine insns are available in the machine basic
- // block.
- MachineBasicBlock *MBB = FuncInfo->MBB;
- MachineInstr *MII = nullptr;
- for (auto &MI : *MBB) {
- for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
- const MachineOperand &MOP = MI.getOperand(i);
- if (!MOP.isReg() || !MOP.isDef())
- continue;
- Register Reg = MOP.getReg();
- if (Register::isVirtualRegister(Reg) && Reg == AndOpReg) {
- MII = &MI;
- break;
- }
- }
- }
-
- if (MII == nullptr) {
- // No phi definition in this block.
- if (!checkLoadDef(AndOpReg, match_load_op))
- return;
- } else {
- // The PHI node looks like:
- // %2 = PHI %0, <%bb.1>, %1, <%bb.3>
- // Trace each incoming definition, e.g., (%0, %bb.1) and (%1, %bb.3)
- // The AND operation can be removed if both %0 in %bb.1 and %1 in
- // %bb.3 are defined with a load matching the MaskN.
- LLVM_DEBUG(dbgs() << "Check PHI Insn: "; MII->dump(); dbgs() << '\n');
- unsigned PrevReg = -1;
- for (unsigned i = 0; i < MII->getNumOperands(); ++i) {
- const MachineOperand &MOP = MII->getOperand(i);
- if (MOP.isReg()) {
- if (MOP.isDef())
- continue;
- PrevReg = MOP.getReg();
- if (!Register::isVirtualRegister(PrevReg))
- return;
- if (!checkLoadDef(PrevReg, match_load_op))
- return;
- }
- }
- }
- LLVM_DEBUG(dbgs() << "Remove the redundant AND operation in: "; Node->dump();
- dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << "Remove the redundant AND operation in: ";
+ Node->dump(); dbgs() << '\n');
I--;
CurDAG->ReplaceAllUsesWith(SDValue(Node, 0), BaseV);
I++;
CurDAG->DeleteNode(Node);
-}
-
-bool BPFDAGToDAGISel::checkLoadDef(unsigned DefReg, unsigned match_load_op) {
- auto it = load_to_vreg_.find(DefReg);
- if (it == load_to_vreg_.end())
- return false; // The definition of register is not exported yet.
- return it->second == match_load_op;
+ return;
}
FunctionPass *llvm::createBPFISelDag(BPFTargetMachine &TM) {
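For reference, the only pattern PreprocessTrunc still strips after this change is an explicit mask applied to the result of one of the BPF narrow-load intrinsics. A minimal C sketch of that pattern follows; it assumes the samples/bpf-style asm-label declaration that binds load_byte() to llvm.bpf.load.byte, and the function name keep_low_byte is illustrative rather than part of this patch.

/* Illustrative only: load_byte() is assumed to be bound to the
 * llvm.bpf.load.byte intrinsic, as in the kernel's samples/bpf headers. */
unsigned long long load_byte(void *skb, unsigned long long off)
    asm("llvm.bpf.load.byte");

unsigned long long keep_low_byte(void *skb)
{
    /* The intrinsic already zero-extends its result, so the explicit
     * mask lowers to an ISD::AND with 0xFF that PreprocessTrunc removes
     * (similarly 0xFFFF for load_half and 0xFFFFFFFF for load_word). */
    return load_byte(skb, 0) & 0xFF;
}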
diff --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
index fafd2f703ad..72fd131c459 100644
--- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp
+++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
@@ -71,7 +71,7 @@ void BPFMIPeephole::initialize(MachineFunction &MFParm) {
MF = &MFParm;
MRI = &MF->getRegInfo();
TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
- LLVM_DEBUG(dbgs() << "*** BPF MachineSSA peephole pass ***\n\n");
+ LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n");
}
bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
@@ -186,7 +186,8 @@ bool BPFMIPeephole::eliminateZExtSeq(void) {
} // end default namespace
INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE,
- "BPF MachineSSA Peephole Optimization", false, false)
+ "BPF MachineSSA Peephole Optimization For ZEXT Eliminate",
+ false, false)
char BPFMIPeephole::ID = 0;
FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); }
@@ -253,12 +254,16 @@ bool BPFMIPreEmitPeephole::eliminateRedundantMov(void) {
// enabled. The special type cast insn MOV_32_64 involves different
// register class on src (i32) and dst (i64), RA could generate useless
// instruction due to this.
- if (MI.getOpcode() == BPF::MOV_32_64) {
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == BPF::MOV_32_64 ||
+ Opcode == BPF::MOV_rr || Opcode == BPF::MOV_rr_32) {
Register dst = MI.getOperand(0).getReg();
- Register dst_sub = TRI->getSubReg(dst, BPF::sub_32);
Register src = MI.getOperand(1).getReg();
- if (dst_sub != src)
+ if (Opcode == BPF::MOV_32_64)
+ dst = TRI->getSubReg(dst, BPF::sub_32);
+
+ if (dst != src)
continue;
ToErase = &MI;
@@ -281,3 +286,177 @@ FunctionPass* llvm::createBPFMIPreEmitPeepholePass()
{
return new BPFMIPreEmitPeephole();
}
+
+STATISTIC(TruncElemNum, "Number of truncations eliminated");
+
+namespace {
+
+struct BPFMIPeepholeTruncElim : public MachineFunctionPass {
+
+ static char ID;
+ const BPFInstrInfo *TII;
+ MachineFunction *MF;
+ MachineRegisterInfo *MRI;
+
+ BPFMIPeepholeTruncElim() : MachineFunctionPass(ID) {
+ initializeBPFMIPeepholeTruncElimPass(*PassRegistry::getPassRegistry());
+ }
+
+private:
+ // Initialize class variables.
+ void initialize(MachineFunction &MFParm);
+
+ bool eliminateTruncSeq(void);
+
+public:
+
+ // Main entry point for this pass.
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ initialize(MF);
+
+ return eliminateTruncSeq();
+ }
+};
+
+static bool TruncSizeCompatible(int TruncSize, unsigned opcode)
+{
+ if (TruncSize == 1)
+ return opcode == BPF::LDB || opcode == BPF::LDB32;
+
+ if (TruncSize == 2)
+ return opcode == BPF::LDH || opcode == BPF::LDH32;
+
+ if (TruncSize == 4)
+ return opcode == BPF::LDW || opcode == BPF::LDW32;
+
+ return false;
+}
+
+// Initialize class variables.
+void BPFMIPeepholeTruncElim::initialize(MachineFunction &MFParm) {
+ MF = &MFParm;
+ MRI = &MF->getRegInfo();
+ TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
+ LLVM_DEBUG(dbgs() << "*** BPF MachineSSA TRUNC Elim peephole pass ***\n\n");
+}
+
+// Reg truncating is often the result of an 8/16/32bit->64bit or
+// 8/16bit->32bit conversion. If the reg value is loaded with a
+// masked byte width, the AND operation can be removed since a
+// BPF LOAD already zero extends the value.
+//
+// This also solves a correctness issue.
+// In BPF socket-related programs, e.g., __sk_buff->{data, data_end}
+// are 32-bit registers, but later on the kernel verifier will rewrite
+// them with 64-bit values. Therefore, truncating the value after the
+// load will result in incorrect code.
+bool BPFMIPeepholeTruncElim::eliminateTruncSeq(void) {
+ MachineInstr* ToErase = nullptr;
+ bool Eliminated = false;
+
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB) {
+      // The second insn to remove if the elimination candidate is a pair.
+ MachineInstr *MI2 = nullptr;
+ Register DstReg, SrcReg;
+ MachineInstr *DefMI;
+ int TruncSize = -1;
+
+ // If the previous instruction was marked for elimination, remove it now.
+ if (ToErase) {
+ ToErase->eraseFromParent();
+ ToErase = nullptr;
+ }
+
+      // AND A, 0xFFFFFFFF will be turned into an SLL/SRL pair because the
+      // immediate for BPF ANDI is i32; this case only happens on ALU64.
+ if (MI.getOpcode() == BPF::SRL_ri &&
+ MI.getOperand(2).getImm() == 32) {
+ SrcReg = MI.getOperand(1).getReg();
+ MI2 = MRI->getVRegDef(SrcReg);
+ DstReg = MI.getOperand(0).getReg();
+
+ if (!MI2 ||
+ MI2->getOpcode() != BPF::SLL_ri ||
+ MI2->getOperand(2).getImm() != 32)
+ continue;
+
+ // Update SrcReg.
+ SrcReg = MI2->getOperand(1).getReg();
+ DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI)
+ TruncSize = 4;
+ } else if (MI.getOpcode() == BPF::AND_ri ||
+ MI.getOpcode() == BPF::AND_ri_32) {
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ DefMI = MRI->getVRegDef(SrcReg);
+
+ if (!DefMI)
+ continue;
+
+ int64_t imm = MI.getOperand(2).getImm();
+ if (imm == 0xff)
+ TruncSize = 1;
+ else if (imm == 0xffff)
+ TruncSize = 2;
+ }
+
+ if (TruncSize == -1)
+ continue;
+
+      // The definition is a PHI node; check all inputs.
+ if (DefMI->isPHI()) {
+ bool CheckFail = false;
+
+ for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) {
+ MachineOperand &opnd = DefMI->getOperand(i);
+ if (!opnd.isReg()) {
+ CheckFail = true;
+ break;
+ }
+
+ MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
+ if (!PhiDef || PhiDef->isPHI() ||
+ !TruncSizeCompatible(TruncSize, PhiDef->getOpcode())) {
+ CheckFail = true;
+ break;
+ }
+ }
+
+ if (CheckFail)
+ continue;
+ } else if (!TruncSizeCompatible(TruncSize, DefMI->getOpcode())) {
+ continue;
+ }
+
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::MOV_rr), DstReg)
+ .addReg(SrcReg);
+
+ if (MI2)
+ MI2->eraseFromParent();
+
+      // Record it in ToErase and erase it in the next iteration.
+ ToErase = &MI;
+ TruncElemNum++;
+ Eliminated = true;
+ }
+ }
+
+ return Eliminated;
+}
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(BPFMIPeepholeTruncElim, "bpf-mi-trunc-elim",
+ "BPF MachineSSA Peephole Optimization For TRUNC Eliminate",
+ false, false)
+
+char BPFMIPeepholeTruncElim::ID = 0;
+FunctionPass* llvm::createBPFMIPeepholeTruncElimPass()
+{
+ return new BPFMIPeepholeTruncElim();
+}
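For illustration, here is a source-level sketch of the kind of truncation the new BPFMIPeepholeTruncElim pass targets, written in the spirit of the existing remove_truncate_* tests; the function below is hypothetical and not part of this patch. A narrowly loaded value flows through a loop PHI before being widened, so the widening shows up as an AND mask (or, for the 32->64 bit case on ALU64, as an SLL 32/SRL 32 pair, since the ANDI immediate is only i32). When every PHI input is a zero-extending load of the matching width, the pass can rewrite the truncation into a plain register move.

/* Hypothetical example: every input of the PHI feeding the widening
 * conversion is a zero-extending LDH, so the i16 -> i64 mask that would
 * otherwise be emitted is expected to be removable by the new pass. */
void copy_widened(const unsigned short *a, unsigned long long *b,
                  unsigned int k)
{
    unsigned short e = *a;                /* zero-extending LDH             */
    for (unsigned int i = 0; i < k; i++) {
        b[i] = e;                         /* zext i16 -> i64                */
        e = a[i];                         /* back-edge input is another LDH */
    }
}

Contrast this with cal1/cal2 in the new test below, where one PHI input is produced by an xor, so the mask has to be kept.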
diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index d940ac965c0..0c4f2c74e7a 100644
--- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -36,6 +36,7 @@ extern "C" void LLVMInitializeBPFTarget() {
PassRegistry &PR = *PassRegistry::getPassRegistry();
initializeBPFAbstractMemberAccessPass(PR);
initializeBPFMIPeepholePass(PR);
+ initializeBPFMIPeepholeTruncElimPass(PR);
}
// DataLayout: little or big endian
@@ -115,15 +116,16 @@ void BPFPassConfig::addMachineSSAOptimization() {
TargetPassConfig::addMachineSSAOptimization();
const BPFSubtarget *Subtarget = getBPFTargetMachine().getSubtargetImpl();
- if (Subtarget->getHasAlu32() && !DisableMIPeephole)
- addPass(createBPFMIPeepholePass());
+ if (!DisableMIPeephole) {
+ if (Subtarget->getHasAlu32())
+ addPass(createBPFMIPeepholePass());
+ addPass(createBPFMIPeepholeTruncElimPass());
+ }
}
void BPFPassConfig::addPreEmitPass() {
- const BPFSubtarget *Subtarget = getBPFTargetMachine().getSubtargetImpl();
-
addPass(createBPFMIPreEmitCheckingPass());
if (getOptLevel() != CodeGenOpt::None)
- if (Subtarget->getHasAlu32() && !DisableMIPeephole)
+ if (!DisableMIPeephole)
addPass(createBPFMIPreEmitPeepholePass());
}
diff --git a/llvm/test/CodeGen/BPF/remove_truncate_6.ll b/llvm/test/CodeGen/BPF/remove_truncate_6.ll
new file mode 100644
index 00000000000..6577afb13ba
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/remove_truncate_6.ll
@@ -0,0 +1,80 @@
+; RUN: llc < %s -march=bpf -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=bpf -mattr=+alu32 -verify-machineinstrs | FileCheck --check-prefix=CHECK-32 %s
+;
+; void cal1(unsigned short *a, unsigned long *b, unsigned int k)
+; {
+; unsigned short e;
+;
+; e = *a;
+; for (unsigned int i = 0; i < k; i++) {
+; b[i] = e;
+; e = ~e;
+; }
+; }
+;
+; void cal2(unsigned short *a, unsigned int *b, unsigned int k)
+; {
+; unsigned short e;
+;
+; e = *a;
+; for (unsigned int i = 0; i < k; i++) {
+; b[i] = e;
+; e = ~e;
+; }
+; }
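+;
+; Note that in both loops the PHI feeding the zero extension also takes its
+; value from the xor on the back edge, so not every input is a zero-extending
+; load and the mask cannot be removed; the CHECK lines verify that the AND
+; with 65535 is still emitted.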
+
+; Function Attrs: nofree norecurse nounwind optsize
+define dso_local void @cal1(i16* nocapture readonly %a, i64* nocapture %b, i32 %k) local_unnamed_addr #0 {
+entry:
+ %cmp8 = icmp eq i32 %k, 0
+ br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ %0 = load i16, i16* %a, align 2
+ %wide.trip.count = zext i32 %k to i64
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+
+for.body: ; preds = %for.body, %for.body.preheader
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %e.09 = phi i16 [ %0, %for.body.preheader ], [ %neg, %for.body ]
+ %conv = zext i16 %e.09 to i64
+ %arrayidx = getelementptr inbounds i64, i64* %b, i64 %indvars.iv
+; CHECK: r{{[0-9]+}} &= 65535
+; CHECK-32: r{{[0-9]+}} &= 65535
+ store i64 %conv, i64* %arrayidx, align 8
+ %neg = xor i16 %e.09, -1
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; Function Attrs: nofree norecurse nounwind optsize
+define dso_local void @cal2(i16* nocapture readonly %a, i32* nocapture %b, i32 %k) local_unnamed_addr #0 {
+entry:
+ %cmp8 = icmp eq i32 %k, 0
+ br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ %0 = load i16, i16* %a, align 2
+ %wide.trip.count = zext i32 %k to i64
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+
+for.body: ; preds = %for.body, %for.body.preheader
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %e.09 = phi i16 [ %0, %for.body.preheader ], [ %neg, %for.body ]
+ %conv = zext i16 %e.09 to i32
+ %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+; CHECK: r{{[0-9]+}} &= 65535
+; CHECK-32: w{{[0-9]+}} &= 65535
+ store i32 %conv, i32* %arrayidx, align 4
+ %neg = xor i16 %e.09, -1
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}