Index: include/llvm/ADT/PointerUnion.h =================================================================== --- include/llvm/ADT/PointerUnion.h (revision 152265) +++ include/llvm/ADT/PointerUnion.h (working copy) @@ -266,7 +266,7 @@ ::llvm::PointerUnionTypeSelector >::Return Ty; - return Ty(Val).is(); + return Ty(Val).template is(); } /// get() - Return the value of the specified pointer type. If the @@ -279,7 +279,7 @@ ::llvm::PointerUnionTypeSelector >::Return Ty; - return Ty(Val).get(); + return Ty(Val).template get(); } /// dyn_cast() - If the current value is of the specified pointer type, Index: include/llvm/ADT/IntervalMap.h =================================================================== --- include/llvm/ADT/IntervalMap.h (revision 152265) +++ include/llvm/ADT/IntervalMap.h (working copy) @@ -1977,7 +1977,7 @@ CurSize[Nodes] = CurSize[NewNode]; Node[Nodes] = Node[NewNode]; CurSize[NewNode] = 0; - Node[NewNode] = this->map->newNode(); + Node[NewNode] = this->map->template newNode(); ++Nodes; } Index: utils/TableGen/X86RecognizableInstr.cpp =================================================================== --- utils/TableGen/X86RecognizableInstr.cpp (revision 152265) +++ utils/TableGen/X86RecognizableInstr.cpp (working copy) @@ -405,13 +405,13 @@ return FILTER_STRONG; - // Filter out artificial instructions + // Filter out artificial instructions but leave in the LOCK_PREFIX so it is + // printed as a separate "instruction". if (Name.find("_Int") != Name.npos || Name.find("Int_") != Name.npos || Name.find("_NOREX") != Name.npos || - Name.find("2SDL") != Name.npos || - Name == "LOCK_PREFIX") + Name.find("2SDL") != Name.npos) return FILTER_STRONG; // Filter out instructions with segment override prefixes. 
Index: lib/Target/ARM/ARMJITInfo.cpp =================================================================== --- lib/Target/ARM/ARMJITInfo.cpp (revision 152265) +++ lib/Target/ARM/ARMJITInfo.cpp (working copy) @@ -61,7 +61,7 @@ // concerned, so we can't just preserve the callee saved regs. "stmdb sp!, {r0, r1, r2, r3, lr}\n" #if (defined(__VFP_FP__) && !defined(__SOFTFP__)) - "fstmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" + "vstmdb sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" #endif // The LR contains the address of the stub function on entry. // pass it as the argument to the C part of the callback @@ -85,7 +85,7 @@ // #if (defined(__VFP_FP__) && !defined(__SOFTFP__)) // Restore VFP caller-saved registers. - "fldmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" + "vldmia sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" #endif // // We need to exchange the values in slots 0 and 1 so we can Index: lib/Target/ARM/ARMInstrNEON.td =================================================================== --- lib/Target/ARM/ARMInstrNEON.td (revision 152265) +++ lib/Target/ARM/ARMInstrNEON.td (working copy) @@ -4795,12 +4795,12 @@ // Vector Swap def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0, - (outs DPR:$Vd, DPR:$Vd1), (ins DPR:$Vm, DPR:$Vm1), - NoItinerary, "vswp", "$Vd, $Vd1", "$Vm = $Vd, $Vm1 = $Vd1", + (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2), + NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm", []>; def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, - (outs QPR:$Vd, QPR:$Vd1), (ins QPR:$Vm, QPR:$Vm1), - NoItinerary, "vswp", "$Vd, $Vd1", "$Vm = $Vd, $Vm1 = $Vd1", + (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2), + NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm", []>; // Vector Move Operations. 
Index: lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp =================================================================== --- lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp (revision 152265) +++ lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp (working copy) @@ -212,12 +212,12 @@ } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); // If a symbolic branch target was added as a constant expression then print - // that address in hex. + // that address in hex. And only print 32 unsigned bits for the address. const MCConstantExpr *BranchTarget = dyn_cast(Op.getExpr()); int64_t Address; if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { O << "0x"; - O.write_hex(Address); + O.write_hex((uint32_t)Address); } else { // Otherwise, just print the expression. Index: lib/Target/ARM/ARMInstrThumb2.td =================================================================== --- lib/Target/ARM/ARMInstrThumb2.td (revision 152265) +++ lib/Target/ARM/ARMInstrThumb2.td (working copy) @@ -3198,6 +3198,7 @@ let Inst{13} = target{17}; let Inst{21-16} = target{16-11}; let Inst{10-0} = target{10-0}; + let DecoderMethod = "DecodeT2BInstruction"; } let isNotDuplicable = 1, isIndirectBranch = 1 in { Index: lib/Target/ARM/ARMInstrThumb.td =================================================================== --- lib/Target/ARM/ARMInstrThumb.td (revision 152265) +++ lib/Target/ARM/ARMInstrThumb.td (working copy) @@ -413,11 +413,11 @@ "bl${p}\t$func", [(ARMtcall tglobaladdr:$func)]>, Requires<[IsThumb, IsNotIOS]> { - bits<22> func; - let Inst{26} = func{21}; + bits<24> func; + let Inst{26} = func{23}; let Inst{25-16} = func{20-11}; - let Inst{13} = 1; - let Inst{11} = 1; + let Inst{13} = func{22}; + let Inst{11} = func{21}; let Inst{10-0} = func{10-0}; } @@ -427,10 +427,11 @@ "blx${p}\t$func", [(ARMcall tglobaladdr:$func)]>, Requires<[IsThumb, HasV5T, IsNotIOS]> { - bits<21> func; + bits<24> func; + let Inst{26} = func{23}; let Inst{25-16} = func{20-11}; - let Inst{13} = 1; 
- let Inst{11} = 1; + let Inst{13} = func{22}; + let Inst{11} = func{21}; let Inst{10-1} = func{10-1}; let Inst{0} = 0; // func{0} is assumed zero } Index: lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp =================================================================== --- lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp (revision 152265) +++ lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp (working copy) @@ -397,39 +397,65 @@ return swapped; } case ARM::fixup_arm_thumb_bl: { - // The value doesn't encode the low bit (always zero) and is offset by - // four. The value is encoded into disjoint bit positions in the destination - // opcode. x = unchanged, I = immediate value bit, S = sign extension bit - // - // BL: xxxxxSIIIIIIIIII xxxxxIIIIIIIIIII - // - // Note that the halfwords are stored high first, low second; so we need - // to transpose the fixup value here to map properly. - unsigned isNeg = (int64_t(Value - 4) < 0) ? 1 : 0; - uint32_t Binary = 0; - Value = 0x3fffff & ((Value - 4) >> 1); - Binary = (Value & 0x7ff) << 16; // Low imm11 value. - Binary |= (Value & 0x1ffc00) >> 11; // High imm10 value. - Binary |= isNeg << 10; // Sign bit. - return Binary; + // The value doesn't encode the low bit (always zero) and is offset by + // four. The 32-bit immediate value is encoded as + // imm32 = SignExtend(S:I1:I2:imm10:imm11:0) + // where I1 = NOT(J1 ^ S) and I2 = NOT(J2 ^ S). + // The value is encoded into disjoint bit positions in the destination + // opcode. x = unchanged, I = immediate value bit, S = sign extension bit, + // J = either J1 or J2 bit + // + // BL: xxxxxSIIIIIIIIII xxJxJIIIIIIIIIII + // + // Note that the halfwords are stored high first, low second; so we need + // to transpose the fixup value here to map properly. 
+ uint32_t offset = (Value - 4) >> 1; + uint32_t signBit = (offset & 0x800000) >> 23; + uint32_t I1Bit = (offset & 0x400000) >> 22; + uint32_t J1Bit = (I1Bit ^ 0x1) ^ signBit; + uint32_t I2Bit = (offset & 0x200000) >> 21; + uint32_t J2Bit = (I2Bit ^ 0x1) ^ signBit; + uint32_t imm10Bits = (offset & 0x1FF800) >> 11; + uint32_t imm11Bits = (offset & 0x000007FF); + + uint32_t Binary = 0; + uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits); + uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | + (uint16_t)imm11Bits); + Binary |= secondHalf << 16; + Binary |= firstHalf; + return Binary; + } case ARM::fixup_arm_thumb_blx: { - // The value doesn't encode the low two bits (always zero) and is offset by - // four (see fixup_arm_thumb_cp). The value is encoded into disjoint bit - // positions in the destination opcode. x = unchanged, I = immediate value - // bit, S = sign extension bit, 0 = zero. - // - // BLX: xxxxxSIIIIIIIIII xxxxxIIIIIIIIII0 - // - // Note that the halfwords are stored high first, low second; so we need - // to transpose the fixup value here to map properly. - unsigned isNeg = (int64_t(Value-4) < 0) ? 1 : 0; - uint32_t Binary = 0; - Value = 0xfffff & ((Value - 2) >> 2); - Binary = (Value & 0x3ff) << 17; // Low imm10L value. - Binary |= (Value & 0xffc00) >> 10; // High imm10H value. - Binary |= isNeg << 10; // Sign bit. - return Binary; + // The value doesn't encode the low two bits (always zero) and is offset by + // four (see fixup_arm_thumb_cp). The 32-bit immediate value is encoded as + // imm32 = SignExtend(S:I1:I2:imm10H:imm10L:00) + // where I1 = NOT(J1 ^ S) and I2 = NOT(J2 ^ S). + // The value is encoded into disjoint bit positions in the destination + // opcode. x = unchanged, I = immediate value bit, S = sign extension bit, + // J = either J1 or J2 bit, 0 = zero. 
+ // + // BLX: xxxxxSIIIIIIIIII xxJxJIIIIIIIIII0 + // + // Note that the halfwords are stored high first, low second; so we need + // to transpose the fixup value here to map properly. + uint32_t offset = (Value - 2) >> 2; + uint32_t signBit = (offset & 0x400000) >> 22; + uint32_t I1Bit = (offset & 0x200000) >> 21; + uint32_t J1Bit = (I1Bit ^ 0x1) ^ signBit; + uint32_t I2Bit = (offset & 0x100000) >> 20; + uint32_t J2Bit = (I2Bit ^ 0x1) ^ signBit; + uint32_t imm10HBits = (offset & 0xFFC00) >> 10; + uint32_t imm10LBits = (offset & 0x3FF); + + uint32_t Binary = 0; + uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits); + uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | + ((uint16_t)imm10LBits) << 1); + Binary |= secondHalf << 16; + Binary |= firstHalf; + return Binary; } case ARM::fixup_arm_thumb_cp: // Offset by 4, and don't encode the low two bits. Two bytes of that Index: lib/Target/ARM/Disassembler/ARMDisassembler.cpp =================================================================== --- lib/Target/ARM/Disassembler/ARMDisassembler.cpp (revision 152265) +++ lib/Target/ARM/Disassembler/ARMDisassembler.cpp (working copy) @@ -182,6 +182,8 @@ uint64_t Address, const void *Decoder); static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeBranchImmInstruction(llvm::MCInst &Inst,unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val, @@ -1945,6 +1947,21 @@ } static DecodeStatus +DecodeT2BInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + unsigned imm = (fieldFromInstruction32(Insn, 0, 11) << 0) | + (fieldFromInstruction32(Insn, 11, 1) << 18) | + (fieldFromInstruction32(Insn, 13, 
1) << 17) | + (fieldFromInstruction32(Insn, 16, 6) << 11) | + (fieldFromInstruction32(Insn, 26, 1) << 19); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<20>(imm<<1) + 4, + true, 4, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<20>(imm << 1))); + return S; +} + +static DecodeStatus DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2177,6 +2194,8 @@ case ARM::VLD2b8wb_register: case ARM::VLD2b16wb_register: case ARM::VLD2b32wb_register: + Inst.addOperand(MCOperand::CreateImm(0)); + break; case ARM::VLD3d8_UPD: case ARM::VLD3d16_UPD: case ARM::VLD3d32_UPD: @@ -2245,6 +2264,16 @@ !Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; break; + case ARM::VLD2d8wb_fixed: + case ARM::VLD2d16wb_fixed: + case ARM::VLD2d32wb_fixed: + case ARM::VLD2b8wb_fixed: + case ARM::VLD2b16wb_fixed: + case ARM::VLD2b32wb_fixed: + case ARM::VLD2q8wb_fixed: + case ARM::VLD2q16wb_fixed: + case ARM::VLD2q32wb_fixed: + break; } return S; @@ -2313,6 +2342,10 @@ case ARM::VST2b8wb_register: case ARM::VST2b16wb_register: case ARM::VST2b32wb_register: + if (Rm == 0xF) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(0)); + break; case ARM::VST3d8_UPD: case ARM::VST3d16_UPD: case ARM::VST3d32_UPD: @@ -2354,6 +2387,23 @@ case ARM::VST1q16wb_fixed: case ARM::VST1q32wb_fixed: case ARM::VST1q64wb_fixed: + case ARM::VST1d8Twb_fixed: + case ARM::VST1d16Twb_fixed: + case ARM::VST1d32Twb_fixed: + case ARM::VST1d64Twb_fixed: + case ARM::VST1d8Qwb_fixed: + case ARM::VST1d16Qwb_fixed: + case ARM::VST1d32Qwb_fixed: + case ARM::VST1d64Qwb_fixed: + case ARM::VST2d8wb_fixed: + case ARM::VST2d16wb_fixed: + case ARM::VST2d32wb_fixed: + case ARM::VST2q8wb_fixed: + case ARM::VST2q16wb_fixed: + case ARM::VST2q32wb_fixed: + case ARM::VST2b8wb_fixed: + case ARM::VST2b16wb_fixed: + case ARM::VST2b32wb_fixed: break; } @@ 
-2555,7 +2605,6 @@ unsigned Rm = fieldFromInstruction32(Insn, 0, 4); unsigned align = fieldFromInstruction32(Insn, 4, 1); unsigned size = 1 << fieldFromInstruction32(Insn, 6, 2); - unsigned pred = fieldFromInstruction32(Insn, 22, 4); align *= 2*size; switch (Inst.getOpcode()) { @@ -2586,16 +2635,11 @@ return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(align)); - if (Rm == 0xD) - Inst.addOperand(MCOperand::CreateReg(0)); - else if (Rm != 0xF) { + if (Rm != 0xD && Rm != 0xF) { if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; } - if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) - return MCDisassembler::Fail; - return S; } @@ -2837,19 +2881,25 @@ static DecodeStatus DecodeThumbBROperand(llvm::MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(SignExtend32<12>(Val << 1))); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<12>(Val<<1) + 4, + true, 2, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<12>(Val << 1))); return MCDisassembler::Success; } static DecodeStatus DecodeT2BROperand(llvm::MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(Val))); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<21>(Val) + 4, + true, 4, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(Val))); return MCDisassembler::Success; } static DecodeStatus DecodeThumbCmpBROperand(llvm::MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1))); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<7>(Val<<1) + 4, + true, 2, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1))); return MCDisassembler::Success; } @@ -3162,10 +3212,25 @@ static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned 
Val, uint64_t Address, const void *Decoder) { + // Val is passed in as S:J1:J2:imm10H:imm10L:'0' + // Note only one trailing zero not two. Also the J1 and J2 values are from + // the encoded instruction. So here change to I1 and I2 values via: + // I1 = NOT(J1 EOR S); + // I2 = NOT(J2 EOR S); + // and build the imm32 with two trailing zeros as documented: + // imm32 = SignExtend(S:I1:I2:imm10H:imm10L:'00', 32); + unsigned S = (Val >> 23) & 1; + unsigned J1 = (Val >> 22) & 1; + unsigned J2 = (Val >> 21) & 1; + unsigned I1 = !(J1 ^ S); + unsigned I2 = !(J2 ^ S); + unsigned tmp = (Val & ~0x600000) | (I1 << 22) | (I2 << 21); + int imm32 = SignExtend32<25>(tmp << 1); + if (!tryAddingSymbolicOperand(Address, - (Address & ~2u) + SignExtend32<22>(Val << 1) + 4, + (Address & ~2u) + imm32 + 4, true, 4, Inst, Decoder)) - Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1))); + Inst.addOperand(MCOperand::CreateImm(imm32)); return MCDisassembler::Success; } @@ -3271,15 +3336,32 @@ static DecodeStatus DecodeThumbBCCTargetOperand(llvm::MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder){ - Inst.addOperand(MCOperand::CreateImm(Val << 1)); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<8>(Val<<1) + 4, + true, 2, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<8>(Val << 1))); return MCDisassembler::Success; } static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder){ - if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<22>(Val<<1) + 4, + // Val is passed in as S:J1:J2:imm10:imm11 + // Note no trailing zero after imm11. Also the J1 and J2 values are from + // the encoded instruction. 
So here change to I1 and I2 values via: + // I1 = NOT(J1 EOR S); + // I2 = NOT(J2 EOR S); + // and build the imm32 with one trailing zero as documented: + // imm32 = SignExtend(S:I1:I2:imm10:imm11:'0', 32); + unsigned S = (Val >> 23) & 1; + unsigned J1 = (Val >> 22) & 1; + unsigned J2 = (Val >> 21) & 1; + unsigned I1 = !(J1 ^ S); + unsigned I2 = !(J2 ^ S); + unsigned tmp = (Val & ~0x600000) | (I1 << 22) | (I2 << 21); + int imm32 = SignExtend32<25>(tmp << 1); + + if (!tryAddingSymbolicOperand(Address, Address + imm32 + 4, true, 4, Inst, Decoder)) - Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1))); + Inst.addOperand(MCOperand::CreateImm(imm32)); return MCDisassembler::Success; } Index: lib/Target/X86/Disassembler/X86Disassembler.cpp =================================================================== --- lib/Target/X86/Disassembler/X86Disassembler.cpp (revision 152265) +++ lib/Target/X86/Disassembler/X86Disassembler.cpp (working copy) @@ -322,7 +322,12 @@ OperandType type = (OperandType)operand.type; + bool isBranch = false; + uint64_t pcrel = 0; if (type == TYPE_RELv) { + isBranch = true; + pcrel = insn.startLocation + + insn.immediateOffset + insn.immediateSize; switch (insn.displacementSize) { default: break; @@ -373,8 +378,6 @@ } } - bool isBranch = false; - uint64_t pcrel = 0; switch (type) { case TYPE_XMM128: mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4))); Index: lib/Target/X86/Disassembler/X86DisassemblerDecoder.c =================================================================== --- lib/Target/X86/Disassembler/X86DisassemblerDecoder.c (revision 152265) +++ lib/Target/X86/Disassembler/X86DisassemblerDecoder.c (working copy) @@ -312,6 +312,13 @@ if (consumeByte(insn, &byte)) return -1; + + // If the first byte is a LOCK prefix break and let it be disassembled + // as a lock "instruction", by creating an <MCInst #xxxx LOCK_PREFIX>. 
+ // FIXME there is currently no way to get the disassembler to print the + // lock prefix if it is not the first byte. + if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) + break; switch (byte) { case 0xf0: /* LOCK */ Index: lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp =================================================================== --- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp (revision 152265) +++ lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp (working copy) @@ -24,7 +24,8 @@ bool RuntimeDyldMachO:: resolveRelocation(uint8_t *LocalAddress, uint64_t FinalAddress, - uint64_t Value, + uint64_t FinalSource1, + uint64_t FinalSource2, bool isPCRel, unsigned Type, unsigned Size, @@ -32,10 +33,20 @@ // This just dispatches to the proper target specific routine. switch (CPUType) { default: llvm_unreachable("Unsupported CPU type!"); + case mach::CTM_i386: + return resolveI386Relocation(LocalAddress, + FinalAddress, + FinalSource1, + FinalSource2, + isPCRel, + Type, + Size, + Addend); case mach::CTM_x86_64: return resolveX86_64Relocation(LocalAddress, FinalAddress, - (uintptr_t)Value, + FinalSource1, + FinalSource2, isPCRel, Type, Size, @@ -43,7 +54,8 @@ case mach::CTM_ARM: return resolveARMRelocation(LocalAddress, FinalAddress, - (uintptr_t)Value, + FinalSource1, + FinalSource2, isPCRel, Type, Size, @@ -52,19 +64,52 @@ } bool RuntimeDyldMachO:: +resolveI386Relocation(uint8_t *LocalAddress, + uint64_t FinalAddress, + uint64_t FinalSource1, + uint64_t FinalSource2, + bool isPCRel, + unsigned Type, + unsigned Size, + int64_t Addend) { + int64_t ValueToWrite = Addend; + + switch (Type) { + default: + llvm_unreachable("Invalid relocation type!"); + case macho::RIT_Vanilla: + ValueToWrite += FinalSource1; + break; + case macho::RIT_Difference: + case macho::RIT_Generic_LocalDifference: + case macho::RIT_Generic_PreboundLazyPointer: + ValueToWrite += FinalSource1; + ValueToWrite -= FinalSource2; + break; + } + + if (isPCRel) + 
ValueToWrite -= FinalAddress + 4; // see resolveX86_64Relocation + + uint8_t *p = LocalAddress; + for (unsigned i = 0; i < Size; ++i) { + *p++ = (uint8_t)(ValueToWrite & 0xff); + ValueToWrite >>= 8; + } + + return false; +} + +bool RuntimeDyldMachO:: resolveX86_64Relocation(uint8_t *LocalAddress, uint64_t FinalAddress, - uint64_t Value, + uint64_t FinalSource1, + uint64_t FinalSource2, bool isPCRel, unsigned Type, unsigned Size, int64_t Addend) { - // If the relocation is PC-relative, the value to be encoded is the - // pointer difference. - if (isPCRel) - // FIXME: It seems this value needs to be adjusted by 4 for an effective PC - // address. Is that expected? Only for branches, perhaps? - Value -= FinalAddress + 4; + int64_t ValueToWrite = Addend; switch(Type) { default: @@ -74,41 +119,53 @@ case macho::RIT_X86_64_Signed4: case macho::RIT_X86_64_Signed: case macho::RIT_X86_64_Unsigned: - case macho::RIT_X86_64_Branch: { - Value += Addend; - // Mask in the target value a byte at a time (we don't have an alignment - // guarantee for the target address, so this is safest). - uint8_t *p = (uint8_t*)LocalAddress; - for (unsigned i = 0; i < Size; ++i) { - *p++ = (uint8_t)Value; - Value >>= 8; - } - return false; - } + case macho::RIT_X86_64_Branch: + ValueToWrite += FinalSource1; + break; case macho::RIT_X86_64_GOTLoad: case macho::RIT_X86_64_GOT: case macho::RIT_X86_64_Subtractor: case macho::RIT_X86_64_TLV: return Error("Relocation type not implemented yet!"); } + + // If the relocation is PC-relative, the value to be encoded is the + // pointer difference. + if (isPCRel) + // FIXME: It seems this value needs to be adjusted by 4 for an effective PC + // address. Is that expected? Only for branches, perhaps? + ValueToWrite -= FinalAddress + 4; + + // Mask in the target value a byte at a time (we don't have an alignment + // guarantee for the target address, so this is safest). 
+ uint8_t *p = (uint8_t*)LocalAddress; + for (unsigned i = 0; i < Size; ++i) { + *p++ = (uint8_t)(ValueToWrite & 0xff); + ValueToWrite >>= 8; + } + + return false; } bool RuntimeDyldMachO:: resolveARMRelocation(uint8_t *LocalAddress, uint64_t FinalAddress, - uint64_t Value, + uint64_t FinalSource1, + uint64_t FinalSource2, bool isPCRel, unsigned Type, unsigned Size, int64_t Addend) { + int64_t ValueToWrite = Addend; + // If the relocation is PC-relative, the value to be encoded is the // pointer difference. if (isPCRel) { - Value -= FinalAddress; + ValueToWrite -= FinalAddress; // ARM PCRel relocations have an effective-PC offset of two instructions // (four bytes in Thumb mode, 8 bytes in ARM mode). // FIXME: For now, assume ARM mode. - Value -= 8; + ValueToWrite -= 8; } switch(Type) { @@ -119,8 +176,8 @@ // guarantee for the target address, so this is safest). uint8_t *p = (uint8_t*)LocalAddress; for (unsigned i = 0; i < Size; ++i) { - *p++ = (uint8_t)Value; - Value >>= 8; + *p++ = (uint8_t)(ValueToWrite & 0xff); + ValueToWrite >>= 8; } break; } @@ -129,15 +186,15 @@ // 32-bit aligned, so we can do it all at once. uint32_t *p = (uint32_t*)LocalAddress; // The low two bits of the value are not encoded. - Value >>= 2; + ValueToWrite >>= 2; // Mask the value to 24 bits. - Value &= 0xffffff; + ValueToWrite &= 0xffffff; // FIXME: If the destination is a Thumb function (and the instruction // is a non-predicated BL instruction), we need to change it to a BLX // instruction instead. // Insert the value into the instruction. 
- *p = (*p & ~0xffffff) | Value; + *p = (*p & ~0xffffff) | ValueToWrite; break; } case macho::RIT_ARM_ThumbBranch22Bit: @@ -153,6 +210,29 @@ return false; } +static bool +ResolveSectionAndOffset(const MachOObject *Obj, + SmallVectorImpl &SectionMap, + const MachOObject::LoadCommandInfo *SegmentLCI, + InMemoryStruct &SegmentLC, + uint64_t Address, + unsigned &SectionID, + uint64_t &Offset) +{ + for (unsigned SI = 0, SE = SegmentLC->NumSections; SI < SE; ++SI) { + InMemoryStruct CandidateSection; + Obj->ReadSection(*SegmentLCI, SI, CandidateSection); + if (Address >= CandidateSection->Address && + Address < CandidateSection->Address + CandidateSection->Size) { + SectionID = SectionMap[SI]; + Offset = Address - CandidateSection->Address; + return true; + } + } + + return false; +} + bool RuntimeDyldMachO:: loadSegment32(const MachOObject *Obj, const MachOObject::LoadCommandInfo *SegmentLCI, @@ -210,6 +290,7 @@ // Process the relocations for each section we're loading. Relocations.grow(Relocations.size() + SegmentLC->NumSections); + RelocationSources.grow(RelocationSources.size() + SegmentLC->NumSections); for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) { InMemoryStruct Sect; Obj->ReadSection(*SegmentLCI, SectNum, Sect); @@ -218,51 +299,135 @@ for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) { InMemoryStruct RE; Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE); - if (RE->Word0 & macho::RF_Scattered) - return Error("NOT YET IMPLEMENTED: scattered relocations."); - // Word0 of the relocation is the offset into the section where the - // relocation should be applied. We need to translate that into an - // offset into a function since that's our atom. - uint32_t Offset = RE->Word0; - bool isExtern = (RE->Word1 >> 27) & 1; - - // FIXME: Get the relocation addend from the target address. - // FIXME: VERY imporant for internal relocations. - - // Figure out the source symbol of the relocation. 
If isExtern is true, - // this relocation references the symbol table, otherwise it references - // a section in the same object, numbered from 1 through NumSections - // (SectionBases is [0, NumSections-1]). - uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value - if (!isExtern) { - assert(SourceNum > 0 && "Invalid relocation section number!"); - unsigned SectionID = SectionMap[SourceNum - 1]; + if (RE->Word0 & macho::RF_Scattered) { + // The lower 24 bits of Word0 of the scattered relocation is the offset + // into the section where the relocation should be applied, i.e., the + // current section. + uint32_t OffsetInTarget = RE->Word0 & 0x00ffffff; unsigned TargetID = SectionMap[SectNum]; - DEBUG(dbgs() << "Internal relocation at Section #" - << TargetID << " + " << Offset - << " from Section #" - << SectionID << " (Word1: " - << format("0x%x", RE->Word1) << ")\n"); - - // Store the relocation information. It will get resolved when - // the section addresses are assigned. - Relocations[SectionID].push_back(RelocationEntry(TargetID, - Offset, - RE->Word1, - 0 /*Addend*/)); + // Word1 of the scattered relocation is a file offset which needs to + // be resolved into Section+Offset form. This gives the address of the + // source. + unsigned Source1ID; + uint64_t Source1Offset; + if (!ResolveSectionAndOffset(Obj, + SectionMap, + SegmentLCI, + SegmentLC, + RE->Word1, + Source1ID, + Source1Offset)) + return Error("couldn't find scattered relocation value in sections"); + // This relocation may have a paired relocation entry. If it does, set + // the source/offset information for it correctly. 
+ unsigned Source2ID = SectionOffset::NoSectionID; + uint64_t Source2Offset = 0; + if (j + 1 < Sect->NumRelocationTableEntries) { + InMemoryStruct PairRE; + Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j+1, PairRE); + if ((PairRE->Word0 & macho::RF_Scattered) && + ((PairRE->Word0 & 0x0f000000) >> 24) == macho::RIT_Pair) { + if (!ResolveSectionAndOffset(Obj, + SectionMap, + SegmentLCI, + SegmentLC, + PairRE->Word1, + Source2ID, + Source2Offset)) + return Error("couldn't find scattered relocation value in sections"); + ++j; + } + } + if (Source2ID == SectionOffset::NoSectionID) + DEBUG(dbgs() << "Scattered relocation at Section #" + << TargetID << " + " << OffsetInTarget + << " from Section #" << Source1ID + << "+" << Source1Offset + << " (Word0: " + << format("0x%x", RE->Word0) << ")\n"); + else + DEBUG(dbgs() << "Scattered relocation at Section #" + << TargetID << " + " << OffsetInTarget + << " from Section #" << Source1ID + << "+" << Source1Offset + << " and Section #" << Source2ID + << "+" << Source2Offset + << " (Word0: " + << format("0x%x", RE->Word0) << ")\n"); + uint32_t RelocationIndex = Relocations[TargetID].size(); + // FIXME: Get the relocation addend from the target address. + // FIXME: VERY important for internal relocations. + RelocationEntry TranslatedRE(OffsetInTarget, + Source1ID, + Source1Offset, + Source2ID, + Source2Offset, + RE->Word1, + 0 /*Addend*/); + Relocations[TargetID].push_back(TranslatedRE); + RelocationSources[Source1ID].push_back(RelocationSource(TargetID, + RelocationIndex, + 0)); + if (Source2ID != SectionOffset::NoSectionID) + RelocationSources[Source2ID].push_back(RelocationSource(TargetID, + RelocationIndex, + 1)); } else { - StringRef SourceName = SymbolNames[SourceNum]; - - // Now store the relocation information. Associate it with the source - // symbol. Just add it to the unresolved list and let the general - // path post-load resolve it if we know where the symbol is. 
- UnresolvedRelocations[SourceName].push_back(RelocationEntry(SectNum, - Offset, - RE->Word1, - 0 /*Addend*/)); - DEBUG(dbgs() << "Relocation at Section #" << SectNum << " + " << Offset - << " from '" << SourceName << "(Word1: " - << format("0x%x", RE->Word1) << ")\n"); + // Word0 of the relocation is the offset into the section where the + // relocation should be applied, i.e., the current section. We need + // to translate that into an offset into a function since that's our atom. + uint32_t OffsetInTarget = RE->Word0; + bool isExtern = (RE->Word1 >> 27) & 1; + + // FIXME: Get the relocation addend from the target address. + // FIXME: VERY important for internal relocations. + + // Figure out the source symbol of the relocation. If isExtern is true, + // this relocation references the symbol table, otherwise it references + // a section in the same object, numbered from 1 through NumSections + // (SectionBases is [0, NumSections-1]). + uint32_t SourceNum_OneBased = RE->Word1 & 0xffffff; // 24-bit value + if (!isExtern) { + assert(SourceNum_OneBased > 0 && "Invalid relocation section number!"); + unsigned SourceID = SectionMap[SourceNum_OneBased - 1]; + unsigned TargetID = SectionMap[SectNum]; + DEBUG(dbgs() << "Internal relocation at Section #" + << TargetID << " + " << OffsetInTarget + << " from Section #" + << SourceID << " (Word1: " + << format("0x%x", RE->Word1) << ")\n"); + + // Store the relocation information. It will get resolved when + // the section addresses are assigned. + uint32_t RelocationIndex = Relocations[TargetID].size(); + Relocations[TargetID].push_back(RelocationEntry(OffsetInTarget, + SourceID, + SectionOffset::NoSectionID, + RE->Word1, + 0 /*Addend*/)); + RelocationSources[SourceID].push_back(RelocationSource(TargetID, + RelocationIndex, + 0)); + } else { + StringRef SourceName = SymbolNames[SourceNum_OneBased]; + + // Now store the relocation information. Associate it with the source + // symbol. 
Just add it to the unresolved list and let the general + // path post-load resolve it if we know where the symbol is. + unsigned TargetID = SectionMap[SectNum]; + uint32_t RelocationIndex = Relocations[TargetID].size(); + Relocations[TargetID].push_back(RelocationEntry(OffsetInTarget, + SectionOffset::UnresolvedSourceID, + SectionOffset::NoSectionID, + RE->Word1, + 0 /*Addend*/)); + UnresolvedRelocations[SourceName].push_back(RelocationSource(TargetID, + RelocationIndex, + 0)); + DEBUG(dbgs() << "Relocation at Section #" << SectNum << " + " << OffsetInTarget + << " from '" << SourceName << "' (Word1: " + << format("0x%x", RE->Word1) << ")\n"); + } } } } @@ -332,6 +497,7 @@ // Process the relocations for each section we're loading. Relocations.grow(Relocations.size() + Segment64LC->NumSections); + RelocationSources.grow(RelocationSources.size() + Segment64LC->NumSections); for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) { InMemoryStruct Sect; Obj->ReadSection64(*SegmentLCI, SectNum, Sect); @@ -341,11 +507,11 @@ InMemoryStruct RE; Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE); if (RE->Word0 & macho::RF_Scattered) - return Error("NOT YET IMPLEMENTED: scattered relocations."); + return Error("scattered relocations don't exist on 64-bit platforms"); // Word0 of the relocation is the offset into the section where the // relocation should be applied. We need to translate that into an // offset into a function since that's our atom. - uint32_t Offset = RE->Word0; + uint32_t OffsetInTarget = RE->Word0; bool isExtern = (RE->Word1 >> 27) & 1; // FIXME: Get the relocation addend from the target address. @@ -355,34 +521,45 @@ // this relocation references the symbol table, otherwise it references // a section in the same object, numbered from 1 through NumSections // (SectionBases is [0, NumSections-1]). 
- uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value + uint32_t SourceNum_OneBased = RE->Word1 & 0xffffff; // 24-bit value if (!isExtern) { - assert(SourceNum > 0 && "Invalid relocation section number!"); - unsigned SectionID = SectionMap[SourceNum - 1]; + assert(SourceNum_OneBased > 0 && "Invalid relocation section number!"); + unsigned SourceID = SectionMap[SourceNum_OneBased - 1]; unsigned TargetID = SectionMap[SectNum]; DEBUG(dbgs() << "Internal relocation at Section #" - << TargetID << " + " << Offset + << TargetID << " + " << OffsetInTarget << " from Section #" - << SectionID << " (Word1: " + << SourceID << " (Word1: " << format("0x%x", RE->Word1) << ")\n"); // Store the relocation information. It will get resolved when // the section addresses are assigned. - Relocations[SectionID].push_back(RelocationEntry(TargetID, - Offset, - RE->Word1, - 0 /*Addend*/)); + uint32_t RelocationIndex = Relocations[TargetID].size(); + Relocations[TargetID].push_back(RelocationEntry(OffsetInTarget, + SourceID, + SectionOffset::NoSectionID, + RE->Word1, + 0 /*Addend*/)); + RelocationSources[SourceID].push_back(RelocationSource(TargetID, + RelocationIndex, + 0)); } else { - StringRef SourceName = SymbolNames[SourceNum]; + StringRef SourceName = SymbolNames[SourceNum_OneBased]; // Now store the relocation information. Associate it with the source // symbol. Just add it to the unresolved list and let the general // path post-load resolve it if we know where the symbol is. 
- UnresolvedRelocations[SourceName].push_back(RelocationEntry(SectNum, - Offset, - RE->Word1, - 0 /*Addend*/)); - DEBUG(dbgs() << "Relocation at Section #" << SectNum << " + " << Offset + unsigned TargetID = SectionMap[SectNum]; + uint32_t RelocationIndex = Relocations[TargetID].size(); + Relocations[TargetID].push_back(RelocationEntry(OffsetInTarget, + SectionOffset::UnresolvedSourceID, + SectionOffset::NoSectionID, + RE->Word1, + 0 /*Addend*/)); + UnresolvedRelocations[SourceName].push_back(RelocationSource(TargetID, + RelocationIndex, + 0)); + DEBUG(dbgs() << "Relocation at Section #" << SectNum << " + " << OffsetInTarget << " from '" << SourceName << "(Word1: " << format("0x%x", RE->Word1) << ")\n"); } @@ -468,18 +645,22 @@ if (Loc == SymbolTable.end()) return; - RelocationList &Relocs = UnresolvedRelocations[Name]; + RelocationSourceList &SourcesForSymbol = UnresolvedRelocations[Name]; DEBUG(dbgs() << "Resolving symbol '" << Name << "'\n"); - for (int i = 0, e = Relocs.size(); i != e; ++i) { - // Change the relocation to be section relative rather than symbol - // relative and move it to the resolved relocation list. - RelocationEntry Entry = Relocs[i]; - Entry.Addend += Loc->second.second; - Relocations[Loc->second.first].push_back(Entry); + for (int i = 0, e = SourcesForSymbol.size(); i != e; ++i) { + // Find the relocation entry corresponding to this source and fill + // in its source information with the resolved information from this + // symbol. + RelocationSource &Source = SourcesForSymbol[i]; + RelocationEntry &Entry = Relocations[Source.SectionID][Source.Index]; + Entry.Sources[Source.SourceIdx].Offset = Loc->second.second; + Entry.Sources[Source.SourceIdx].ID = Loc->second.first; + // Now create a relocation source in the pointed-to section. + RelocationSources[Loc->second.first].push_back(Source); } // FIXME: Keep a worklist of the relocations we've added so that we can // resolve more selectively later. 
- Relocs.clear(); + SourcesForSymbol.clear(); } bool RuntimeDyldMachO::loadObject(MemoryBuffer *InputBuffer) { @@ -575,6 +756,56 @@ return false; } +bool RuntimeDyldMachO::resolveRelocationEntry(unsigned SectionID, + RelocationEntry &RE) +{ + uint8_t *Target = (uint8_t*)Sections[SectionID].base() + RE.Offset; + uint64_t FinalTarget = SectionLoadAddress[SectionID] + RE.Offset; + + uint64_t FinalSource1 = 0; + uint64_t FinalSource2 = 0; + + if (RE.Sources[0].ID == SectionOffset::UnresolvedSourceID || + RE.Sources[1].ID == SectionOffset::UnresolvedSourceID) + return false; + + FinalSource1 = SectionLoadAddress[RE.Sources[0].ID] + RE.Sources[0].Offset; + if (RE.Sources[1].ID != SectionOffset::NoSectionID) + FinalSource2 = SectionLoadAddress[RE.Sources[1].ID] + RE.Sources[1].Offset; + + bool isPCRel = RE.isPCRel(); + unsigned Type = RE.type(); + unsigned Size = RE.length(); + + if (RE.Sources[1].ID == SectionOffset::NoSectionID) + DEBUG(dbgs() << "Resolving relocation at Section #" << SectionID + << " + " << RE.Offset << " (" << format("%p", Target) << ")" + << " from Section #" << RE.Sources[0].ID << "+" << RE.Sources[0].Offset + << " (" << format("0x%llx", FinalSource1) << ")" + << " (" << (isPCRel ? "pcrel" : "absolute") + << ", type: " << Type << ", Size: " << Size << ", Addend: " + << RE.Addend << ").\n"); + else + DEBUG(dbgs() << "Resolving relocation at Section #" << SectionID + << " + " << RE.Offset << " (" << format("%p", Target) << ")" + << " from Section #" << RE.Sources[0].ID << "+" << RE.Sources[0].Offset + << " (" << format("0x%llx", FinalSource1) << ")" + << " and Section #" << RE.Sources[1].ID << "+" << RE.Sources[1].Offset + << " (" << format("0x%llx", FinalSource2) << ")" + << " (" << (isPCRel ? 
"pcrel" : "absolute") + << ", type: " << Type << ", Size: " << Size << ", Addend: " + << RE.Addend << ").\n"); + + return resolveRelocation(Target, + FinalTarget, + FinalSource1, + FinalSource2, + isPCRel, + Type, + Size, + RE.Addend); +} + // Assign an address to a symbol name and resolve all the relocations // associated with it. void RuntimeDyldMachO::reassignSectionAddress(unsigned SectionID, @@ -590,30 +821,17 @@ SectionLoadAddress[SectionID] = Addr; - RelocationList &Relocs = Relocations[SectionID]; - for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { - RelocationEntry &RE = Relocs[i]; - uint8_t *Target = (uint8_t*)Sections[RE.SectionID].base() + RE.Offset; - uint64_t FinalTarget = (uint64_t)SectionLoadAddress[RE.SectionID] + RE.Offset; - bool isPCRel = (RE.Data >> 24) & 1; - unsigned Type = (RE.Data >> 28) & 0xf; - unsigned Size = 1 << ((RE.Data >> 25) & 3); - - DEBUG(dbgs() << "Resolving relocation at Section #" << RE.SectionID - << " + " << RE.Offset << " (" << format("%p", Target) << ")" - << " from Section #" << SectionID << " (" << format("%p", Addr) << ")" - << "(" << (isPCRel ? 
"pcrel" : "absolute") - << ", type: " << Type << ", Size: " << Size << ", Addend: " - << RE.Addend << ").\n"); - - resolveRelocation(Target, - FinalTarget, - Addr, - isPCRel, - Type, - Size, - RE.Addend); + RelocationList &RelocsForSection = Relocations[SectionID]; + for (unsigned i = 0, e = RelocsForSection.size(); i != e; ++i) { + RelocationEntry &RE = RelocsForSection[i]; + resolveRelocationEntry(SectionID, RE); } + RelocationSourceList &SourcesForSection = RelocationSources[SectionID]; + for (unsigned i = 0, e = SourcesForSection.size(); i != e; ++i) { + RelocationSource &R = SourcesForSection[i]; + RelocationEntry &RE = Relocations[R.SectionID][R.Index]; + resolveRelocationEntry(R.SectionID, RE); + } } bool RuntimeDyldMachO::isKnownFormat(const MemoryBuffer *InputBuffer) { Index: lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h =================================================================== --- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h (revision 152265) +++ lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h (working copy) @@ -26,48 +26,183 @@ namespace llvm { class RuntimeDyldMachO : public RuntimeDyldImpl { - // For each symbol, keep a list of relocations based on it. Anytime - // its address is reassigned (the JIT re-compiled the function, e.g.), - // the relocations get re-resolved. - // The symbol (or section) the relocation is sourced from is the Key - // in the relocation list where it's stored. + // For each section, keep a list of relocatable pieces of data that + // reside in it. If the section moves, or the sections whose + // locations the data depends on move, re-resolve the relocations + // based on that movement. + // + // RelocationEntry structures correspond to one or two Mach-O + // relocation_info or scattered_relocation_info structures -- + // usually one, but two iff the original has a paired structure + // following it. 
+ // + // To facilitate updating a relocation when its sources move, we + // also keep RelocationSource structures associated with the sections + // whose location the data depends on. + + // FIXME: Use SymbolLoc for this instead. Where should the enum live? + struct SectionOffset { + uint64_t Offset; // Offset of the location into its section. + unsigned ID; // The section the location is contained in. + + enum { + NoSectionID = 0xffff0000, + UnresolvedSourceID = 0xffffffff + }; + }; + struct RelocationEntry { - unsigned SectionID; // Section the relocation is contained in. - uint64_t Offset; // Offset into the section for the relocation. - uint32_t Data; // Second word of the raw macho relocation entry. - int64_t Addend; // Addend encoded in the instruction itself, if any, - // plus the offset into the source section for - // the symbol once the relocation is resolvable. + SectionOffset Sources[2]; // The section/offset pairs this relocation + // refers to. + // If the original Mach-O relocation entries used + // relocation_info, this data is computed from + // r_symbolnum and the offsets are locked to 0. + // (The only offset is determined by the addend.) + // If the original Mach-O relocation entries used + // scattered_relocation_info, this data, including + // offsets, is computed by looking r_value up in + // the section table. - RelocationEntry(unsigned id, uint64_t offset, uint32_t data, int64_t addend) - : SectionID(id), Offset(offset), Data(data), Addend(addend) {} + uint64_t Offset; // The offset of the data to be relocated. + // We don't use a SectionOffset because this + // RelocationEntry is already associated with the + // proper Section. + + int64_t Addend; // Addend encoded in the instruction itself, if any, + // plus the offset into the source section for + // the symbol once the relocation is resolvable. 
+ + uint32_t Data; // If the original Mach-O relocation entry was a + // relocation_info, the bitfield { r_symbolnum, + // r_pcrel, r_length, r_extern, r_type }. + // If the original Mach-O relocation entry was a + // scattered_relocation_info, the bitfield + // { r_address, r_type, r_length, r_pcrel, + // r_scattered }. + + bool Scattered; // True iff this relocation is scattered. + + bool isPCRel() + { + if (Scattered) + return (Data & 0x40000000) >> 30; + else + return (Data & 0x01000000) >> 24; + } + + uint8_t type() + { + if (Scattered) + return (Data & 0x0f000000) >> 24; + else + return (Data & 0xf0000000) >> 28; + } + + // Returns the decoded version of the length field + uint8_t length() + { + if (Scattered) + return 1 << ((Data & 0x30000000) >> 28); + else + return 1 << ((Data & 0x0e000000) >> 25); + } + + // Used with an ordinary relocation entry, where the source_offsets are not + // known yet. + RelocationEntry(uint64_t offset, // See the Offset field. + unsigned source_id0, // The section ID for the first source. + unsigned source_id1, // The section ID for the second source. + uint32_t data, // See the Data field. + int64_t addend) // See the Addend field. + : Offset(offset), + Addend(addend), + Data(data), + Scattered(false) { + Sources[0].ID = source_id0; + Sources[0].Offset = 0; + Sources[1].ID = source_id1; + Sources[1].Offset = 0; + } + + // Used with a scattered relocation entry, where the source_offsets can be + // derived from the value. + RelocationEntry(uint64_t offset, // See the Offset field. + unsigned source_id0, // The section ID for the first source. + uint64_t source_off0, // The offset for the first source. + unsigned source_id1, // The section ID for the second source. + uint64_t source_off1, // The offset for the second source. + uint32_t data, // See the Data field. + int64_t addend) // See the Addend field. 
+ : Offset(offset), + Addend(addend), + Data(data), + Scattered(true) { + Sources[0].ID = source_id0; + Sources[0].Offset = source_off0; + Sources[1].ID = source_id1; + Sources[1].Offset = source_off1; + } }; typedef SmallVector RelocationList; - // Relocations to sections already loaded. Indexed by SectionID which is the - // source of the address. The target where the address will be writen is - // SectionID/Offset in the relocation itself. + + // For each section, keep a list of sources that are used by relocations in + // other sections. Whenever a relocation gets created, create one or two + // corresponding relocation sources. Whenever relocations are re-resolved + // for a section, also re-resolve the relocations corresponding to that + // section's relocation targets. + struct RelocationSource { + unsigned SectionID; // Section whose RelocationList contains the relocation. + uint32_t Index : 24; // Index of the RelocationEntry in that RelocationList. + uint8_t SourceIdx : 1; // Index of this source in the RelocationEntry's Sources. + + RelocationSource(unsigned id, + uint32_t index, + uint8_t source_idx) + : SectionID(id), + Index(index), + SourceIdx(source_idx) {} + }; + typedef SmallVector RelocationSourceList; + + // Relocations which refer to already-loaded sections. Indexed by SectionID + // which is the section containing the relocatable data. IndexedMap Relocations; + // Targets corresponding to Relocations. + IndexedMap RelocationSources; // Relocations to symbols that are not yet resolved. Must be external // relocations by definition. Indexed by symbol name. 
- StringMap UnresolvedRelocations; + StringMap UnresolvedRelocations; + bool resolveRelocationEntry(unsigned SectionID, + RelocationEntry &RE); bool resolveRelocation(uint8_t *LocalAddress, uint64_t FinalAddress, - uint64_t Value, + uint64_t FinalSource1, + uint64_t FinalSource2, bool isPCRel, unsigned Type, unsigned Size, int64_t Addend); + bool resolveI386Relocation(uint8_t *LocalAddress, + uint64_t FinalAddress, + uint64_t FinalSource1, + uint64_t FinalSource2, + bool isPCRel, + unsigned Type, + unsigned Size, + int64_t Addend); bool resolveX86_64Relocation(uint8_t *LocalAddress, uint64_t FinalAddress, - uint64_t Value, + uint64_t FinalSource1, + uint64_t FinalSource2, bool isPCRel, unsigned Type, unsigned Size, int64_t Addend); bool resolveARMRelocation(uint8_t *LocalAddress, uint64_t FinalAddress, - uint64_t Value, + uint64_t FinalSource1, + uint64_t FinalSource2, bool isPCRel, unsigned Type, unsigned Size, Index: lib/MC/MCDisassembler/Disassembler.cpp =================================================================== --- lib/MC/MCDisassembler/Disassembler.cpp (revision 152265) +++ lib/MC/MCDisassembler/Disassembler.cpp (working copy) @@ -15,7 +15,9 @@ #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" @@ -86,7 +88,7 @@ LLVMDisasmContext *DC = new LLVMDisasmContext(TripleName, DisInfo, TagType, GetOpInfo, SymbolLookUp, TheTarget, MAI, MRI, - Ctx, DisAsm, IP); + STI, Ctx, DisAsm, IP); assert(DC && "Allocation failure!"); return DC; Index: lib/MC/MCDisassembler/Disassembler.h =================================================================== --- lib/MC/MCDisassembler/Disassembler.h (revision 152265) +++ lib/MC/MCDisassembler/Disassembler.h (working copy) @@ -29,6 +29,7 @@ class 
MCDisassembler; class MCInstPrinter; class MCRegisterInfo; +class MCSubtargetInfo; class Target; // @@ -61,6 +62,8 @@ llvm::OwningPtr MAI; // The register information for the target architecture. llvm::OwningPtr MRI; + // The subtarget information for the target architecture. + llvm::OwningPtr MSI; // The assembly context for creating symbols and MCExprs. llvm::OwningPtr Ctx; // The disassembler for the target architecture. @@ -78,6 +81,7 @@ LLVMSymbolLookupCallback symbolLookUp, const Target *theTarget, const MCAsmInfo *mAI, const MCRegisterInfo *mRI, + const MCSubtargetInfo *mSI, llvm::MCContext *ctx, const MCDisassembler *disAsm, MCInstPrinter *iP) : TripleName(tripleName), DisInfo(disInfo), TagType(tagType), GetOpInfo(getOpInfo), @@ -85,6 +89,7 @@ CommentStream(CommentsToEmit) { MAI.reset(mAI); MRI.reset(mRI); + MSI.reset(mSI); Ctx.reset(ctx); DisAsm.reset(disAsm); IP.reset(iP);