-rw-r--r--  llvm/lib/Target/X86/X86FastISel.cpp                |   2
-rw-r--r--  llvm/lib/Target/X86/X86ISelDAGToDAG.cpp            |  57
-rw-r--r--  llvm/lib/Target/X86/X86Instr64bit.td               |  69
-rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.cpp               |  63
-rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.td                | 110
-rw-r--r--  llvm/lib/Target/X86/X86RegisterInfo.h              |   2
-rw-r--r--  llvm/lib/Target/X86/X86RegisterInfo.td             | 154
-rw-r--r--  llvm/test/CodeGen/X86/h-register-addressing-32.ll  |  53
-rw-r--r--  llvm/test/CodeGen/X86/h-register-addressing-64.ll  |  53
-rw-r--r--  llvm/test/CodeGen/X86/h-register-store.ll          |  27
-rw-r--r--  llvm/test/CodeGen/X86/h-registers.ll               |  48
-rw-r--r--  llvm/test/CodeGen/X86/inline-asm-out-regs.ll       |   2
12 files changed, 540 insertions, 100 deletions
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 2cfa719d73b..42844562020 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -997,7 +997,7 @@ bool X86FastISel::X86SelectTrunc(Instruction *I) {
return false;
// First issue a copy to GR16_ or GR32_.
- unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16to16_ : X86::MOV32to32_;
+ unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16rr : X86::MOV32rr;
const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
? X86::GR16_RegisterClass : X86::GR32_RegisterClass;
unsigned CopyReg = createResultReg(CopyRC);
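For context, the hunk above is in X86FastISel::X86SelectTrunc, which lowers a truncate at -O0 by first copying the source into the restricted GR16_/GR32_ class and then extracting the 8-bit subregister; the copy is now an ordinary MOV16rr/MOV32rr instead of the removed MOV16to16_/MOV32to32_ pseudos. A minimal sketch of the kind of IR that takes this path (the function name is illustrative, not part of the patch):

; Compiled with fast isel (-O0): copy %x into a GR32_ register, then
; take its 8-bit subregister.
define i8 @trunc32to8(i32 %x) nounwind {
  %t = trunc i32 %x to i8
  ret i8 %t
}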
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 6fd9d00e661..41a3c416f85 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1019,21 +1019,69 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
break;
case ISD::AND: {
- // Handle "(x << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
- // allows us to fold the shift into this addressing mode.
+ // Perform some heroic transforms on an and of a constant-count shift
+ // with a constant to enable use of the scaled offset field.
+
SDValue Shift = N.getOperand(0);
- if (Shift.getOpcode() != ISD::SHL) break;
+ if (Shift.getNumOperands() != 2) break;
// Scale must not be used already.
if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
// Not when RIP is used as the base.
if (AM.isRIPRel) break;
-
+
+ SDValue X = Shift.getOperand(0);
ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
if (!C1 || !C2) break;
+ // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff)" if safe. This
+ // allows us to convert the shift and and into an h-register extract and
+ // a scaled index.
+ if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) {
+ unsigned ScaleLog = 8 - C1->getZExtValue();
+ if (ScaleLog > 0 && ScaleLog < 64 &&
+ C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) {
+ SDValue Eight = CurDAG->getConstant(8, MVT::i8);
+ SDValue Mask = CurDAG->getConstant(0xff, N.getValueType());
+ SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
+ X, Eight);
+ SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(),
+ Srl, Mask);
+
+ // Insert the new nodes into the topological ordering.
+ if (Eight.getNode()->getNodeId() == -1 ||
+ Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), Eight.getNode());
+ Eight.getNode()->setNodeId(X.getNode()->getNodeId());
+ }
+ if (Mask.getNode()->getNodeId() == -1 ||
+ Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), Mask.getNode());
+ Mask.getNode()->setNodeId(X.getNode()->getNodeId());
+ }
+ if (Srl.getNode()->getNodeId() == -1 ||
+ Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(Shift.getNode(), Srl.getNode());
+ Srl.getNode()->setNodeId(Shift.getNode()->getNodeId());
+ }
+ if (And.getNode()->getNodeId() == -1 ||
+ And.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), And.getNode());
+ And.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ CurDAG->ReplaceAllUsesWith(N, And);
+ AM.IndexReg = And;
+ AM.Scale = (1 << ScaleLog);
+ return false;
+ }
+ }
+
+ // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
+ // allows us to fold the shift into this addressing mode.
+ if (Shift.getOpcode() != ISD::SHL) break;
+
// Not likely to be profitable if either the AND or SHIFT node has more
// than one use (unless all uses are for address computation). Besides,
// isel mechanism requires their node ids to be reused.
@@ -1046,7 +1094,6 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
break;
// Get the new AND mask, this folds to a constant.
- SDValue X = Shift.getOperand(0);
SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
SDValue(C2, 0), SDValue(C1, 0));
SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X,
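The new ISD::AND handling above rewrites an address index of the form (x >> (8-C1)) & (0xff << (8-C1)) into ((x >> 8) & 0xff) scaled by 2^(8-C1), so the shift-and-mask pair becomes an h-register extract feeding the scaled-index field. For instance, a shift count of 5 requires the mask 2040 (0xff << 3) and yields a scale of 8. A sketch of the shape it matches, mirroring bar8 in the new h-register-addressing tests below (the function name is illustrative):

; 2040 == 0xff << 3, so the index folds to ((x >> 8) & 0xff) with scale 8,
; selecting to something like "movzbl %ah, %ecx" plus a (%base,%index,8)
; addressing mode.
define i8 @scaled_h_index(i8* %p, i32 %x) nounwind readonly {
  %t0 = lshr i32 %x, 5
  %t1 = and i32 %t0, 2040
  %t2 = getelementptr i8* %p, i32 %t1
  %t3 = load i8* %t2, align 1
  ret i8 %t3
}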
diff --git a/llvm/lib/Target/X86/X86Instr64bit.td b/llvm/lib/Target/X86/X86Instr64bit.td
index 10e66e88bee..05bccabc304 100644
--- a/llvm/lib/Target/X86/X86Instr64bit.td
+++ b/llvm/lib/Target/X86/X86Instr64bit.td
@@ -1522,7 +1522,7 @@ def : Pat<(store (add (loadi64 addr:$dst), 0x00000000800000000), addr:$dst),
// r & (2^32-1) ==> movz
def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
- (MOVZX64rr32 (i32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)))>;
+ (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;
// r & (2^16-1) ==> movz
def : Pat<(and GR64:$src, 0xffff),
(MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)))>;
@@ -1531,7 +1531,7 @@ def : Pat<(and GR64:$src, 0xff),
(MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)))>;
// r & (2^8-1) ==> movz
def : Pat<(and GR32:$src1, 0xff),
- (MOVZX32rr8 (i8 (EXTRACT_SUBREG GR32:$src1, x86_subreg_8bit)))>,
+ (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, x86_subreg_8bit))>,
Requires<[In64BitMode]>;
// r & (2^8-1) ==> movz
def : Pat<(and GR16:$src1, 0xff),
@@ -1540,13 +1540,13 @@ def : Pat<(and GR16:$src1, 0xff),
// sext_inreg patterns
def : Pat<(sext_inreg GR64:$src, i32),
- (MOVSX64rr32 (i32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)))>;
+ (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;
def : Pat<(sext_inreg GR64:$src, i16),
- (MOVSX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)))>;
+ (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>;
def : Pat<(sext_inreg GR64:$src, i8),
- (MOVSX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)))>;
+ (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit))>;
def : Pat<(sext_inreg GR32:$src, i8),
- (MOVSX32rr8 (i8 (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit)))>,
+ (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit))>,
Requires<[In64BitMode]>;
def : Pat<(sext_inreg GR16:$src, i8),
(MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)))>,
@@ -1554,16 +1554,63 @@ def : Pat<(sext_inreg GR16:$src, i8),
// trunc patterns
def : Pat<(i32 (trunc GR64:$src)),
- (i32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;
+ (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)>;
def : Pat<(i16 (trunc GR64:$src)),
- (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>;
+ (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)>;
def : Pat<(i8 (trunc GR64:$src)),
- (i8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit))>;
+ (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)>;
def : Pat<(i8 (trunc GR32:$src)),
- (i8 (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit))>,
+ (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit)>,
Requires<[In64BitMode]>;
def : Pat<(i8 (trunc GR16:$src)),
- (i8 (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit))>,
+ (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)>,
+ Requires<[In64BitMode]>;
+
+// h-register tricks.
+// For now, be conservative and only use the extract if the value is immediately
+// zero-extended or stored, which are somewhat common cases. This uses a bunch
+// of code to prevent a register requiring a REX prefix from being allocated in
+// the same instruction as the h register, as there's currently no way to
+// describe this requirement to the register allocator.
+
+// h-register extract and zero-extend.
+def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
+ (SUBREG_TO_REG
+ (i64 0),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR64:$src, GR64_),
+ x86_subreg_8bit_hi)),
+ x86_subreg_32bit)>;
+def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src, GR32_),
+ x86_subreg_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(srl_su GR16:$src, (i8 8)),
+ (EXTRACT_SUBREG
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src, GR16_),
+ x86_subreg_8bit_hi)),
+ x86_subreg_16bit)>,
+ Requires<[In64BitMode]>;
+
+// h-register extract and store.
+def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR64:$src, GR64_),
+ x86_subreg_8bit_hi))>;
+def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src, GR32_),
+ x86_subreg_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src, GR16_),
+ x86_subreg_8bit_hi))>,
Requires<[In64BitMode]>;
// (shl x, 1) ==> (add x, x)
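The 64-bit patterns above cover two cases: a shift-right-by-8 whose result is immediately masked with 0xff selects to MOVZX32_NOREXrr8 of the new 8-bit-hi subregister, and a shift-right-by-8 that is immediately truncated and stored selects to MOV8mr_NOREX. A sketch of the zero-extend case, reproduced from qux64 in the new h-registers.ll test below:

; Selects to a single h-register extract, e.g. "movzbl %ah, %eax", with
; SUBREG_TO_REG producing the i64 result.
define i64 @qux64(i64 inreg %x) nounwind {
  %t0 = lshr i64 %x, 8
  %t1 = and i64 %t0, 255
  ret i64 %t1
}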
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 77320587cb4..77955a6a426 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -258,10 +258,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::JMP64r, X86::JMP64m, 1 },
{ X86::MOV16ri, X86::MOV16mi, 0 },
{ X86::MOV16rr, X86::MOV16mr, 0 },
- { X86::MOV16to16_, X86::MOV16_mr, 0 },
{ X86::MOV32ri, X86::MOV32mi, 0 },
{ X86::MOV32rr, X86::MOV32mr, 0 },
- { X86::MOV32to32_, X86::MOV32_mr, 0 },
{ X86::MOV64ri32, X86::MOV64mi32, 0 },
{ X86::MOV64rr, X86::MOV64mr, 0 },
{ X86::MOV8ri, X86::MOV8mi, 0 },
@@ -372,9 +370,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::Int_UCOMISDrr, X86::Int_UCOMISDrm },
{ X86::Int_UCOMISSrr, X86::Int_UCOMISSrm },
{ X86::MOV16rr, X86::MOV16rm },
- { X86::MOV16to16_, X86::MOV16_rm },
{ X86::MOV32rr, X86::MOV32rm },
- { X86::MOV32to32_, X86::MOV32_rm },
{ X86::MOV64rr, X86::MOV64rm },
{ X86::MOV64toPQIrr, X86::MOVQI2PQIrm },
{ X86::MOV64toSDrr, X86::MOV64toSDrm },
@@ -404,6 +400,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm },
{ X86::MOVZX16rr8, X86::MOVZX16rm8 },
{ X86::MOVZX32rr16, X86::MOVZX32rm16 },
+ { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8 },
{ X86::MOVZX32rr8, X86::MOVZX32rm8 },
{ X86::MOVZX64rr16, X86::MOVZX64rm16 },
{ X86::MOVZX64rr32, X86::MOVZX64rm32 },
@@ -672,8 +669,6 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
case X86::MOV16rr:
case X86::MOV32rr:
case X86::MOV64rr:
- case X86::MOV16to16_:
- case X86::MOV32to32_:
case X86::MOVSSrr:
case X86::MOVSDrr:
@@ -710,9 +705,7 @@ unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
default: break;
case X86::MOV8rm:
case X86::MOV16rm:
- case X86::MOV16_rm:
case X86::MOV32rm:
- case X86::MOV32_rm:
case X86::MOV64rm:
case X86::LD_Fp64m:
case X86::MOVSSrm:
@@ -741,9 +734,7 @@ unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
default: break;
case X86::MOV8mr:
case X86::MOV16mr:
- case X86::MOV16_mr:
case X86::MOV32mr:
- case X86::MOV32_mr:
case X86::MOV64mr:
case X86::ST_FpP64m:
case X86::MOVSSmr:
@@ -795,9 +786,7 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const {
default: break;
case X86::MOV8rm:
case X86::MOV16rm:
- case X86::MOV16_rm:
case X86::MOV32rm:
- case X86::MOV32_rm:
case X86::MOV64rm:
case X86::LD_Fp64m:
case X86::MOVSSrm:
@@ -1670,10 +1659,22 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
Opc = X86::MOV16rr;
} else if (DestRC == &X86::GR8RegClass) {
Opc = X86::MOV8rr;
+ } else if (DestRC == &X86::GR64_RegClass) {
+ Opc = X86::MOV64rr;
} else if (DestRC == &X86::GR32_RegClass) {
- Opc = X86::MOV32_rr;
+ Opc = X86::MOV32rr;
} else if (DestRC == &X86::GR16_RegClass) {
- Opc = X86::MOV16_rr;
+ Opc = X86::MOV16rr;
+ } else if (DestRC == &X86::GR8_RegClass) {
+ Opc = X86::MOV8rr;
+ } else if (DestRC == &X86::GR64_NOREXRegClass) {
+ Opc = X86::MOV64rr;
+ } else if (DestRC == &X86::GR32_NOREXRegClass) {
+ Opc = X86::MOV32rr;
+ } else if (DestRC == &X86::GR16_NOREXRegClass) {
+ Opc = X86::MOV16rr;
+ } else if (DestRC == &X86::GR8_NOREXRegClass) {
+ Opc = X86::MOV8rr;
} else if (DestRC == &X86::RFP32RegClass) {
Opc = X86::MOV_Fp3232;
} else if (DestRC == &X86::RFP64RegClass || DestRC == &X86::RSTRegClass) {
@@ -1721,7 +1722,7 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
return true;
}
}
-
+
// Moving from ST(0) turns into FpGET_ST0_32 etc.
if (SrcRC == &X86::RSTRegClass) {
// Copying from ST(0)/ST(1).
@@ -1779,10 +1780,22 @@ static unsigned getStoreRegOpcode(const TargetRegisterClass *RC,
Opc = X86::MOV16mr;
} else if (RC == &X86::GR8RegClass) {
Opc = X86::MOV8mr;
+ } else if (RC == &X86::GR64_RegClass) {
+ Opc = X86::MOV64mr;
} else if (RC == &X86::GR32_RegClass) {
- Opc = X86::MOV32_mr;
+ Opc = X86::MOV32mr;
} else if (RC == &X86::GR16_RegClass) {
- Opc = X86::MOV16_mr;
+ Opc = X86::MOV16mr;
+ } else if (RC == &X86::GR8_RegClass) {
+ Opc = X86::MOV8mr;
+ } else if (RC == &X86::GR64_NOREXRegClass) {
+ Opc = X86::MOV64mr;
+ } else if (RC == &X86::GR32_NOREXRegClass) {
+ Opc = X86::MOV32mr;
+ } else if (RC == &X86::GR16_NOREXRegClass) {
+ Opc = X86::MOV16mr;
+ } else if (RC == &X86::GR8_NOREXRegClass) {
+ Opc = X86::MOV8mr;
} else if (RC == &X86::RFP80RegClass) {
Opc = X86::ST_FpP80m; // pops
} else if (RC == &X86::RFP64RegClass) {
@@ -1847,10 +1860,22 @@ static unsigned getLoadRegOpcode(const TargetRegisterClass *RC,
Opc = X86::MOV16rm;
} else if (RC == &X86::GR8RegClass) {
Opc = X86::MOV8rm;
+ } else if (RC == &X86::GR64_RegClass) {
+ Opc = X86::MOV64rm;
} else if (RC == &X86::GR32_RegClass) {
- Opc = X86::MOV32_rm;
+ Opc = X86::MOV32rm;
} else if (RC == &X86::GR16_RegClass) {
- Opc = X86::MOV16_rm;
+ Opc = X86::MOV16rm;
+ } else if (RC == &X86::GR8_RegClass) {
+ Opc = X86::MOV8rm;
+ } else if (RC == &X86::GR64_NOREXRegClass) {
+ Opc = X86::MOV64rm;
+ } else if (RC == &X86::GR32_NOREXRegClass) {
+ Opc = X86::MOV32rm;
+ } else if (RC == &X86::GR16_NOREXRegClass) {
+ Opc = X86::MOV16rm;
+ } else if (RC == &X86::GR8_NOREXRegClass) {
+ Opc = X86::MOV8rm;
} else if (RC == &X86::RFP80RegClass) {
Opc = X86::LD_Fp80m;
} else if (RC == &X86::RFP64RegClass) {
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index bef6b72c801..830796e3a38 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -181,6 +181,13 @@ def f64mem : X86MemOperand<"printf64mem">;
def f80mem : X86MemOperand<"printf80mem">;
def f128mem : X86MemOperand<"printf128mem">;
+// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
+// plain GR64, so that it doesn't potentially require a REX prefix.
+def i8mem_NOREX : Operand<i64> {
+ let PrintMethod = "printi8mem";
+ let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX, i32imm, i8imm);
+}
+
def lea32mem : Operand<i32> {
let PrintMethod = "printlea32mem";
let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
@@ -398,6 +405,14 @@ def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
return N->hasOneUse();
}]>;
+// An 'srl' node with a single use.
+def srl_su : PatFrag<(ops node:$lhs, node:$rhs), (srl node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+// A 'trunc' node with a single use.
+def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
+ return N->hasOneUse();
+}]>;
// 'shld' and 'shrd' instruction patterns. Note that even though these have
// the srl and shl in their patterns, the C++ code must still check for them,
@@ -767,7 +782,12 @@ def MOV16mr : I<0x89, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
[(store GR32:$src, addr:$dst)]>;
-
+
+// A version of MOV8mr that uses i8mem_NOREX so that it can be used for
+// storing h registers, which can't be encoded when a REX prefix is present.
+def MOV8mr_NOREX : I<0x88, MRMDestMem, (outs), (ins i8mem_NOREX:$dst, GR8:$src),
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
+
//===----------------------------------------------------------------------===//
// Fixed-Register Multiplication and Division Instructions...
//
@@ -2899,6 +2919,18 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"movz{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB;
+// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
+// except that they use GR32_NOREX for the output operand register class
+// instead of GR32. This allows them to operate on h registers on x86-64.
+def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
+ (outs GR32_NOREX:$dst), (ins GR8:$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
+ []>, TB;
+def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
+ (outs GR32_NOREX:$dst), (ins i8mem:$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
+ []>, TB;
+
let neverHasSideEffects = 1 in {
let Defs = [AX], Uses = [AL] in
def CBW : I<0x98, RawFrm, (outs), (ins),
@@ -2935,33 +2967,6 @@ def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins),
[(set GR32:$dst, 0)]>;
}
-// Basic operations on GR16 / GR32 subclasses GR16_ and GR32_ which contains only
-// those registers that have GR8 sub-registers (i.e. AX - DX, EAX - EDX).
-let neverHasSideEffects = 1, isAsCheapAsAMove = 1 in {
-def MOV16to16_ : I<0x89, MRMDestReg, (outs GR16_:$dst), (ins GR16:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32to32_ : I<0x89, MRMDestReg, (outs GR32_:$dst), (ins GR32:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>;
-
-def MOV16_rr : I<0x89, MRMDestReg, (outs GR16_:$dst), (ins GR16_:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32_rr : I<0x89, MRMDestReg, (outs GR32_:$dst), (ins GR32_:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>;
-} // neverHasSideEffects
-
-let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
-def MOV16_rm : I<0x8B, MRMSrcMem, (outs GR16_:$dst), (ins i16mem:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32_rm : I<0x8B, MRMSrcMem, (outs GR32_:$dst), (ins i32mem:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>;
-}
-let mayStore = 1, neverHasSideEffects = 1 in {
-def MOV16_mr : I<0x89, MRMDestMem, (outs), (ins i16mem:$dst, GR16_:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32_mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32_:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>;
-}
-
//===----------------------------------------------------------------------===//
// Thread Local Storage Instructions
//
@@ -3341,38 +3346,61 @@ def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
// r & (2^16-1) ==> movz
def : Pat<(and GR32:$src1, 0xffff),
- (MOVZX32rr16 (i16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit)))>;
+ (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit))>;
// r & (2^8-1) ==> movz
def : Pat<(and GR32:$src1, 0xff),
- (MOVZX32rr8 (i8 (EXTRACT_SUBREG (MOV32to32_ GR32:$src1),
- x86_subreg_8bit)))>,
+ (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src1, GR32_),
+ x86_subreg_8bit))>,
Requires<[In32BitMode]>;
// r & (2^8-1) ==> movz
def : Pat<(and GR16:$src1, 0xff),
- (MOVZX16rr8 (i8 (EXTRACT_SUBREG (MOV16to16_ GR16:$src1),
- x86_subreg_8bit)))>,
+ (MOVZX16rr8 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src1, GR16_),
+ x86_subreg_8bit))>,
Requires<[In32BitMode]>;
// sext_inreg patterns
def : Pat<(sext_inreg GR32:$src, i16),
- (MOVSX32rr16 (i16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit)))>;
+ (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>;
def : Pat<(sext_inreg GR32:$src, i8),
- (MOVSX32rr8 (i8 (EXTRACT_SUBREG (MOV32to32_ GR32:$src),
- x86_subreg_8bit)))>,
+ (MOVSX32rr8 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src, GR32_),
+ x86_subreg_8bit))>,
Requires<[In32BitMode]>;
def : Pat<(sext_inreg GR16:$src, i8),
- (MOVSX16rr8 (i8 (EXTRACT_SUBREG (MOV16to16_ GR16:$src),
- x86_subreg_8bit)))>,
+ (MOVSX16rr8 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src, GR16_),
+ x86_subreg_8bit))>,
Requires<[In32BitMode]>;
// trunc patterns
def : Pat<(i16 (trunc GR32:$src)),
- (i16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>;
+ (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit)>;
def : Pat<(i8 (trunc GR32:$src)),
- (i8 (EXTRACT_SUBREG (MOV32to32_ GR32:$src), x86_subreg_8bit))>,
+ (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src, GR32_),
+ x86_subreg_8bit)>,
Requires<[In32BitMode]>;
def : Pat<(i8 (trunc GR16:$src)),
- (i8 (EXTRACT_SUBREG (MOV16to16_ GR16:$src), x86_subreg_8bit))>,
+ (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src, GR16_),
+ x86_subreg_8bit)>,
+ Requires<[In32BitMode]>;
+
+// h-register tricks
+def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
+ (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src, GR16_),
+ x86_subreg_8bit_hi)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
+ (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src, GR32_),
+ x86_subreg_8bit_hi)>,
+ Requires<[In32BitMode]>;
+def : Pat<(srl_su GR16:$src, (i8 8)),
+ (EXTRACT_SUBREG
+ (MOVZX32rr8
+ (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src, GR16_),
+ x86_subreg_8bit_hi)),
+ x86_subreg_16bit)>,
+ Requires<[In32BitMode]>;
+def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
+ (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src, GR32_),
+ x86_subreg_8bit_hi))>,
Requires<[In32BitMode]>;
// (shl x, 1) ==> (add x, x)
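In 32-bit mode there is no REX complication, so the patterns above only need to constrain the source to GR16_/GR32_, the classes whose registers actually have high-byte subregisters. A shift-right-by-8 that is truncated, or masked with 0xff, then selects to a direct use of %ah/%bh/%ch/%dh. A sketch reproducing bar16 and qux32 from the new h-registers.ll test below:

; With -march=x86 the high byte is used in place, e.g. "incb %ah" here.
define void @bar16(i16 inreg %x, i8* inreg %p) nounwind {
  %t0 = lshr i16 %x, 8
  %t1 = trunc i16 %t0 to i8
  %t2 = add i8 %t1, 1
  store i8 %t2, i8* %p
  ret void
}
; The masked form zero-extends from the high byte, e.g. "movzbl %ah, %eax".
define i32 @qux32(i32 inreg %x) nounwind {
  %t0 = lshr i32 %x, 8
  %t1 = and i32 %t0, 255
  ret i32 %t1
}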
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h
index 4856e2346de..33b9f5edc73 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -35,7 +35,7 @@ namespace X86 {
/// these indices must be kept in sync with the class indices in the
/// X86RegisterInfo.td file.
enum SubregIndex {
- SUBREG_8BIT = 1, SUBREG_16BIT = 2, SUBREG_32BIT = 3
+ SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4
};
}
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td
index a7b0f88963b..b323e78cfab 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -49,7 +49,8 @@ let Namespace = "X86" in {
def R14B : Register<"r14b">, DwarfRegNum<[14, -2, -2]>;
def R15B : Register<"r15b">, DwarfRegNum<[15, -2, -2]>;
- // High registers X86-32 only
+ // High registers. On x86-64, these cannot be used in any instruction
+ // with a REX prefix.
def AH : Register<"ah">, DwarfRegNum<[0, 0, 0]>;
def DH : Register<"dh">, DwarfRegNum<[1, 2, 2]>;
def CH : Register<"ch">, DwarfRegNum<[2, 1, 1]>;
@@ -185,41 +186,45 @@ let Namespace = "X86" in {
//
def x86_subreg_8bit : PatLeaf<(i32 1)>;
-def x86_subreg_16bit : PatLeaf<(i32 2)>;
-def x86_subreg_32bit : PatLeaf<(i32 3)>;
+def x86_subreg_8bit_hi : PatLeaf<(i32 2)>;
+def x86_subreg_16bit : PatLeaf<(i32 3)>;
+def x86_subreg_32bit : PatLeaf<(i32 4)>;
def : SubRegSet<1, [AX, CX, DX, BX, SP, BP, SI, DI,
R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W],
[AL, CL, DL, BL, SPL, BPL, SIL, DIL,
R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
-// It's unclear if this subreg set is safe, given that not all registers
-// in the class have an 'H' subreg.
-// def : SubRegSet<2, [AX, CX, DX, BX],
-// [AH, CH, DH, BH]>;
+def : SubRegSet<2, [AX, CX, DX, BX],
+ [AH, CH, DH, BH]>;
def : SubRegSet<1, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
[AL, CL, DL, BL, SPL, BPL, SIL, DIL,
R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
-def : SubRegSet<2, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
+def : SubRegSet<2, [EAX, ECX, EDX, EBX],
+ [AH, CH, DH, BH]>;
+
+def : SubRegSet<3, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
[AX, CX, DX, BX, SP, BP, SI, DI,
R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
-
def : SubRegSet<1, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
R8, R9, R10, R11, R12, R13, R14, R15],
[AL, CL, DL, BL, SPL, BPL, SIL, DIL,
R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
-def : SubRegSet<2, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
+def : SubRegSet<2, [RAX, RCX, RDX, RBX],
+ [AH, CH, DH, BH]>;
+
+def : SubRegSet<3, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
R8, R9, R10, R11, R12, R13, R14, R15],
[AX, CX, DX, BX, SP, BP, SI, DI,
R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
-
-def : SubRegSet<3, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
+
+def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
R8, R9, R10, R11, R12, R13, R14, R15],
[EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>;
@@ -236,7 +241,11 @@ def : SubRegSet<3, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
// R8B, ... R15B.
// Allocate R12 and R13 last, as these require an extra byte when
// encoded in x86_64 instructions.
-// FIXME: Allow AH, CH, DH, BH in 64-mode for non-REX instructions,
+// FIXME: Allow AH, CH, DH, BH to be used as general-purpose registers in
+// 64-bit mode. The main complication is that they cannot be encoded in an
+// instruction requiring a REX prefix, while SIL, DIL, BPL, R8D, etc.
+// require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d"
+// cannot be encoded.
def GR8 : RegisterClass<"X86", [i8], 8,
[AL, CL, DL, BL, AH, CH, DH, BH, SIL, DIL, BPL, SPL,
R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B]> {
@@ -295,7 +304,7 @@ def GR8 : RegisterClass<"X86", [i8], 8,
def GR16 : RegisterClass<"X86", [i16], 16,
[AX, CX, DX, SI, DI, BX, BP, SP,
R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]> {
- let SubRegClassList = [GR8];
+ let SubRegClassList = [GR8, GR8];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
@@ -363,7 +372,7 @@ def GR16 : RegisterClass<"X86", [i16], 16,
def GR32 : RegisterClass<"X86", [i32], 32,
[EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
- let SubRegClassList = [GR8, GR16];
+ let SubRegClassList = [GR8, GR8, GR16];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
@@ -431,7 +440,7 @@ def GR32 : RegisterClass<"X86", [i32], 32,
def GR64 : RegisterClass<"X86", [i64], 64,
[RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
RBX, R14, R15, R12, R13, RBP, RSP]> {
- let SubRegClassList = [GR8, GR16, GR32];
+ let SubRegClassList = [GR8, GR8, GR16, GR32];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -452,13 +461,118 @@ def GR64 : RegisterClass<"X86", [i64], 64,
}
-// GR16, GR32 subclasses which contain registers that have GR8 sub-registers.
-// These should only be used for 32-bit mode.
+// GR8_, GR16_, GR32_, GR64_ - Subclasses of GR8, GR16, GR32, and GR64
+// which contain just the "a", "b", "c", and "d" registers. On x86-32,
+// GR16_ and GR32_ are classes for registers that support 8-bit subreg
+// operations. On x86-64, GR16_, GR32_, and GR64_ are classes for registers
+// that support 8-bit h-register operations.
+def GR8_ : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]> {
+}
def GR16_ : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> {
- let SubRegClassList = [GR8];
+ let SubRegClassList = [GR8_, GR8_];
}
def GR32_ : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> {
- let SubRegClassList = [GR8, GR16];
+ let SubRegClassList = [GR8_, GR8_, GR16_];
+}
+def GR64_ : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> {
+ let SubRegClassList = [GR8_, GR8_, GR16_, GR32_];
+}
+
+// GR8_NOREX, GR16_NOREX, GR32_NOREX, GR64_NOREX - Subclasses of
+// GR8, GR16, GR32, and GR64 which contain only the first 8 GPRs.
+// On x86-64, GR64_NOREX, GR32_NOREX and GR16_NOREX are the classes
+// of registers which do not by themselves require a REX prefix.
+def GR8_NOREX : RegisterClass<"X86", [i8], 8,
+ [AL, CL, DL, SIL, DIL, BL, BPL, SPL]> {
+}
+def GR16_NOREX : RegisterClass<"X86", [i16], 16,
+ [AX, CX, DX, SI, DI, BX, BP, SP]> {
+ let SubRegClassList = [GR8_NOREX, GR8_NOREX];
+}
+// GR32_NOREX - GR32 registers which do not require a REX prefix.
+def GR32_NOREX : RegisterClass<"X86", [i32], 32,
+ [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> {
+ let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX];
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // Does the function dedicate RBP / EBP to being a frame ptr?
+ // If so, don't allocate ESP or EBP.
+ static const unsigned X86_GR32_NOREX_AO_fp[] = {
+ X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX
+ };
+ // If not, just don't allocate ESP.
+ static const unsigned X86_GR32_NOREX_AO[] = {
+ X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP
+ };
+
+ GR32_NOREXClass::iterator
+ GR32_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return X86_GR32_NOREX_AO_fp;
+ else
+ return X86_GR32_NOREX_AO;
+ }
+
+ GR32_NOREXClass::iterator
+ GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return X86_GR32_NOREX_AO_fp +
+ (sizeof(X86_GR32_NOREX_AO_fp) / sizeof(unsigned));
+ else
+ return X86_GR32_NOREX_AO +
+ (sizeof(X86_GR32_NOREX_AO) / sizeof(unsigned));
+ }
+ }];
+}
+
+// GR64_NOREX - GR64 registers which do not require a REX prefix.
+def GR64_NOREX : RegisterClass<"X86", [i64], 64,
+ [RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP]> {
+ let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX];
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // Does the function dedicate RBP / EBP to being a frame ptr?
+ // If so, don't allocate RSP or RBP.
+ static const unsigned X86_GR64_NOREX_AO_fp[] = {
+ X86::RAX, X86::RCX, X86::RDX, X86::RSI, X86::RDI, X86::RBX
+ };
+ // If not, just don't allocate RSP.
+ static const unsigned X86_GR64_NOREX_AO[] = {
+ X86::RAX, X86::RCX, X86::RDX, X86::RSI, X86::RDI, X86::RBX, X86::RBP
+ };
+
+ GR64_NOREXClass::iterator
+ GR64_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return X86_GR64_NOREX_AO_fp;
+ else
+ return X86_GR64_NOREX_AO;
+ }
+
+ GR64_NOREXClass::iterator
+ GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return X86_GR64_NOREX_AO_fp +
+ (sizeof(X86_GR64_NOREX_AO_fp) / sizeof(unsigned));
+ else
+ return X86_GR64_NOREX_AO +
+ (sizeof(X86_GR64_NOREX_AO) / sizeof(unsigned));
+ }
+ }];
}
// A class to support the 'A' assembler constraint: EAX then EDX.
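The NOREX classes above exist because an instruction that references %ah, %bh, %ch, or %dh cannot carry a REX prefix, so every other register operand in that instruction must be encodable without one; that excludes %sil/%dil/%bpl/%spl, r8-r15, and their subregisters. The h-register store pattern depends on this: the address operand of MOV8mr_NOREX comes from GR64_NOREX, so the selected code is, say, "movb %ah, (%rsi)" and never "movb %ah, (%r8)", which has no encoding. The shape that exercises it is foo64 from the new h-register-store.ll test below:

; On x86-64 the high byte of %p is stored through a non-REX base register,
; e.g. "movb %ah, (%rsi)".
define void @foo64(i64 inreg %p, i8* inreg %z) nounwind {
  %q = lshr i64 %p, 8
  %t = trunc i64 %q to i8
  store i8 %t, i8* %z
  ret void
}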
diff --git a/llvm/test/CodeGen/X86/h-register-addressing-32.ll b/llvm/test/CodeGen/X86/h-register-addressing-32.ll
new file mode 100644
index 00000000000..41d91285ddb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/h-register-addressing-32.ll
@@ -0,0 +1,53 @@
+; RUN: llvm-as < %s | llc -march=x86 | grep {movzbl %\[abcd\]h,} | count 7
+
+; Use h-register extract and zero-extend.
+
+define double @foo8(double* nocapture inreg %p, i32 inreg %x) nounwind readonly {
+ %t0 = lshr i32 %x, 8
+ %t1 = and i32 %t0, 255
+ %t2 = getelementptr double* %p, i32 %t1
+ %t3 = load double* %t2, align 8
+ ret double %t3
+}
+define float @foo4(float* nocapture inreg %p, i32 inreg %x) nounwind readonly {
+ %t0 = lshr i32 %x, 8
+ %t1 = and i32 %t0, 255
+ %t2 = getelementptr float* %p, i32 %t1
+ %t3 = load float* %t2, align 8
+ ret float %t3
+}
+define i16 @foo2(i16* nocapture inreg %p, i32 inreg %x) nounwind readonly {
+ %t0 = lshr i32 %x, 8
+ %t1 = and i32 %t0, 255
+ %t2 = getelementptr i16* %p, i32 %t1
+ %t3 = load i16* %t2, align 8
+ ret i16 %t3
+}
+define i8 @foo1(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
+ %t0 = lshr i32 %x, 8
+ %t1 = and i32 %t0, 255
+ %t2 = getelementptr i8* %p, i32 %t1
+ %t3 = load i8* %t2, align 8
+ ret i8 %t3
+}
+define i8 @bar8(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
+ %t0 = lshr i32 %x, 5
+ %t1 = and i32 %t0, 2040
+ %t2 = getelementptr i8* %p, i32 %t1
+ %t3 = load i8* %t2, align 8
+ ret i8 %t3
+}
+define i8 @bar4(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
+ %t0 = lshr i32 %x, 6
+ %t1 = and i32 %t0, 1020
+ %t2 = getelementptr i8* %p, i32 %t1
+ %t3 = load i8* %t2, align 8
+ ret i8 %t3
+}
+define i8 @bar2(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
+ %t0 = lshr i32 %x, 7
+ %t1 = and i32 %t0, 510
+ %t2 = getelementptr i8* %p, i32 %t1
+ %t3 = load i8* %t2, align 8
+ ret i8 %t3
+}
diff --git a/llvm/test/CodeGen/X86/h-register-addressing-64.ll b/llvm/test/CodeGen/X86/h-register-addressing-64.ll
new file mode 100644
index 00000000000..b38e0e478e9
--- /dev/null
+++ b/llvm/test/CodeGen/X86/h-register-addressing-64.ll
@@ -0,0 +1,53 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep {movzbl %\[abcd\]h,} | count 7
+
+; Use h-register extract and zero-extend.
+
+define double @foo8(double* nocapture inreg %p, i64 inreg %x) nounwind readonly {
+ %t0 = lshr i64 %x, 8
+ %t1 = and i64 %t0, 255
+ %t2 = getelementptr double* %p, i64 %t1
+ %t3 = load double* %t2, align 8
+ ret double %t3
+}
+define float @foo4(float* nocapture inreg %p, i64 inreg %x) nounwind readonly {
+ %t0 = lshr i64 %x, 8
+ %t1 = and i64 %t0, 255
+ %t2 = getelementptr float* %p, i64 %t1
+ %t3 = load float* %t2, align 8
+ ret float %t3
+}
+define i16 @foo2(i16* nocapture inreg %p, i64 inreg %x) nounwind readonly {
+ %t0 = lshr i64 %x, 8
+ %t1 = and i64 %t0, 255
+ %t2 = getelementptr i16* %p, i64 %t1
+ %t3 = load i16* %t2, align 8
+ ret i16 %t3
+}
+define i8 @foo1(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
+ %t0 = lshr i64 %x, 8
+ %t1 = and i64 %t0, 255
+ %t2 = getelementptr i8* %p, i64 %t1
+ %t3 = load i8* %t2, align 8
+ ret i8 %t3
+}
+define i8 @bar8(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
+ %t0 = lshr i64 %x, 5
+ %t1 = and i64 %t0, 2040
+ %t2 = getelementptr i8* %p, i64 %t1
+ %t3 = load i8* %t2, align 8
+ ret i8 %t3
+}
+define i8 @bar4(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
+ %t0 = lshr i64 %x, 6
+ %t1 = and i64 %t0, 1020
+ %t2 = getelementptr i8* %p, i64 %t1
+ %t3 = load i8* %t2, align 8
+ ret i8 %t3
+}
+define i8 @bar2(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
+ %t0 = lshr i64 %x, 7
+ %t1 = and i64 %t0, 510
+ %t2 = getelementptr i8* %p, i64 %t1
+ %t3 = load i8* %t2, align 8
+ ret i8 %t3
+}
diff --git a/llvm/test/CodeGen/X86/h-register-store.ll b/llvm/test/CodeGen/X86/h-register-store.ll
new file mode 100644
index 00000000000..e8672422a7b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/h-register-store.ll
@@ -0,0 +1,27 @@
+; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: grep mov %t | count 6
+; RUN: grep {movb %ah, (%rsi)} %t | count 3
+; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: grep mov %t | count 3
+; RUN: grep {movb %ah, (%e} %t | count 3
+
+; Use h-register extract and store.
+
+define void @foo16(i16 inreg %p, i8* inreg %z) nounwind {
+ %q = lshr i16 %p, 8
+ %t = trunc i16 %q to i8
+ store i8 %t, i8* %z
+ ret void
+}
+define void @foo32(i32 inreg %p, i8* inreg %z) nounwind {
+ %q = lshr i32 %p, 8
+ %t = trunc i32 %q to i8
+ store i8 %t, i8* %z
+ ret void
+}
+define void @foo64(i64 inreg %p, i8* inreg %z) nounwind {
+ %q = lshr i64 %p, 8
+ %t = trunc i64 %q to i8
+ store i8 %t, i8* %z
+ ret void
+}
diff --git a/llvm/test/CodeGen/X86/h-registers.ll b/llvm/test/CodeGen/X86/h-registers.ll
new file mode 100644
index 00000000000..2777be9cc3e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/h-registers.ll
@@ -0,0 +1,48 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep {movzbl %\[abcd\]h,} | count 4
+; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: grep {incb %ah} %t | count 3
+; RUN: grep {movzbl %ah,} %t | count 3
+
+; Use h registers. On x86-64, codegen doesn't support general allocation
+; of h registers yet, due to x86 encoding complications.
+
+define void @bar64(i64 inreg %x, i8* inreg %p) nounwind {
+ %t0 = lshr i64 %x, 8
+ %t1 = trunc i64 %t0 to i8
+ %t2 = add i8 %t1, 1
+ store i8 %t2, i8* %p
+ ret void
+}
+
+define void @bar32(i32 inreg %x, i8* inreg %p) nounwind {
+ %t0 = lshr i32 %x, 8
+ %t1 = trunc i32 %t0 to i8
+ %t2 = add i8 %t1, 1
+ store i8 %t2, i8* %p
+ ret void
+}
+
+define void @bar16(i16 inreg %x, i8* inreg %p) nounwind {
+ %t0 = lshr i16 %x, 8
+ %t1 = trunc i16 %t0 to i8
+ %t2 = add i8 %t1, 1
+ store i8 %t2, i8* %p
+ ret void
+}
+
+define i64 @qux64(i64 inreg %x) nounwind {
+ %t0 = lshr i64 %x, 8
+ %t1 = and i64 %t0, 255
+ ret i64 %t1
+}
+
+define i32 @qux32(i32 inreg %x) nounwind {
+ %t0 = lshr i32 %x, 8
+ %t1 = and i32 %t0, 255
+ ret i32 %t1
+}
+
+define i16 @qux16(i16 inreg %x) nounwind {
+ %t0 = lshr i16 %x, 8
+ ret i16 %t0
+}
diff --git a/llvm/test/CodeGen/X86/inline-asm-out-regs.ll b/llvm/test/CodeGen/X86/inline-asm-out-regs.ll
index 3a84bad94d0..01f1397830a 100644
--- a/llvm/test/CodeGen/X86/inline-asm-out-regs.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-out-regs.ll
@@ -1,6 +1,4 @@
; RUN: llvm-as < %s | llc -mtriple=i386-unknown-linux-gnu
-; XFAIL: *
-; Expected to run out of registers during allocation.
; PR3391
@pci_indirect = external global { } ; <{ }*> [#uses=1]