diff options
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 34 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 8 |
2 files changed, 37 insertions, 5 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 4b2a20d1809..a43c5ddad3f 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -19,6 +19,7 @@ #include "X86Subtarget.h" #include "X86TargetMachine.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -2127,9 +2128,36 @@ MachineInstr* X86InstrInfo::foldMemoryOperand(MachineFunction &MF, return NULL; SmallVector<MachineOperand,4> MOs; - unsigned NumOps = LoadMI->getDesc().getNumOperands(); - for (unsigned i = NumOps - 4; i != NumOps; ++i) - MOs.push_back(LoadMI->getOperand(i)); + if (LoadMI->getOpcode() == X86::V_SET0 || + LoadMI->getOpcode() == X86::V_SETALLONES) { + // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure. + // Create a constant-pool entry and operands to load from it. + + // x86-32 PIC requires a PIC base register for constant pools. + unsigned PICBase = 0; + if (TM.getRelocationModel() == Reloc::PIC_ && + !TM.getSubtarget<X86Subtarget>().is64Bit()) + PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF); + + // Create a v4i32 constant-pool entry. + MachineConstantPool &MCP = *MF.getConstantPool(); + const VectorType *Ty = VectorType::get(Type::Int32Ty, 4); + Constant *C = LoadMI->getOpcode() == X86::V_SET0 ? + ConstantVector::getNullValue(Ty) : + ConstantVector::getAllOnesValue(Ty); + unsigned CPI = MCP.getConstantPoolIndex(C, /*AlignmentLog2=*/4); + + // Create operands to load from the constant pool entry. + MOs.push_back(MachineOperand::CreateReg(PICBase, false)); + MOs.push_back(MachineOperand::CreateImm(1)); + MOs.push_back(MachineOperand::CreateReg(0, false)); + MOs.push_back(MachineOperand::CreateCPI(CPI, 0)); + } else { + // Folding a normal load. Just copy the load's address operands. + unsigned NumOps = LoadMI->getDesc().getNumOperands(); + for (unsigned i = NumOps - 4; i != NumOps; ++i) + MOs.push_back(LoadMI->getOperand(i)); + } return foldMemoryOperand(MF, MI, Ops[0], MOs); } diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 426bddcea09..58e923d6df5 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -987,7 +987,9 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>; // Alias instructions that map zero vector to pxor / xorp* for sse. -let isReMaterializable = 1 in +// We set isSimpleLoad because this can be converted to a constant-pool +// load of an all-zeros value if folding it would be beneficial. +let isReMaterializable = 1, isAsCheapAsAMove = 1, isSimpleLoad = 1 in def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "xorps\t$dst, $dst", [(set VR128:$dst, (v4i32 immAllZerosV))]>; @@ -2253,7 +2255,9 @@ def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss), (i8 1)), (MFENCE)>; // Alias instructions that map zero vector to pxor / xorp* for sse. -let isReMaterializable = 1, isAsCheapAsAMove = 1 in +// We set isSimpleLoad because this can be converted to a constant-pool +// load of an all-ones value if folding it would be beneficial. +let isReMaterializable = 1, isAsCheapAsAMove = 1, isSimpleLoad = 1 in def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "pcmpeqd\t$dst, $dst", [(set VR128:$dst, (v4i32 immAllOnesV))]>; |