summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r-- llvm/lib/Target/X86/X86InstrInfo.cpp 34
-rw-r--r-- llvm/lib/Target/X86/X86InstrSSE.td 8
2 files changed, 37 insertions, 5 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 4b2a20d1809..a43c5ddad3f 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -19,6 +19,7 @@
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -2127,9 +2128,36 @@ MachineInstr* X86InstrInfo::foldMemoryOperand(MachineFunction &MF,
return NULL;
SmallVector<MachineOperand,4> MOs;
- unsigned NumOps = LoadMI->getDesc().getNumOperands();
- for (unsigned i = NumOps - 4; i != NumOps; ++i)
- MOs.push_back(LoadMI->getOperand(i));
+ if (LoadMI->getOpcode() == X86::V_SET0 ||
+ LoadMI->getOpcode() == X86::V_SETALLONES) {
+ // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
+ // Create a constant-pool entry and operands to load from it.
+
+ // x86-32 PIC requires a PIC base register for constant pools.
+ unsigned PICBase = 0;
+ if (TM.getRelocationModel() == Reloc::PIC_ &&
+ !TM.getSubtarget<X86Subtarget>().is64Bit())
+ PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF);
+
+ // Create a v4i32 constant-pool entry.
+ MachineConstantPool &MCP = *MF.getConstantPool();
+ const VectorType *Ty = VectorType::get(Type::Int32Ty, 4);
+ Constant *C = LoadMI->getOpcode() == X86::V_SET0 ?
+ ConstantVector::getNullValue(Ty) :
+ ConstantVector::getAllOnesValue(Ty);
+ unsigned CPI = MCP.getConstantPoolIndex(C, /*AlignmentLog2=*/4);
+
+ // Create operands to load from the constant pool entry.
+ MOs.push_back(MachineOperand::CreateReg(PICBase, false));
+ MOs.push_back(MachineOperand::CreateImm(1));
+ MOs.push_back(MachineOperand::CreateReg(0, false));
+ MOs.push_back(MachineOperand::CreateCPI(CPI, 0));
+ } else {
+ // Folding a normal load. Just copy the load's address operands.
+ unsigned NumOps = LoadMI->getDesc().getNumOperands();
+ for (unsigned i = NumOps - 4; i != NumOps; ++i)
+ MOs.push_back(LoadMI->getOperand(i));
+ }
return foldMemoryOperand(MF, MI, Ops[0], MOs);
}
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 426bddcea09..58e923d6df5 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -987,7 +987,9 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
"stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>;
// Alias instructions that map zero vector to pxor / xorp* for sse.
-let isReMaterializable = 1 in
+// We set isSimpleLoad because this can be converted to a constant-pool
+// load of an all-zeros value if folding it would be beneficial.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isSimpleLoad = 1 in
def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins),
"xorps\t$dst, $dst",
[(set VR128:$dst, (v4i32 immAllZerosV))]>;
@@ -2253,7 +2255,9 @@ def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
(i8 1)), (MFENCE)>;
// Alias instruction that maps an all-ones vector to pcmpeqd for sse.
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+// We set isSimpleLoad because this can be converted to a constant-pool
+// load of an all-ones value if folding it would be beneficial.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isSimpleLoad = 1 in
def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins),
"pcmpeqd\t$dst, $dst",
[(set VR128:$dst, (v4i32 immAllOnesV))]>;
OpenPOWER on IntegriCloud