author | Jakob Stoklund Olesen <stoklund@2pi.dk> | 2010-03-31 00:40:13 +0000 |
---|---|---|
committer | Jakob Stoklund Olesen <stoklund@2pi.dk> | 2010-03-31 00:40:13 +0000 |
commit | 9986ba954c5c3fa0506cf54bdeba06ae46f0856f (patch) | |
tree | 25a2b81adeff3c59ddb9b6d7618aefa15a2fe4eb /llvm/lib/Target | |
parent | 710c6892be50528a29949878d20c6372bc86455a (diff) | |
download | bcm5719-llvm-9986ba954c5c3fa0506cf54bdeba06ae46f0856f.tar.gz bcm5719-llvm-9986ba954c5c3fa0506cf54bdeba06ae46f0856f.zip |
Replace V_SET0 with variants for each SSE execution domain.
llvm-svn: 99975
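
In short, the single V_SET0 zeroing pseudo-instruction is split into V_SET0PS, V_SET0PD, and V_SET0PI, one variant per SSE execution domain, and a new row in ReplaceableInstrs lets the backend swap between them. Below is a minimal, self-contained C++ sketch of that kind of table-driven domain switch; the opcode and helper names are hypothetical and only mirror the shape of the table in the patch, not LLVM's actual API.

// Hypothetical sketch of a table-driven execution-domain switch.
// Opcode/helper names are illustrative; this is not LLVM's real interface.
#include <cstdio>

enum Opcode { V_SET0PS, V_SET0PD, V_SET0PI, XORPSrr, XORPDrr, PXORrr };
enum Domain { PackedSingle = 0, PackedDouble = 1, PackedInt = 2 };

// Each row holds the same operation expressed in the PS, PD, and
// packed-integer domains, like the ReplaceableInstrs table in the patch.
constexpr Opcode ReplaceableOps[][3] = {
    {V_SET0PS, V_SET0PD, V_SET0PI},
    {XORPSrr,  XORPDrr,  PXORrr},
};

// Return the equivalent opcode in the requested domain, or the original
// opcode if it has no domain variants.
Opcode switchDomain(Opcode Op, Domain D) {
  for (const auto &Row : ReplaceableOps)
    for (Opcode Candidate : Row)
      if (Candidate == Op)
        return Row[D];
  return Op;
}

int main() {
  // A zero register consumed by integer code: prefer the PXOR-based variant.
  std::printf("%d\n", switchDomain(V_SET0PS, PackedInt) == V_SET0PI);   // 1
  // An XORPD feeding single-precision code can be rewritten as XORPS.
  std::printf("%d\n", switchDomain(XORPDrr, PackedSingle) == XORPSrr);  // 1
}

All three zeroing variants produce the same result, but XORPS, XORPD, and PXOR execute in different SSE domains on some microarchitectures, so choosing the variant that matches the surrounding instructions avoids bypass-delay stalls.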
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp | 4
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 11
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 26 |
3 files changed, 26 insertions, 15 deletions
diff --git a/llvm/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/llvm/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index 88370e630bc..c851ca3fc80 100644
--- a/llvm/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -287,7 +287,9 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
                            LowerUnaryToTwoAddr(OutMI, X86::MMX_PCMPEQDrr); break;
   case X86::FsFLD0SS:      LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
   case X86::FsFLD0SD:      LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
-  case X86::V_SET0:        LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
+  case X86::V_SET0PS:      LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
+  case X86::V_SET0PD:      LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
+  case X86::V_SET0PI:      LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
   case X86::V_SETALLONES:  LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
   case X86::MOV16r0:
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index d9b09609609..a6d9c045aa5 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -2514,7 +2514,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
     Alignment = (*LoadMI->memoperands_begin())->getAlignment();
   else
     switch (LoadMI->getOpcode()) {
-    case X86::V_SET0:
+    case X86::V_SET0PS:
+    case X86::V_SET0PD:
+    case X86::V_SET0PI:
     case X86::V_SETALLONES:
       Alignment = 16;
       break;
@@ -2544,11 +2546,13 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
   SmallVector<MachineOperand,X86AddrNumOperands> MOs;
   switch (LoadMI->getOpcode()) {
-  case X86::V_SET0:
+  case X86::V_SET0PS:
+  case X86::V_SET0PD:
+  case X86::V_SET0PI:
   case X86::V_SETALLONES:
   case X86::FsFLD0SD:
   case X86::FsFLD0SS: {
-    // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
+    // Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure.
     // Create a constant-pool entry and operands to load from it.

     // Medium and large mode can't fold loads this way.
@@ -3675,6 +3679,7 @@ static const unsigned ReplaceableInstrs[][3] = {
   { X86::ANDPSrr,  X86::ANDPDrr,  X86::PANDrr  },
   { X86::ORPSrm,   X86::ORPDrm,   X86::PORrm   },
   { X86::ORPSrr,   X86::ORPDrr,   X86::PORrr   },
+  { X86::V_SET0PS, X86::V_SET0PD, X86::V_SET0PI },
   { X86::XORPSrm,  X86::XORPDrm,  X86::PXORrm  },
   { X86::XORPSrr,  X86::XORPDrr,  X86::PXORrr  },
 };
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index f74ca9dd7bf..5a87314c324 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1115,15 +1115,19 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
 // load of an all-zeros value if folding it would be beneficial.
 // FIXME: Change encoding to pseudo!
 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
-    isCodeGenOnly = 1 in
-def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+    isCodeGenOnly = 1 in {
+def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+               [(set VR128:$dst, (v4f32 immAllZerosV))]>;
+def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+               [(set VR128:$dst, (v2f64 immAllZerosV))]>;
+let ExeDomain = SSEPackedInt in
+def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
                [(set VR128:$dst, (v4i32 immAllZerosV))]>;
+}

-def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
-def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
-def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
-def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
-def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
+def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;

 def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
           (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>;
@@ -3026,14 +3030,14 @@ let Predicates = [HasSSE2] in {
 let AddedComplexity = 15 in {
 // Zeroing a VR128 then do a MOVS{S|D} to the lower bits.
 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
-          (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+          (MOVSDrr (v2f64 (V_SET0PS)), FR64:$src)>;
 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
-          (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
+          (MOVSSrr (v4f32 (V_SET0PS)), FR32:$src)>;
 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
-          (MOVSSrr (v4f32 (V_SET0)),
+          (MOVSSrr (v4f32 (V_SET0PS)),
                    (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss)))>;
 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
-          (MOVSSrr (v4i32 (V_SET0)),
+          (MOVSSrr (v4i32 (V_SET0PI)),
                    (EXTRACT_SUBREG (v4i32 VR128:$src), x86_subreg_ss))>;
 }