diff options
| -rw-r--r-- | llvm/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 11 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 26 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_return.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_shuffle-7.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_shuffle-9.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_zero.ll | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_zero_cse.ll | 3 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/xor.ll | 2 | 
9 files changed, 34 insertions, 22 deletions
diff --git a/llvm/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/llvm/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index 88370e630bc..c851ca3fc80 100644 --- a/llvm/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -287,7 +287,9 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {      LowerUnaryToTwoAddr(OutMI, X86::MMX_PCMPEQDrr); break;    case X86::FsFLD0SS:     LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;    case X86::FsFLD0SD:     LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; -  case X86::V_SET0:       LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break; +  case X86::V_SET0PS:     LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break; +  case X86::V_SET0PD:     LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break; +  case X86::V_SET0PI:     LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;    case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;    case X86::MOV16r0: diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index d9b09609609..a6d9c045aa5 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -2514,7 +2514,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,      Alignment = (*LoadMI->memoperands_begin())->getAlignment();    else      switch (LoadMI->getOpcode()) { -    case X86::V_SET0: +    case X86::V_SET0PS: +    case X86::V_SET0PD: +    case X86::V_SET0PI:      case X86::V_SETALLONES:        Alignment = 16;        break; @@ -2544,11 +2546,13 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,    SmallVector<MachineOperand,X86AddrNumOperands> MOs;    switch (LoadMI->getOpcode()) { -  case X86::V_SET0: +  case X86::V_SET0PS: +  case X86::V_SET0PD: +  case X86::V_SET0PI:    case X86::V_SETALLONES:    case X86::FsFLD0SD:    case X86::FsFLD0SS: { -    // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure. +    // Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure.      // Create a constant-pool entry and operands to load from it.      // Medium and large mode can't fold loads this way. @@ -3675,6 +3679,7 @@ static const unsigned ReplaceableInstrs[][3] = {    { X86::ANDPSrr,    X86::ANDPDrr,   X86::PANDrr    },    { X86::ORPSrm,     X86::ORPDrm,    X86::PORrm     },    { X86::ORPSrr,     X86::ORPDrr,    X86::PORrr     }, +  { X86::V_SET0PS,   X86::V_SET0PD,  X86::V_SET0PI  },    { X86::XORPSrm,    X86::XORPDrm,   X86::PXORrm    },    { X86::XORPSrr,    X86::XORPDrr,   X86::PXORrr    },  }; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index f74ca9dd7bf..5a87314c324 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -1115,15 +1115,19 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),  // load of an all-zeros value if folding it would be beneficial.  // FIXME: Change encoding to pseudo!  let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, -    isCodeGenOnly = 1 in -def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", +    isCodeGenOnly = 1 in { +def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", +                 [(set VR128:$dst, (v4f32 immAllZerosV))]>; +def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", +                 [(set VR128:$dst, (v2f64 immAllZerosV))]>; +let ExeDomain = SSEPackedInt in +def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",                   [(set VR128:$dst, (v4i32 immAllZerosV))]>; +} -def : Pat<(v2i64 immAllZerosV), (V_SET0)>; -def : Pat<(v8i16 immAllZerosV), (V_SET0)>; -def : Pat<(v16i8 immAllZerosV), (V_SET0)>; -def : Pat<(v2f64 immAllZerosV), (V_SET0)>; -def : Pat<(v4f32 immAllZerosV), (V_SET0)>; +def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>; +def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>; +def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;  def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),            (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>; @@ -3026,14 +3030,14 @@ let Predicates = [HasSSE2] in {  let AddedComplexity = 15 in {  // Zeroing a VR128 then do a MOVS{S|D} to the lower bits.  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), -          (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>; +          (MOVSDrr (v2f64 (V_SET0PS)), FR64:$src)>;  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), -          (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>; +          (MOVSSrr (v4f32 (V_SET0PS)), FR32:$src)>;  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), -          (MOVSSrr (v4f32 (V_SET0)), +          (MOVSSrr (v4f32 (V_SET0PS)),                     (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss)))>;  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), -          (MOVSSrr (v4i32 (V_SET0)), +          (MOVSSrr (v4i32 (V_SET0PI)),                     (EXTRACT_SUBREG (v4i32 VR128:$src), x86_subreg_ss))>;  } diff --git a/llvm/test/CodeGen/X86/vec_return.ll b/llvm/test/CodeGen/X86/vec_return.ll index 66762b4a060..676be9b7179 100644 --- a/llvm/test/CodeGen/X86/vec_return.ll +++ b/llvm/test/CodeGen/X86/vec_return.ll @@ -1,5 +1,5 @@  ; RUN: llc < %s -march=x86 -mattr=+sse2 > %t -; RUN: grep xorps %t | count 1 +; RUN: grep pxor %t | count 1  ; RUN: grep movaps %t | count 1  ; RUN: not grep shuf %t diff --git a/llvm/test/CodeGen/X86/vec_shuffle-7.ll b/llvm/test/CodeGen/X86/vec_shuffle-7.ll index 4cdca09c72f..64bd6a3c83b 100644 --- a/llvm/test/CodeGen/X86/vec_shuffle-7.ll +++ b/llvm/test/CodeGen/X86/vec_shuffle-7.ll @@ -1,5 +1,5 @@  ; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t -; RUN: grep xorps %t | count 1 +; RUN: grep pxor %t | count 1  ; RUN: not grep shufps %t  define void @test() { diff --git a/llvm/test/CodeGen/X86/vec_shuffle-9.ll b/llvm/test/CodeGen/X86/vec_shuffle-9.ll index fc16a26b615..07195869b8c 100644 --- a/llvm/test/CodeGen/X86/vec_shuffle-9.ll +++ b/llvm/test/CodeGen/X86/vec_shuffle-9.ll @@ -1,7 +1,7 @@  ; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s  define <4 x i32> @test(i8** %ptr) { -; CHECK: xorps +; CHECK: pxor  ; CHECK: punpcklbw  ; CHECK: punpcklwd diff --git a/llvm/test/CodeGen/X86/vec_zero.ll b/llvm/test/CodeGen/X86/vec_zero.ll index ae5af586cdc..4d1f05629b4 100644 --- a/llvm/test/CodeGen/X86/vec_zero.ll +++ b/llvm/test/CodeGen/X86/vec_zero.ll @@ -1,5 +1,6 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 | grep xorps | count 2 +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s +; CHECK: xorps  define void @foo(<4 x float>* %P) {          %T = load <4 x float>* %P               ; <<4 x float>> [#uses=1]          %S = fadd <4 x float> zeroinitializer, %T                ; <<4 x float>> [#uses=1] @@ -7,6 +8,7 @@ define void @foo(<4 x float>* %P) {          ret void  } +; CHECK: pxor  define void @bar(<4 x i32>* %P) {          %T = load <4 x i32>* %P         ; <<4 x i32>> [#uses=1]          %S = add <4 x i32> zeroinitializer, %T          ; <<4 x i32>> [#uses=1] diff --git a/llvm/test/CodeGen/X86/vec_zero_cse.ll b/llvm/test/CodeGen/X86/vec_zero_cse.ll index 296378c6e9f..3b15d4cc407 100644 --- a/llvm/test/CodeGen/X86/vec_zero_cse.ll +++ b/llvm/test/CodeGen/X86/vec_zero_cse.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 1 -; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep xorps | count 1 +; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 2  ; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pcmpeqd | count 2  @M1 = external global <1 x i64> diff --git a/llvm/test/CodeGen/X86/xor.ll b/llvm/test/CodeGen/X86/xor.ll index 9bfff8a06a8..f270d9d56e2 100644 --- a/llvm/test/CodeGen/X86/xor.ll +++ b/llvm/test/CodeGen/X86/xor.ll @@ -7,7 +7,7 @@ define <4 x i32> @test1() nounwind {  	ret <4 x i32> %tmp  ; X32: test1: -; X32:	xorps	%xmm0, %xmm0 +; X32:	pxor	%xmm0, %xmm0  ; X32:	ret  }  | 

