| field | value | date |
|---|---|---|
| author | Craig Topper <craig.topper@gmail.com> | 2012-01-10 06:30:56 +0000 |
| committer | Craig Topper <craig.topper@gmail.com> | 2012-01-10 06:30:56 +0000 |
| commit | eb8f9e9e5b49aca1748cff20783974522f7e01b4 (patch) | |
| tree | c3c75d409881ebb819f38e7db53cacaf13b084e0 /llvm/lib/Target | |
| parent | c4b251dc2bc66f753f279fa904da6861093ae1e6 (diff) | |
Instruction selection priority fixes to remove the XMM/XMMInt/orAVX predicates. Another commit will remove the orAVX functions from X86Subtarget.
llvm-svn: 147841
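The core of this cleanup is the X86Subtarget.h hunk at the end of the diff: the plain hasSSE*() queries used to return false as soon as AVX was enabled, which is why the .td files needed the parallel HasXMM/HasXMMInt/Has*orAVX predicates. Below is a minimal, self-contained sketch of that before/after behaviour; the enum and struct names are illustrative stand-ins, not LLVM's actual classes.

```cpp
#include <iostream>

// Illustrative stand-in for the X86 SSE feature ladder; the real enum and
// accessors live in llvm/lib/Target/X86/X86Subtarget.h.
enum SSELevel { NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2 };

struct SubtargetSketch {
  SSELevel Level = NoSSE;

  bool hasAVX() const { return Level >= AVX; }

  // Before this patch: an AVX target claimed *not* to have plain SSE2, so the
  // .td files needed extra HasXMMInt / HasSSE*orAVX predicates for patterns
  // that are legal under either feature set.
  bool hasSSE2_old() const { return Level >= SSE2 && !hasAVX(); }

  // After this patch: the query reports only the feature level; choosing the
  // SSE or the VEX-encoded AVX form is left to pattern priority in the .td
  // files instead of to the predicate.
  bool hasSSE2_new() const { return Level >= SSE2; }
};

int main() {
  SubtargetSketch AVXTarget;
  AVXTarget.Level = AVX;
  std::cout << "old hasSSE2(): " << AVXTarget.hasSSE2_old() << '\n'; // 0
  std::cout << "new hasSSE2(): " << AVXTarget.hasSSE2_new() << '\n'; // 1
}
```

Because the SSE and AVX predicates now overlap, the patch also bumps the AVX-only patterns in X86InstrSSE.td with AddedComplexity = 1 so they still win over the SSE forms on AVX targets (the "instruction selection priority fixes" in the commit message).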
Diffstat (limited to 'llvm/lib/Target')
| permissions | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrFPStack.td | 29 |
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrFormats.td | 25 |
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.td | 10 |
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrMMX.td | 40 |
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 89 |
| -rw-r--r-- | llvm/lib/Target/X86/X86Subtarget.h | 12 |

6 files changed, 89 insertions, 116 deletions
diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td
index cecd5929f92..adfd98b8fb4 100644
--- a/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -437,33 +437,26 @@ def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst">;
 }

 // FISTTP requires SSE3 even though it's a FPStack op.
+let Predicates = [HasSSE3] in {
 def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP,
-                    [(X86fp_to_i16mem RFP32:$src, addr:$op)]>,
-                    Requires<[HasSSE3orAVX]>;
+                    [(X86fp_to_i16mem RFP32:$src, addr:$op)]>;
 def ISTT_Fp32m32 : FpI_<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP,
-                    [(X86fp_to_i32mem RFP32:$src, addr:$op)]>,
-                    Requires<[HasSSE3orAVX]>;
+                    [(X86fp_to_i32mem RFP32:$src, addr:$op)]>;
 def ISTT_Fp64m32 : FpI_<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP,
-                    [(X86fp_to_i64mem RFP32:$src, addr:$op)]>,
-                    Requires<[HasSSE3orAVX]>;
+                    [(X86fp_to_i64mem RFP32:$src, addr:$op)]>;
 def ISTT_Fp16m64 : FpI_<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP,
-                    [(X86fp_to_i16mem RFP64:$src, addr:$op)]>,
-                    Requires<[HasSSE3orAVX]>;
+                    [(X86fp_to_i16mem RFP64:$src, addr:$op)]>;
 def ISTT_Fp32m64 : FpI_<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP,
-                    [(X86fp_to_i32mem RFP64:$src, addr:$op)]>,
-                    Requires<[HasSSE3orAVX]>;
+                    [(X86fp_to_i32mem RFP64:$src, addr:$op)]>;
 def ISTT_Fp64m64 : FpI_<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP,
-                    [(X86fp_to_i64mem RFP64:$src, addr:$op)]>,
-                    Requires<[HasSSE3orAVX]>;
+                    [(X86fp_to_i64mem RFP64:$src, addr:$op)]>;
 def ISTT_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP,
-                    [(X86fp_to_i16mem RFP80:$src, addr:$op)]>,
-                    Requires<[HasSSE3orAVX]>;
+                    [(X86fp_to_i16mem RFP80:$src, addr:$op)]>;
 def ISTT_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP,
-                    [(X86fp_to_i32mem RFP80:$src, addr:$op)]>,
-                    Requires<[HasSSE3orAVX]>;
+                    [(X86fp_to_i32mem RFP80:$src, addr:$op)]>;
 def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
-                    [(X86fp_to_i64mem RFP80:$src, addr:$op)]>,
-                    Requires<[HasSSE3orAVX]>;
+                    [(X86fp_to_i64mem RFP80:$src, addr:$op)]>;
+} // Predicates = [HasSSE3]

 let mayStore = 1 in {
 def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">;
diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td
index 76cae1870e7..957a923bb3d 100644
--- a/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/llvm/lib/Target/X86/X86InstrFormats.td
@@ -436,7 +436,7 @@ class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
 //   SS42FI - SSE 4.2 instructions with T8XD prefix.
 class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
              list<dag> pattern>
-      : I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasSSE42orAVX]>;
+      : I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasSSE42]>;

 //   SS42AI = SSE 4.2 instructions with TA prefix
 class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
@@ -569,11 +569,6 @@ class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
 // MMXIi8 - MMX instructions with ImmT == Imm8 and TB prefix.
 // MMXID  - MMX instructions with XD prefix.
 // MMXIS  - MMX instructions with XS prefix.
-// MMXPI  - SSE 1 & 2 packed instructions for MMX with no AVX equivalents
-// MMXSDIi8  - SSE2 instructions with ImmT == Imm8 and XD prefix. No AVX equiv.
-// MMXSSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix. No AVX equiv.
-// MMXSS38I - SSSE3 instructions with T8 prefix for MMX registers. No AVX equiv.
-// MMXSS3AI - SSSE3 instructions with TA prefix for MMX registers. No AVX equiv.
 class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern>
       : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>;
@@ -595,21 +590,3 @@ class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm,
 class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern>
       : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>;
-
-class MMXPI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
-            Domain d>
-      : I<o, F, outs, ins, asm, pattern, d> {
-  let Predicates = !if(hasOpSizePrefix /* OpSize */, [HasXMMInt], [HasXMM]);
-}
-class MMXSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
-               list<dag> pattern>
-      : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasXMMInt]>;
-class MMXSSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
-               list<dag> pattern>
-      : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasXMMInt]>;
-class MMXSS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
-               list<dag> pattern>
-      : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSSE3orAVX]>;
-class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
-               list<dag> pattern>
-      : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSSE3orAVX]>;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 62eadcff8e4..aeb12757ce1 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -470,14 +470,8 @@ def HasSSSE3     : Predicate<"Subtarget->hasSSSE3()">;
 def HasSSE41     : Predicate<"Subtarget->hasSSE41()">;
 def HasSSE42     : Predicate<"Subtarget->hasSSE42()">;
 def HasSSE4A     : Predicate<"Subtarget->hasSSE4A()">;
-
 def HasAVX       : Predicate<"Subtarget->hasAVX()">;
 def HasAVX2      : Predicate<"Subtarget->hasAVX2()">;
-def HasXMM       : Predicate<"Subtarget->hasXMM()">;
-def HasXMMInt    : Predicate<"Subtarget->hasXMMInt()">;
-def HasSSE3orAVX : Predicate<"Subtarget->hasSSE3orAVX()">;
-def HasSSSE3orAVX : Predicate<"Subtarget->hasSSSE3orAVX()">;
-def HasSSE42orAVX : Predicate<"Subtarget->hasSSE42orAVX()">;

 def HasPOPCNT    : Predicate<"Subtarget->hasPOPCNT()">;
 def HasAES       : Predicate<"Subtarget->hasAES()">;
@@ -492,8 +486,8 @@ def HasFSGSBase  : Predicate<"Subtarget->hasFSGSBase()">;
 def HasLZCNT     : Predicate<"Subtarget->hasLZCNT()">;
 def HasBMI       : Predicate<"Subtarget->hasBMI()">;
 def HasBMI2      : Predicate<"Subtarget->hasBMI2()">;
-def FPStackf32   : Predicate<"!Subtarget->hasXMM()">;
-def FPStackf64   : Predicate<"!Subtarget->hasXMMInt()">;
+def FPStackf32   : Predicate<"!Subtarget->hasSSE1()">;
+def FPStackf64   : Predicate<"!Subtarget->hasSSE2()">;
 def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
 def In32BitMode  : Predicate<"!Subtarget->is64Bit()">,
                              AssemblerPredicate<"!Mode64Bit">;
diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td
index 5bbf86a6ad4..3025a4dbd37 100644
--- a/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/llvm/lib/Target/X86/X86InstrMMX.td
@@ -60,14 +60,14 @@ let Constraints = "$src1 = $dst" in {
 /// Unary MMX instructions requiring SSSE3.
 multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
                                Intrinsic IntId64> {
-  def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
-                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                      [(set VR64:$dst, (IntId64 VR64:$src))]>;
-
-  def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
-                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                      [(set VR64:$dst,
-                        (IntId64 (bitconvert (memopmmx addr:$src))))]>;
+  def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
+                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+                   [(set VR64:$dst, (IntId64 VR64:$src))]>;
+
+  def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
+                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+                   [(set VR64:$dst,
+                     (IntId64 (bitconvert (memopmmx addr:$src))))]>;
 }

 /// Binary MMX instructions requiring SSSE3.
@@ -75,11 +75,11 @@ let ImmT = NoImm, Constraints = "$src1 = $dst" in {
 multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
                              Intrinsic IntId64> {
   let isCommutable = 0 in
-  def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst),
+  def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
        (ins VR64:$src1, VR64:$src2),
        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
       [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>;
-  def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst),
+  def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
        (ins VR64:$src1, i64mem:$src2),
        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
       [(set VR64:$dst,
@@ -90,11 +90,11 @@ multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
 /// PALIGN MMX instructions (require SSSE3).
 multiclass ssse3_palign_mm<string asm, Intrinsic IntId> {
-  def R64irr  : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
+  def R64irr  : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
       (ins VR64:$src1, VR64:$src2, i8imm:$src3),
       !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
       [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>;
-  def R64irm  : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
+  def R64irm  : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
       (ins VR64:$src1, i64mem:$src2, i8imm:$src3),
       !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
       [(set VR64:$dst, (IntId VR64:$src1,
@@ -104,18 +104,18 @@ multiclass ssse3_palign_mm<string asm, Intrinsic IntId> {
 multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
                          string asm, Domain d> {
-  def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+  def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
                         [(set DstRC:$dst, (Int SrcRC:$src))], d>;
-  def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+  def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
                         [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>;
 }

 multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
                     RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
                     PatFrag ld_frag, string asm, Domain d> {
-  def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2),
+  def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2),
               asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>;
-  def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst),
+  def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
                   (ins DstRC:$src1, x86memop:$src2), asm,
               [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>;
 }
@@ -175,24 +175,24 @@ def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
                         "movq\t{$src, $dst|$dst, $src}",
                         [(store (x86mmx VR64:$src), addr:$dst)]>;

-def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
+def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
                           (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}",
                           [(set VR64:$dst,
                             (x86mmx (bitconvert
                             (i64 (vector_extract (v2i64 VR128:$src),
                                   (iPTR 0))))))]>;

-def MMX_MOVQ2DQrr : MMXSSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
+def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
                             (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
           [(set VR128:$dst,
             (v2i64 (scalar_to_vector
                               (i64 (bitconvert (x86mmx VR64:$src))))))]>;

 let neverHasSideEffects = 1 in
-def MMX_MOVQ2FR64rr: MMXSSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
+def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
                        (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", []>;

-def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
+def MMX_MOVFR642Qrr: SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
                        (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}", []>;

 def MMX_MOVNTQmr  : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 266d1c58df8..95185f17f53 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -171,7 +171,7 @@ def : Pat<(v4f64 (scalar_to_vector FR64:$src)),

 // Bitcasts between 128-bit vector types. Return the original type since
 // no instruction is needed for the conversion
-let Predicates = [HasXMMInt] in {
+let Predicates = [HasSSE2] in {
   def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
   def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
   def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
@@ -244,9 +244,9 @@ let Predicates = [HasAVX] in {
 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
     isPseudo = 1 in {
   def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
-                   [(set FR32:$dst, fp32imm0)]>, Requires<[HasXMM]>;
+                   [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>;
   def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
-                   [(set FR64:$dst, fpimm0)]>, Requires<[HasXMMInt]>;
+                   [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2]>;
 }

 //===----------------------------------------------------------------------===//
@@ -1407,9 +1407,11 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,

 multiclass sse12_cvt_s_np<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                           X86MemOperand x86memop, string asm> {
+let neverHasSideEffects = 1 in {
   def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, []>;
   let mayLoad = 1 in
   def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, []>;
+} // neverHasSideEffects = 1
 }

 multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
@@ -1423,12 +1425,14 @@ multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,

 multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                           X86MemOperand x86memop, string asm> {
+let neverHasSideEffects = 1 in {
   def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
               !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
   let mayLoad = 1 in
   def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins DstRC:$src1, x86memop:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
+} // neverHasSideEffects = 1
 }

 defm VCVTTSS2SI   : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
@@ -1459,7 +1463,7 @@ defm VCVTSI2SDL  : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD,
 defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD,
                                   VEX_4V, VEX_W, VEX_LIG;

-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX], AddedComplexity = 1 in {
   def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
             (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
   def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
@@ -1623,26 +1627,26 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
                             SSEPackedSingle>, TB; /* PD SSE3 form is avaiable */
 }

-let Predicates = [HasSSE1] in {
+let Predicates = [HasAVX] in {
   def : Pat<(int_x86_sse_cvtss2si VR128:$src),
-            (CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+            (VCVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
   def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
-            (CVTSS2SIrm addr:$src)>;
+            (VCVTSS2SIrm addr:$src)>;
   def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
-            (CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+            (VCVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
   def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
-            (CVTSS2SI64rm addr:$src)>;
+            (VCVTSS2SI64rm addr:$src)>;
 }

-let Predicates = [HasAVX] in {
+let Predicates = [HasSSE1] in {
   def : Pat<(int_x86_sse_cvtss2si VR128:$src),
-            (VCVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+            (CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
   def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
-            (VCVTSS2SIrm addr:$src)>;
+            (CVTSS2SIrm addr:$src)>;
   def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
-            (VCVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+            (CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
   def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
-            (VCVTSS2SI64rm addr:$src)>;
+            (CVTSS2SI64rm addr:$src)>;
 }

 /// SSE 2 Only
@@ -1844,6 +1848,7 @@ def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),

 // Convert with truncation packed single/double fp to doubleword
 // SSE2 packed instructions with XS prefix
+let neverHasSideEffects = 1 in {
 def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
 let mayLoad = 1 in
@@ -1854,14 +1859,7 @@ def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
 let mayLoad = 1 in
 def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                       "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
-def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                      "cvttps2dq\t{$src, $dst|$dst, $src}",
-                      [(set VR128:$dst,
-                            (int_x86_sse2_cvttps2dq VR128:$src))]>;
-def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                      "cvttps2dq\t{$src, $dst|$dst, $src}",
-                      [(set VR128:$dst,
-                            (int_x86_sse2_cvttps2dq (memop addr:$src)))]>;
+} // neverHasSideEffects = 1

 def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "vcvttps2dq\t{$src, $dst|$dst, $src}",
@@ -1874,12 +1872,14 @@ def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                                            (memop addr:$src)))]>,
                       XS, VEX, Requires<[HasAVX]>;

-let Predicates = [HasSSE2] in {
-  def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
-            (Int_CVTDQ2PSrr VR128:$src)>;
-  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
-            (CVTTPS2DQrr VR128:$src)>;
-}
+def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                      "cvttps2dq\t{$src, $dst|$dst, $src}",
+                      [(set VR128:$dst,
+                            (int_x86_sse2_cvttps2dq VR128:$src))]>;
+def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                      "cvttps2dq\t{$src, $dst|$dst, $src}",
+                      [(set VR128:$dst,
+                            (int_x86_sse2_cvttps2dq (memop addr:$src)))]>;

 let Predicates = [HasAVX] in {
   def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
@@ -1892,6 +1892,13 @@ let Predicates = [HasAVX] in {
             (VCVTTPS2DQYrr VR256:$src)>;
 }

+let Predicates = [HasSSE2] in {
+  def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
+            (Int_CVTDQ2PSrr VR128:$src)>;
+  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
+            (CVTTPS2DQrr VR128:$src)>;
+}
+
 def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "cvttpd2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
@@ -3145,6 +3152,7 @@ let Predicates = [HasAVX] in {
                 sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps>, VEX;
 }

+let AddedComplexity = 1 in {
 def : Pat<(f32 (fsqrt FR32:$src)),
           (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
 def : Pat<(f32 (fsqrt (load addr:$src))),
@@ -3167,8 +3175,9 @@ def : Pat<(f32 (X86frcp FR32:$src)),
 def : Pat<(f32 (X86frcp (load addr:$src))),
           (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
           Requires<[HasAVX, OptForSize]>;
+}

-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX], AddedComplexity = 1 in {
   def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
             (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
                 (VSQRTSSr (f32 (IMPLICIT_DEF)),
@@ -3292,11 +3301,11 @@ def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
 def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                  "movnti{l}\t{$src, $dst|$dst, $src}",
                  [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
-               TB, Requires<[HasXMMInt]>;
+               TB, Requires<[HasSSE2]>;
 def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
                      "movnti{q}\t{$src, $dst|$dst, $src}",
                      [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
-                  TB, Requires<[HasXMMInt]>;
+                  TB, Requires<[HasSSE2]>;
 }

 //===----------------------------------------------------------------------===//
@@ -3304,7 +3313,7 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
 //===----------------------------------------------------------------------===//

 // Prefetch intrinsic.
-let Predicates = [HasXMM] in {
+let Predicates = [HasSSE1] in {
 def PREFETCHT0   : I<0x18, MRM1m, (outs), (ins i8mem:$src),
     "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>, TB;
 def PREFETCHT1   : I<0x18, MRM2m, (outs), (ins i8mem:$src),
@@ -3318,7 +3327,7 @@ def PREFETCHNTA  : I<0x18, MRM0m, (outs), (ins i8mem:$src),
 // Flush cache
 def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
                "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
-              TB, Requires<[HasXMMInt]>;
+              TB, Requires<[HasSSE2]>;

 // Pause. This "instruction" is encoded as "rep; nop", so even though it
 // was introduced with SSE2, it's backward compatible.
@@ -3326,11 +3335,11 @@ def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;

 // Load, store, and memory fence
 def SFENCE : I<0xAE, MRM_F8, (outs), (ins),
-               "sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasXMM]>;
+               "sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasSSE1]>;
 def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
-               "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasXMMInt]>;
+               "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
 def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
-               "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasXMMInt]>;
+               "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;

 def : Pat<(X86SFence), (SFENCE)>;
 def : Pat<(X86LFence), (LFENCE)>;
@@ -5475,18 +5484,18 @@ def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
 let usesCustomInserter = 1 in {
 def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
                 [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
-                Requires<[HasSSE3orAVX]>;
+                Requires<[HasSSE3]>;
 def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2),
                 [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>,
-                Requires<[HasSSE3orAVX]>;
+                Requires<[HasSSE3]>;
 }

 let Uses = [EAX, ECX, EDX] in
 def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, TB,
-                 Requires<[HasSSE3orAVX]>;
+                 Requires<[HasSSE3]>;
 let Uses = [ECX, EAX] in
 def MWAITrr   : I<0x01, MRM_C9, (outs), (ins), "mwait", []>, TB,
-                Requires<[HasSSE3orAVX]>;
+                Requires<[HasSSE3]>;

 def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
 def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>;
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 10ef868968b..5a322400ee9 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -173,12 +173,12 @@ public:
   bool hasCMov() const { return HasCMov; }
   bool hasMMX() const { return X86SSELevel >= MMX; }
-  bool hasSSE1() const { return X86SSELevel >= SSE1 && !hasAVX(); }
-  bool hasSSE2() const { return X86SSELevel >= SSE2 && !hasAVX(); }
-  bool hasSSE3() const { return X86SSELevel >= SSE3 && !hasAVX(); }
-  bool hasSSSE3() const { return X86SSELevel >= SSSE3 && !hasAVX(); }
-  bool hasSSE41() const { return X86SSELevel >= SSE41 && !hasAVX(); }
-  bool hasSSE42() const { return X86SSELevel >= SSE42 && !hasAVX(); }
+  bool hasSSE1() const { return X86SSELevel >= SSE1; }
+  bool hasSSE2() const { return X86SSELevel >= SSE2; }
+  bool hasSSE3() const { return X86SSELevel >= SSE3; }
+  bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
+  bool hasSSE41() const { return X86SSELevel >= SSE41; }
+  bool hasSSE42() const { return X86SSELevel >= SSE42; }
   bool hasSSE4A() const { return HasSSE4A; }
   bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
   bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
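Why the small AddedComplexity = 1 bumps in the X86InstrSSE.td hunks are enough once the predicates overlap: TableGen orders the generated matcher by a per-pattern complexity score, so when both the SSE and the VEX-encoded AVX pattern are legal for a node, the higher-scoring one is tried first. The following is a rough illustration of that selection order only; it is plain C++, not LLVM code, and the pattern names and scores are made up for the example.

```cpp
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Toy model of a selectable pattern: a predicate plus a complexity score
// (base score plus any AddedComplexity from the .td file).
struct PatternSketch {
  std::string Name;     // e.g. an AVX form vs. an SSE form of the same op
  bool PredicateHolds;  // result of the HasAVX / HasSSE1 style predicate
  int Complexity;       // higher values are tried first
};

// Try patterns in decreasing complexity order and take the first whose
// predicate holds -- the same effect pattern ordering has in the matcher.
static std::string select(std::vector<PatternSketch> Pats) {
  std::stable_sort(Pats.begin(), Pats.end(),
                   [](const PatternSketch &A, const PatternSketch &B) {
                     return A.Complexity > B.Complexity;
                   });
  for (const PatternSketch &P : Pats)
    if (P.PredicateHolds)
      return P.Name;
  return "no match";
}

int main() {
  // On an AVX target both predicates hold; the AVX pattern wins because of
  // its extra AddedComplexity.
  std::cout << select({{"AVX form", true, 4}, {"SSE form", true, 3}}) << '\n';
  // On an SSE-only target only the SSE pattern's predicate holds, so it is
  // still selected even though it has the lower score.
  std::cout << select({{"AVX form", false, 4}, {"SSE form", true, 3}}) << '\n';
}
```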

