diff options
| -rw-r--r-- | llvm/lib/Target/X86/X86.td | 454 |
| -rw-r--r-- | llvm/lib/Target/X86/X86Subtarget.cpp | 3 |
| -rw-r--r-- | llvm/lib/Target/X86/X86Subtarget.h | 9 |
| -rw-r--r-- | llvm/test/CodeGen/X86/mmx-intrinsics.ll | 4 |
| -rw-r--r-- | llvm/test/CodeGen/X86/mmx-only.ll | 21 |
| -rw-r--r-- | llvm/test/CodeGen/X86/mult-alt-x86.ll | 2 |
| -rw-r--r-- | llvm/test/CodeGen/X86/sse-only.ll | 19 |
7 files changed, 341 insertions, 171 deletions
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 3a3b03874c0..fa0b674ff94 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -37,14 +37,17 @@ def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true", def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true", "Support POPCNT instruction">; - -def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX", +// The MMX subtarget feature is separate from the rest of the SSE features +// because it's important (for odd compatibility reasons) to be able to +// turn it off explicitly while allowing SSE+ to be on. +def FeatureMMX : SubtargetFeature<"mmx","HasMMX", "true", "Enable MMX instructions">; + def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1", "Enable SSE instructions", // SSE codegen depends on cmovs, and all // SSE1+ processors support them. - [FeatureMMX, FeatureCMOV]>; + [FeatureCMOV]>; def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2", "Enable SSE2 instructions", [FeatureSSE1]>; @@ -219,184 +222,241 @@ def : Proc<"pentium-mmx", [FeatureSlowUAMem16, FeatureMMX]>; def : Proc<"i686", [FeatureSlowUAMem16]>; def : Proc<"pentiumpro", [FeatureSlowUAMem16, FeatureCMOV]>; def : Proc<"pentium2", [FeatureSlowUAMem16, FeatureMMX, FeatureCMOV]>; -def : Proc<"pentium3", [FeatureSlowUAMem16, FeatureSSE1]>; -def : Proc<"pentium3m", [FeatureSlowUAMem16, FeatureSSE1, +def : Proc<"pentium3", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1]>; +def : Proc<"pentium3m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1, FeatureSlowBTMem]>; -def : Proc<"pentium-m", [FeatureSlowUAMem16, FeatureSSE2, +def : Proc<"pentium-m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2, FeatureSlowBTMem]>; -def : Proc<"pentium4", [FeatureSlowUAMem16, FeatureSSE2]>; -def : Proc<"pentium4m", [FeatureSlowUAMem16, FeatureSSE2, +def : Proc<"pentium4", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2]>; +def : Proc<"pentium4m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2, 
FeatureSlowBTMem]>; // Intel Core Duo. -def : ProcessorModel<"yonah", SandyBridgeModel, - [FeatureSlowUAMem16, FeatureSSE3, FeatureSlowBTMem]>; +def : ProcessorModel< + "yonah", SandyBridgeModel, + [ FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureSlowBTMem ]>; // NetBurst. -def : Proc<"prescott", [FeatureSlowUAMem16, FeatureSSE3, FeatureSlowBTMem]>; -def : Proc<"nocona", [FeatureSlowUAMem16, FeatureSSE3, FeatureCMPXCHG16B, - FeatureSlowBTMem]>; +def : Proc<"prescott", + [ FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureSlowBTMem ]>; +def : Proc<"nocona", [ + FeatureSlowUAMem16, + FeatureMMX, + FeatureSSE3, + FeatureCMPXCHG16B, + FeatureSlowBTMem +]>; // Intel Core 2 Solo/Duo. -def : ProcessorModel<"core2", SandyBridgeModel, - [FeatureSlowUAMem16, FeatureSSSE3, FeatureCMPXCHG16B, - FeatureSlowBTMem]>; -def : ProcessorModel<"penryn", SandyBridgeModel, - [FeatureSlowUAMem16, FeatureSSE41, FeatureCMPXCHG16B, - FeatureSlowBTMem]>; +def : ProcessorModel<"core2", SandyBridgeModel, [ + FeatureSlowUAMem16, + FeatureMMX, + FeatureSSSE3, + FeatureCMPXCHG16B, + FeatureSlowBTMem +]>; +def : ProcessorModel<"penryn", SandyBridgeModel, [ + FeatureSlowUAMem16, + FeatureMMX, + FeatureSSE41, + FeatureCMPXCHG16B, + FeatureSlowBTMem +]>; // Atom CPUs. class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [ - ProcIntelAtom, - FeatureSlowUAMem16, - FeatureSSSE3, - FeatureCMPXCHG16B, - FeatureMOVBE, - FeatureSlowBTMem, - FeatureLeaForSP, - FeatureSlowDivide32, - FeatureSlowDivide64, - FeatureCallRegIndirect, - FeatureLEAUsesAG, - FeaturePadShortFunctions - ]>; + ProcIntelAtom, + FeatureSlowUAMem16, + FeatureMMX, + FeatureSSSE3, + FeatureCMPXCHG16B, + FeatureMOVBE, + FeatureSlowBTMem, + FeatureLeaForSP, + FeatureSlowDivide32, + FeatureSlowDivide64, + FeatureCallRegIndirect, + FeatureLEAUsesAG, + FeaturePadShortFunctions +]>; def : BonnellProc<"bonnell">; def : BonnellProc<"atom">; // Pin the generic name to the baseline. 
class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [ - ProcIntelSLM, - FeatureSSE42, - FeatureCMPXCHG16B, - FeatureMOVBE, - FeaturePOPCNT, - FeaturePCLMUL, - FeatureAES, - FeatureSlowDivide64, - FeatureCallRegIndirect, - FeaturePRFCHW, - FeatureSlowLEA, - FeatureSlowIncDec, - FeatureSlowBTMem - ]>; + ProcIntelSLM, + FeatureMMX, + FeatureSSE42, + FeatureCMPXCHG16B, + FeatureMOVBE, + FeaturePOPCNT, + FeaturePCLMUL, + FeatureAES, + FeatureSlowDivide64, + FeatureCallRegIndirect, + FeaturePRFCHW, + FeatureSlowLEA, + FeatureSlowIncDec, + FeatureSlowBTMem +]>; def : SilvermontProc<"silvermont">; def : SilvermontProc<"slm">; // Legacy alias. // "Arrandale" along with corei3 and corei5 class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [ - FeatureSSE42, - FeatureCMPXCHG16B, - FeatureSlowBTMem, - FeaturePOPCNT - ]>; + FeatureMMX, + FeatureSSE42, + FeatureCMPXCHG16B, + FeatureSlowBTMem, + FeaturePOPCNT +]>; def : NehalemProc<"nehalem">; def : NehalemProc<"corei7">; // Westmere is a similar machine to nehalem with some additional features. // Westmere is the corei3/i5/i7 path from nehalem to sandybridge class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [ - FeatureSSE42, - FeatureCMPXCHG16B, - FeatureSlowBTMem, - FeaturePOPCNT, - FeatureAES, - FeaturePCLMUL - ]>; + FeatureMMX, + FeatureSSE42, + FeatureCMPXCHG16B, + FeatureSlowBTMem, + FeaturePOPCNT, + FeatureAES, + FeaturePCLMUL +]>; def : WestmereProc<"westmere">; // SSE is not listed here since llvm treats AVX as a reimplementation of SSE, // rather than a superset. 
class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [ - FeatureAVX, - FeatureCMPXCHG16B, - FeatureSlowBTMem, - FeatureSlowUAMem32, - FeaturePOPCNT, - FeatureAES, - FeaturePCLMUL - ]>; + FeatureMMX, + FeatureAVX, + FeatureCMPXCHG16B, + FeatureSlowBTMem, + FeatureSlowUAMem32, + FeaturePOPCNT, + FeatureAES, + FeaturePCLMUL +]>; def : SandyBridgeProc<"sandybridge">; def : SandyBridgeProc<"corei7-avx">; // Legacy alias. class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [ - FeatureAVX, - FeatureCMPXCHG16B, - FeatureSlowBTMem, - FeatureSlowUAMem32, - FeaturePOPCNT, - FeatureAES, - FeaturePCLMUL, - FeatureRDRAND, - FeatureF16C, - FeatureFSGSBase - ]>; + FeatureMMX, + FeatureAVX, + FeatureCMPXCHG16B, + FeatureSlowBTMem, + FeatureSlowUAMem32, + FeaturePOPCNT, + FeatureAES, + FeaturePCLMUL, + FeatureRDRAND, + FeatureF16C, + FeatureFSGSBase +]>; def : IvyBridgeProc<"ivybridge">; def : IvyBridgeProc<"core-avx-i">; // Legacy alias. class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel, [ - FeatureAVX2, - FeatureCMPXCHG16B, - FeatureSlowBTMem, - FeaturePOPCNT, - FeatureAES, - FeaturePCLMUL, - FeatureRDRAND, - FeatureF16C, - FeatureFSGSBase, - FeatureMOVBE, - FeatureLZCNT, - FeatureBMI, - FeatureBMI2, - FeatureFMA, - FeatureRTM, - FeatureHLE, - FeatureSlowIncDec - ]>; + FeatureMMX, + FeatureAVX2, + FeatureCMPXCHG16B, + FeatureSlowBTMem, + FeaturePOPCNT, + FeatureAES, + FeaturePCLMUL, + FeatureRDRAND, + FeatureF16C, + FeatureFSGSBase, + FeatureMOVBE, + FeatureLZCNT, + FeatureBMI, + FeatureBMI2, + FeatureFMA, + FeatureRTM, + FeatureHLE, + FeatureSlowIncDec +]>; def : HaswellProc<"haswell">; def : HaswellProc<"core-avx2">; // Legacy alias. 
class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [ - FeatureAVX2, - FeatureCMPXCHG16B, - FeatureSlowBTMem, - FeaturePOPCNT, - FeatureAES, - FeaturePCLMUL, - FeatureRDRAND, - FeatureF16C, - FeatureFSGSBase, - FeatureMOVBE, - FeatureLZCNT, - FeatureBMI, - FeatureBMI2, - FeatureFMA, - FeatureRTM, - FeatureHLE, - FeatureADX, - FeatureRDSEED, - FeatureSlowIncDec - ]>; + FeatureMMX, + FeatureAVX2, + FeatureCMPXCHG16B, + FeatureSlowBTMem, + FeaturePOPCNT, + FeatureAES, + FeaturePCLMUL, + FeatureRDRAND, + FeatureF16C, + FeatureFSGSBase, + FeatureMOVBE, + FeatureLZCNT, + FeatureBMI, + FeatureBMI2, + FeatureFMA, + FeatureRTM, + FeatureHLE, + FeatureADX, + FeatureRDSEED, + FeatureSlowIncDec +]>; def : BroadwellProc<"broadwell">; // FIXME: define KNL model -class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel, - [FeatureAVX512, FeatureERI, FeatureCDI, FeaturePFI, - FeatureCMPXCHG16B, FeaturePOPCNT, - FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C, - FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI, - FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE, - FeatureSlowIncDec, FeatureMPX]>; +class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel, [ + FeatureMMX, + FeatureAVX512, + FeatureERI, + FeatureCDI, + FeaturePFI, + FeatureCMPXCHG16B, + FeaturePOPCNT, + FeatureAES, + FeaturePCLMUL, + FeatureRDRAND, + FeatureF16C, + FeatureFSGSBase, + FeatureMOVBE, + FeatureLZCNT, + FeatureBMI, + FeatureBMI2, + FeatureFMA, + FeatureRTM, + FeatureHLE, + FeatureSlowIncDec, + FeatureMPX +]>; def : KnightsLandingProc<"knl">; // FIXME: define SKX model -class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel, - [FeatureAVX512, FeatureCDI, - FeatureDQI, FeatureBWI, FeatureVLX, - FeatureCMPXCHG16B, FeatureSlowBTMem, - FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND, - FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, - FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM, - FeatureHLE, FeatureADX, 
FeatureRDSEED, FeatureSlowIncDec, - FeatureMPX]>; +class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel, [ + FeatureMMX, + FeatureAVX512, + FeatureCDI, + FeatureDQI, + FeatureBWI, + FeatureVLX, + FeatureCMPXCHG16B, + FeatureSlowBTMem, + FeaturePOPCNT, + FeatureAES, + FeaturePCLMUL, + FeatureRDRAND, + FeatureF16C, + FeatureFSGSBase, + FeatureMOVBE, + FeatureLZCNT, + FeatureBMI, + FeatureBMI2, + FeatureFMA, + FeatureRTM, + FeatureHLE, + FeatureADX, + FeatureRDSEED, + FeatureSlowIncDec, + FeatureMPX +]>; def : SkylakeProc<"skylake">; def : SkylakeProc<"skx">; // Legacy alias. @@ -447,52 +507,117 @@ def : Proc<"barcelona", [FeatureSSE4A, FeatureSlowSHLD]>; // Bobcat -def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B, - FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT, - FeatureSlowSHLD]>; +def : Proc<"btver1", [ + FeatureMMX, + FeatureSSSE3, + FeatureSSE4A, + FeatureCMPXCHG16B, + FeaturePRFCHW, + FeatureLZCNT, + FeaturePOPCNT, + FeatureSlowSHLD +]>; // Jaguar -def : ProcessorModel<"btver2", BtVer2Model, - [FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B, - FeaturePRFCHW, FeatureAES, FeaturePCLMUL, - FeatureBMI, FeatureF16C, FeatureMOVBE, - FeatureLZCNT, FeaturePOPCNT, - FeatureSlowSHLD]>; +def : ProcessorModel<"btver2", BtVer2Model, [ + FeatureMMX, + FeatureAVX, + FeatureSSE4A, + FeatureCMPXCHG16B, + FeaturePRFCHW, + FeatureAES, + FeaturePCLMUL, + FeatureBMI, + FeatureF16C, + FeatureMOVBE, + FeatureLZCNT, + FeaturePOPCNT, + FeatureSlowSHLD +]>; // Bulldozer -def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, - FeatureAES, FeaturePRFCHW, FeaturePCLMUL, - FeatureAVX, FeatureSSE4A, FeatureLZCNT, - FeaturePOPCNT, FeatureSlowSHLD]>; +def : Proc<"bdver1", [ + FeatureXOP, + FeatureFMA4, + FeatureCMPXCHG16B, + FeatureAES, + FeaturePRFCHW, + FeaturePCLMUL, + FeatureMMX, + FeatureAVX, + FeatureSSE4A, + FeatureLZCNT, + FeaturePOPCNT, + FeatureSlowSHLD +]>; // Piledriver -def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, - 
FeatureAES, FeaturePRFCHW, FeaturePCLMUL, - FeatureAVX, FeatureSSE4A, FeatureF16C, - FeatureLZCNT, FeaturePOPCNT, FeatureBMI, - FeatureTBM, FeatureFMA, FeatureSlowSHLD]>; +def : Proc<"bdver2", [ + FeatureXOP, + FeatureFMA4, + FeatureCMPXCHG16B, + FeatureAES, + FeaturePRFCHW, + FeaturePCLMUL, + FeatureMMX, + FeatureAVX, + FeatureSSE4A, + FeatureF16C, + FeatureLZCNT, + FeaturePOPCNT, + FeatureBMI, + FeatureTBM, + FeatureFMA, + FeatureSlowSHLD +]>; // Steamroller -def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, - FeatureAES, FeaturePRFCHW, FeaturePCLMUL, - FeatureAVX, FeatureSSE4A, FeatureF16C, - FeatureLZCNT, FeaturePOPCNT, FeatureBMI, - FeatureTBM, FeatureFMA, FeatureSlowSHLD, - FeatureFSGSBase]>; +def : Proc<"bdver3", [ + FeatureXOP, + FeatureFMA4, + FeatureCMPXCHG16B, + FeatureAES, + FeaturePRFCHW, + FeaturePCLMUL, + FeatureMMX, + FeatureAVX, + FeatureSSE4A, + FeatureF16C, + FeatureLZCNT, + FeaturePOPCNT, + FeatureBMI, + FeatureTBM, + FeatureFMA, + FeatureSlowSHLD, + FeatureFSGSBase +]>; // Excavator -def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4, - FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, - FeaturePCLMUL, FeatureF16C, FeatureLZCNT, - FeaturePOPCNT, FeatureBMI, FeatureBMI2, - FeatureTBM, FeatureFMA, FeatureSSE4A, - FeatureFSGSBase]>; +def : Proc<"bdver4", [ + FeatureMMX, + FeatureAVX2, + FeatureXOP, + FeatureFMA4, + FeatureCMPXCHG16B, + FeatureAES, + FeaturePRFCHW, + FeaturePCLMUL, + FeatureF16C, + FeatureLZCNT, + FeaturePOPCNT, + FeatureBMI, + FeatureBMI2, + FeatureTBM, + FeatureFMA, + FeatureSSE4A, + FeatureFSGSBase +]>; def : Proc<"geode", [FeatureSlowUAMem16, Feature3DNowA]>; def : Proc<"winchip-c6", [FeatureSlowUAMem16, FeatureMMX]>; def : Proc<"winchip2", [FeatureSlowUAMem16, Feature3DNow]>; def : Proc<"c3", [FeatureSlowUAMem16, Feature3DNow]>; -def : Proc<"c3-2", [FeatureSlowUAMem16, FeatureSSE1]>; +def : Proc<"c3-2", [ FeatureSlowUAMem16, FeatureMMX, FeatureSSE1 ]>; // We also provide a generic 64-bit specific x86 
processor model which tries to // be good for modern chips without enabling instruction set encodings past the @@ -504,8 +629,9 @@ def : Proc<"c3-2", [FeatureSlowUAMem16, FeatureSSE1]>; // covers a huge swath of x86 processors. If there are specific scheduling // knobs which need to be tuned differently for AMD chips, we might consider // forming a common base for them. -def : ProcessorModel<"x86-64", SandyBridgeModel, - [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>; +def : ProcessorModel< + "x86-64", SandyBridgeModel, + [ FeatureMMX, FeatureSSE2, Feature64Bit, FeatureSlowBTMem ]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp index 5b53ca93399..a4db4e60280 100644 --- a/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/llvm/lib/Target/X86/X86Subtarget.cpp @@ -228,9 +228,10 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { } void X86Subtarget::initializeEnvironment() { - X86SSELevel = NoMMXSSE; + X86SSELevel = NoSSE; X863DNowLevel = NoThreeDNow; HasCMov = false; + HasMMX = false; HasX86_64 = false; HasPOPCNT = false; HasSSE4A = false; diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index 52e68c08180..198e7fbf135 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -47,7 +47,7 @@ class X86Subtarget final : public X86GenSubtargetInfo { protected: enum X86SSEEnum { - NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F + NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F }; enum X863DNowEnum { @@ -64,7 +64,7 @@ protected: /// Which PIC style to use PICStyles::Style PICStyle; - /// MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported. + /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported. X86SSEEnum X86SSELevel; /// 3DNow, 3DNow Athlon, or none supported. 
@@ -74,6 +74,9 @@ protected: /// (generally pentium pro+). bool HasCMov; + /// True if this processor supports MMX instructions. + bool HasMMX; + /// True if the processor supports X86-64 instructions. bool HasX86_64; @@ -319,7 +322,7 @@ public: void setPICStyle(PICStyles::Style Style) { PICStyle = Style; } bool hasCMov() const { return HasCMov; } - bool hasMMX() const { return X86SSELevel >= MMX; } + bool hasMMX() const { return HasMMX; } bool hasSSE1() const { return X86SSELevel >= SSE1; } bool hasSSE2() const { return X86SSELevel >= SSE2; } bool hasSSE3() const { return X86SSELevel >= SSE3; } diff --git a/llvm/test/CodeGen/X86/mmx-intrinsics.ll b/llvm/test/CodeGen/X86/mmx-intrinsics.ll index d9bcdc4effd..7647fccb580 100644 --- a/llvm/test/CodeGen/X86/mmx-intrinsics.ll +++ b/llvm/test/CodeGen/X86/mmx-intrinsics.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X86 -; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=X86 +; RUN: llc < %s -march=x86 -mattr=+mmx,+avx | FileCheck %s --check-prefix=ALL --check-prefix=X86 ; RUN: llc < %s -march=x86-64 -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X64 -; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=X64 +; RUN: llc < %s -march=x86-64 -mattr=+mmx,+avx | FileCheck %s --check-prefix=ALL --check-prefix=X64 declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone diff --git a/llvm/test/CodeGen/X86/mmx-only.ll b/llvm/test/CodeGen/X86/mmx-only.ll new file mode 100644 index 00000000000..35598d5f6e1 --- /dev/null +++ b/llvm/test/CodeGen/X86/mmx-only.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -march=x86 -mattr=+mmx | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+mmx,-sse | FileCheck %s + +; Test that turning off sse doesn't turn off mmx. 
+ +declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone { +; CHECK-LABEL: @test88 +; CHECK: pcmpgtd +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} diff --git a/llvm/test/CodeGen/X86/mult-alt-x86.ll b/llvm/test/CodeGen/X86/mult-alt-x86.ll index 5174f85adb9..1c83fedad3c 100644 --- a/llvm/test/CodeGen/X86/mult-alt-x86.ll +++ b/llvm/test/CodeGen/X86/mult-alt-x86.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -no-integrated-as +; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 -no-integrated-as ; ModuleID = 'mult-alt-x86.c' target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" target triple = "i686-pc-win32" diff --git a/llvm/test/CodeGen/X86/sse-only.ll b/llvm/test/CodeGen/X86/sse-only.ll new file mode 100644 index 00000000000..a4fdf963b48 --- /dev/null +++ b/llvm/test/CodeGen/X86/sse-only.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2,-mmx | FileCheck %s + +; Test that turning off mmx doesn't turn off sse + +define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { +; CHECK-LABEL: test1: +; CHECK: ## BB#0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movapd (%ecx), %xmm0 +; CHECK-NEXT: movlpd {{[0-9]+}}(%esp), %xmm0 +; CHECK-NEXT: movapd %xmm0, (%eax) +; CHECK-NEXT: retl + %tmp3 = load <2 x double>, <2 x double>* %A, align 16 + %tmp7 = insertelement <2 x double> undef, double %B, i32 0 + %tmp9 = shufflevector <2 
x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 > + store <2 x double> %tmp9, <2 x double>* %r, align 16 + ret void +} |

