summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
author: Eric Christopher <echristo@gmail.com> 2015-10-08 20:10:06 +0000
committer: Eric Christopher <echristo@gmail.com> 2015-10-08 20:10:06 +0000
commit: 11e5983658ce90495804e7016bc95a3913c22d22 (patch)
tree: 9159ad85351f02019e19dbd77120a82bcd944bc1 /llvm
parent: 437615639fef5b72164606ff59fc3d6991a50bae (diff)
download: bcm5719-llvm-11e5983658ce90495804e7016bc95a3913c22d22.tar.gz
          bcm5719-llvm-11e5983658ce90495804e7016bc95a3913c22d22.zip
Move the MMX subtarget feature out of the SSE set of features and into
its own variable.

This is needed so that we can explicitly turn off MMX without turning
off SSE and also so that we can diagnose feature set incompatibilities
that involve MMX without SSE.

Rationale:

    // sse3
    __m128d test_mm_addsub_pd(__m128d A, __m128d B) {
      return _mm_addsub_pd(A, B);
    }

    // mmx
    void shift(__m64 a, __m64 b, int c) {
      _mm_slli_pi16(a, c);
      _mm_slli_pi32(a, c);
      _mm_slli_si64(a, c);
      _mm_srli_pi16(a, c);
      _mm_srli_pi32(a, c);
      _mm_srli_si64(a, c);
      _mm_srai_pi16(a, c);
      _mm_srai_pi32(a, c);
    }

    clang -msse3 -mno-mmx file.c -c

For this code we should be able to explicitly turn off MMX without
affecting the compilation of the SSE3 function and then diagnose and
error on compiling the MMX function.

This matches the existing gcc behavior and follows the spirit of the
SSE/MMX separation in llvm where we can (and do) turn off MMX code
generation except in the presence of intrinsics.

Updated a couple of tests, but primarily tested with a couple of tests
for turning on only mmx and only sse.

This is paired with a patch to clang to take advantage of this behavior.

llvm-svn: 249731
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Target/X86/X86.td | 454
-rw-r--r-- llvm/lib/Target/X86/X86Subtarget.cpp | 3
-rw-r--r-- llvm/lib/Target/X86/X86Subtarget.h | 9
-rw-r--r-- llvm/test/CodeGen/X86/mmx-intrinsics.ll | 4
-rw-r--r-- llvm/test/CodeGen/X86/mmx-only.ll | 21
-rw-r--r-- llvm/test/CodeGen/X86/mult-alt-x86.ll | 2
-rw-r--r-- llvm/test/CodeGen/X86/sse-only.ll | 19
7 files changed, 341 insertions, 171 deletions
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 3a3b03874c0..fa0b674ff94 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -37,14 +37,17 @@ def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
"Support POPCNT instruction">;
-
-def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
+// The MMX subtarget feature is separate from the rest of the SSE features
+// because it's important (for odd compatibility reasons) to be able to
+// turn it off explicitly while allowing SSE+ to be on.
+def FeatureMMX : SubtargetFeature<"mmx","HasMMX", "true",
"Enable MMX instructions">;
+
def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
"Enable SSE instructions",
// SSE codegen depends on cmovs, and all
// SSE1+ processors support them.
- [FeatureMMX, FeatureCMOV]>;
+ [FeatureCMOV]>;
def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
"Enable SSE2 instructions",
[FeatureSSE1]>;
@@ -219,184 +222,241 @@ def : Proc<"pentium-mmx", [FeatureSlowUAMem16, FeatureMMX]>;
def : Proc<"i686", [FeatureSlowUAMem16]>;
def : Proc<"pentiumpro", [FeatureSlowUAMem16, FeatureCMOV]>;
def : Proc<"pentium2", [FeatureSlowUAMem16, FeatureMMX, FeatureCMOV]>;
-def : Proc<"pentium3", [FeatureSlowUAMem16, FeatureSSE1]>;
-def : Proc<"pentium3m", [FeatureSlowUAMem16, FeatureSSE1,
+def : Proc<"pentium3", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1]>;
+def : Proc<"pentium3m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1,
FeatureSlowBTMem]>;
-def : Proc<"pentium-m", [FeatureSlowUAMem16, FeatureSSE2,
+def : Proc<"pentium-m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2,
FeatureSlowBTMem]>;
-def : Proc<"pentium4", [FeatureSlowUAMem16, FeatureSSE2]>;
-def : Proc<"pentium4m", [FeatureSlowUAMem16, FeatureSSE2,
+def : Proc<"pentium4", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2]>;
+def : Proc<"pentium4m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2,
FeatureSlowBTMem]>;
// Intel Core Duo.
-def : ProcessorModel<"yonah", SandyBridgeModel,
- [FeatureSlowUAMem16, FeatureSSE3, FeatureSlowBTMem]>;
+def : ProcessorModel<
+ "yonah", SandyBridgeModel,
+ [ FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureSlowBTMem ]>;
// NetBurst.
-def : Proc<"prescott", [FeatureSlowUAMem16, FeatureSSE3, FeatureSlowBTMem]>;
-def : Proc<"nocona", [FeatureSlowUAMem16, FeatureSSE3, FeatureCMPXCHG16B,
- FeatureSlowBTMem]>;
+def : Proc<"prescott",
+ [ FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureSlowBTMem ]>;
+def : Proc<"nocona", [
+ FeatureSlowUAMem16,
+ FeatureMMX,
+ FeatureSSE3,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem
+]>;
// Intel Core 2 Solo/Duo.
-def : ProcessorModel<"core2", SandyBridgeModel,
- [FeatureSlowUAMem16, FeatureSSSE3, FeatureCMPXCHG16B,
- FeatureSlowBTMem]>;
-def : ProcessorModel<"penryn", SandyBridgeModel,
- [FeatureSlowUAMem16, FeatureSSE41, FeatureCMPXCHG16B,
- FeatureSlowBTMem]>;
+def : ProcessorModel<"core2", SandyBridgeModel, [
+ FeatureSlowUAMem16,
+ FeatureMMX,
+ FeatureSSSE3,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem
+]>;
+def : ProcessorModel<"penryn", SandyBridgeModel, [
+ FeatureSlowUAMem16,
+ FeatureMMX,
+ FeatureSSE41,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem
+]>;
// Atom CPUs.
class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [
- ProcIntelAtom,
- FeatureSlowUAMem16,
- FeatureSSSE3,
- FeatureCMPXCHG16B,
- FeatureMOVBE,
- FeatureSlowBTMem,
- FeatureLeaForSP,
- FeatureSlowDivide32,
- FeatureSlowDivide64,
- FeatureCallRegIndirect,
- FeatureLEAUsesAG,
- FeaturePadShortFunctions
- ]>;
+ ProcIntelAtom,
+ FeatureSlowUAMem16,
+ FeatureMMX,
+ FeatureSSSE3,
+ FeatureCMPXCHG16B,
+ FeatureMOVBE,
+ FeatureSlowBTMem,
+ FeatureLeaForSP,
+ FeatureSlowDivide32,
+ FeatureSlowDivide64,
+ FeatureCallRegIndirect,
+ FeatureLEAUsesAG,
+ FeaturePadShortFunctions
+]>;
def : BonnellProc<"bonnell">;
def : BonnellProc<"atom">; // Pin the generic name to the baseline.
class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [
- ProcIntelSLM,
- FeatureSSE42,
- FeatureCMPXCHG16B,
- FeatureMOVBE,
- FeaturePOPCNT,
- FeaturePCLMUL,
- FeatureAES,
- FeatureSlowDivide64,
- FeatureCallRegIndirect,
- FeaturePRFCHW,
- FeatureSlowLEA,
- FeatureSlowIncDec,
- FeatureSlowBTMem
- ]>;
+ ProcIntelSLM,
+ FeatureMMX,
+ FeatureSSE42,
+ FeatureCMPXCHG16B,
+ FeatureMOVBE,
+ FeaturePOPCNT,
+ FeaturePCLMUL,
+ FeatureAES,
+ FeatureSlowDivide64,
+ FeatureCallRegIndirect,
+ FeaturePRFCHW,
+ FeatureSlowLEA,
+ FeatureSlowIncDec,
+ FeatureSlowBTMem
+]>;
def : SilvermontProc<"silvermont">;
def : SilvermontProc<"slm">; // Legacy alias.
// "Arrandale" along with corei3 and corei5
class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
- FeatureSSE42,
- FeatureCMPXCHG16B,
- FeatureSlowBTMem,
- FeaturePOPCNT
- ]>;
+ FeatureMMX,
+ FeatureSSE42,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeaturePOPCNT
+]>;
def : NehalemProc<"nehalem">;
def : NehalemProc<"corei7">;
// Westmere is a similar machine to nehalem with some additional features.
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
- FeatureSSE42,
- FeatureCMPXCHG16B,
- FeatureSlowBTMem,
- FeaturePOPCNT,
- FeatureAES,
- FeaturePCLMUL
- ]>;
+ FeatureMMX,
+ FeatureSSE42,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeaturePOPCNT,
+ FeatureAES,
+ FeaturePCLMUL
+]>;
def : WestmereProc<"westmere">;
// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
// rather than a superset.
class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
- FeatureAVX,
- FeatureCMPXCHG16B,
- FeatureSlowBTMem,
- FeatureSlowUAMem32,
- FeaturePOPCNT,
- FeatureAES,
- FeaturePCLMUL
- ]>;
+ FeatureMMX,
+ FeatureAVX,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeatureSlowUAMem32,
+ FeaturePOPCNT,
+ FeatureAES,
+ FeaturePCLMUL
+]>;
def : SandyBridgeProc<"sandybridge">;
def : SandyBridgeProc<"corei7-avx">; // Legacy alias.
class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
- FeatureAVX,
- FeatureCMPXCHG16B,
- FeatureSlowBTMem,
- FeatureSlowUAMem32,
- FeaturePOPCNT,
- FeatureAES,
- FeaturePCLMUL,
- FeatureRDRAND,
- FeatureF16C,
- FeatureFSGSBase
- ]>;
+ FeatureMMX,
+ FeatureAVX,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeatureSlowUAMem32,
+ FeaturePOPCNT,
+ FeatureAES,
+ FeaturePCLMUL,
+ FeatureRDRAND,
+ FeatureF16C,
+ FeatureFSGSBase
+]>;
def : IvyBridgeProc<"ivybridge">;
def : IvyBridgeProc<"core-avx-i">; // Legacy alias.
class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel, [
- FeatureAVX2,
- FeatureCMPXCHG16B,
- FeatureSlowBTMem,
- FeaturePOPCNT,
- FeatureAES,
- FeaturePCLMUL,
- FeatureRDRAND,
- FeatureF16C,
- FeatureFSGSBase,
- FeatureMOVBE,
- FeatureLZCNT,
- FeatureBMI,
- FeatureBMI2,
- FeatureFMA,
- FeatureRTM,
- FeatureHLE,
- FeatureSlowIncDec
- ]>;
+ FeatureMMX,
+ FeatureAVX2,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeaturePOPCNT,
+ FeatureAES,
+ FeaturePCLMUL,
+ FeatureRDRAND,
+ FeatureF16C,
+ FeatureFSGSBase,
+ FeatureMOVBE,
+ FeatureLZCNT,
+ FeatureBMI,
+ FeatureBMI2,
+ FeatureFMA,
+ FeatureRTM,
+ FeatureHLE,
+ FeatureSlowIncDec
+]>;
def : HaswellProc<"haswell">;
def : HaswellProc<"core-avx2">; // Legacy alias.
class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [
- FeatureAVX2,
- FeatureCMPXCHG16B,
- FeatureSlowBTMem,
- FeaturePOPCNT,
- FeatureAES,
- FeaturePCLMUL,
- FeatureRDRAND,
- FeatureF16C,
- FeatureFSGSBase,
- FeatureMOVBE,
- FeatureLZCNT,
- FeatureBMI,
- FeatureBMI2,
- FeatureFMA,
- FeatureRTM,
- FeatureHLE,
- FeatureADX,
- FeatureRDSEED,
- FeatureSlowIncDec
- ]>;
+ FeatureMMX,
+ FeatureAVX2,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeaturePOPCNT,
+ FeatureAES,
+ FeaturePCLMUL,
+ FeatureRDRAND,
+ FeatureF16C,
+ FeatureFSGSBase,
+ FeatureMOVBE,
+ FeatureLZCNT,
+ FeatureBMI,
+ FeatureBMI2,
+ FeatureFMA,
+ FeatureRTM,
+ FeatureHLE,
+ FeatureADX,
+ FeatureRDSEED,
+ FeatureSlowIncDec
+]>;
def : BroadwellProc<"broadwell">;
// FIXME: define KNL model
-class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel,
- [FeatureAVX512, FeatureERI, FeatureCDI, FeaturePFI,
- FeatureCMPXCHG16B, FeaturePOPCNT,
- FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
- FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
- FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
- FeatureSlowIncDec, FeatureMPX]>;
+class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel, [
+ FeatureMMX,
+ FeatureAVX512,
+ FeatureERI,
+ FeatureCDI,
+ FeaturePFI,
+ FeatureCMPXCHG16B,
+ FeaturePOPCNT,
+ FeatureAES,
+ FeaturePCLMUL,
+ FeatureRDRAND,
+ FeatureF16C,
+ FeatureFSGSBase,
+ FeatureMOVBE,
+ FeatureLZCNT,
+ FeatureBMI,
+ FeatureBMI2,
+ FeatureFMA,
+ FeatureRTM,
+ FeatureHLE,
+ FeatureSlowIncDec,
+ FeatureMPX
+]>;
def : KnightsLandingProc<"knl">;
// FIXME: define SKX model
-class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel,
- [FeatureAVX512, FeatureCDI,
- FeatureDQI, FeatureBWI, FeatureVLX,
- FeatureCMPXCHG16B, FeatureSlowBTMem,
- FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND,
- FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT,
- FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM,
- FeatureHLE, FeatureADX, FeatureRDSEED, FeatureSlowIncDec,
- FeatureMPX]>;
+class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel, [
+ FeatureMMX,
+ FeatureAVX512,
+ FeatureCDI,
+ FeatureDQI,
+ FeatureBWI,
+ FeatureVLX,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeaturePOPCNT,
+ FeatureAES,
+ FeaturePCLMUL,
+ FeatureRDRAND,
+ FeatureF16C,
+ FeatureFSGSBase,
+ FeatureMOVBE,
+ FeatureLZCNT,
+ FeatureBMI,
+ FeatureBMI2,
+ FeatureFMA,
+ FeatureRTM,
+ FeatureHLE,
+ FeatureADX,
+ FeatureRDSEED,
+ FeatureSlowIncDec,
+ FeatureMPX
+]>;
def : SkylakeProc<"skylake">;
def : SkylakeProc<"skx">; // Legacy alias.
@@ -447,52 +507,117 @@ def : Proc<"barcelona", [FeatureSSE4A,
FeatureSlowSHLD]>;
// Bobcat
-def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B,
- FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT,
- FeatureSlowSHLD]>;
+def : Proc<"btver1", [
+ FeatureMMX,
+ FeatureSSSE3,
+ FeatureSSE4A,
+ FeatureCMPXCHG16B,
+ FeaturePRFCHW,
+ FeatureLZCNT,
+ FeaturePOPCNT,
+ FeatureSlowSHLD
+]>;
// Jaguar
-def : ProcessorModel<"btver2", BtVer2Model,
- [FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B,
- FeaturePRFCHW, FeatureAES, FeaturePCLMUL,
- FeatureBMI, FeatureF16C, FeatureMOVBE,
- FeatureLZCNT, FeaturePOPCNT,
- FeatureSlowSHLD]>;
+def : ProcessorModel<"btver2", BtVer2Model, [
+ FeatureMMX,
+ FeatureAVX,
+ FeatureSSE4A,
+ FeatureCMPXCHG16B,
+ FeaturePRFCHW,
+ FeatureAES,
+ FeaturePCLMUL,
+ FeatureBMI,
+ FeatureF16C,
+ FeatureMOVBE,
+ FeatureLZCNT,
+ FeaturePOPCNT,
+ FeatureSlowSHLD
+]>;
// Bulldozer
-def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
- FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
- FeatureAVX, FeatureSSE4A, FeatureLZCNT,
- FeaturePOPCNT, FeatureSlowSHLD]>;
+def : Proc<"bdver1", [
+ FeatureXOP,
+ FeatureFMA4,
+ FeatureCMPXCHG16B,
+ FeatureAES,
+ FeaturePRFCHW,
+ FeaturePCLMUL,
+ FeatureMMX,
+ FeatureAVX,
+ FeatureSSE4A,
+ FeatureLZCNT,
+ FeaturePOPCNT,
+ FeatureSlowSHLD
+]>;
// Piledriver
-def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
- FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
- FeatureAVX, FeatureSSE4A, FeatureF16C,
- FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
- FeatureTBM, FeatureFMA, FeatureSlowSHLD]>;
+def : Proc<"bdver2", [
+ FeatureXOP,
+ FeatureFMA4,
+ FeatureCMPXCHG16B,
+ FeatureAES,
+ FeaturePRFCHW,
+ FeaturePCLMUL,
+ FeatureMMX,
+ FeatureAVX,
+ FeatureSSE4A,
+ FeatureF16C,
+ FeatureLZCNT,
+ FeaturePOPCNT,
+ FeatureBMI,
+ FeatureTBM,
+ FeatureFMA,
+ FeatureSlowSHLD
+]>;
// Steamroller
-def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
- FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
- FeatureAVX, FeatureSSE4A, FeatureF16C,
- FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
- FeatureTBM, FeatureFMA, FeatureSlowSHLD,
- FeatureFSGSBase]>;
+def : Proc<"bdver3", [
+ FeatureXOP,
+ FeatureFMA4,
+ FeatureCMPXCHG16B,
+ FeatureAES,
+ FeaturePRFCHW,
+ FeaturePCLMUL,
+ FeatureMMX,
+ FeatureAVX,
+ FeatureSSE4A,
+ FeatureF16C,
+ FeatureLZCNT,
+ FeaturePOPCNT,
+ FeatureBMI,
+ FeatureTBM,
+ FeatureFMA,
+ FeatureSlowSHLD,
+ FeatureFSGSBase
+]>;
// Excavator
-def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4,
- FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW,
- FeaturePCLMUL, FeatureF16C, FeatureLZCNT,
- FeaturePOPCNT, FeatureBMI, FeatureBMI2,
- FeatureTBM, FeatureFMA, FeatureSSE4A,
- FeatureFSGSBase]>;
+def : Proc<"bdver4", [
+ FeatureMMX,
+ FeatureAVX2,
+ FeatureXOP,
+ FeatureFMA4,
+ FeatureCMPXCHG16B,
+ FeatureAES,
+ FeaturePRFCHW,
+ FeaturePCLMUL,
+ FeatureF16C,
+ FeatureLZCNT,
+ FeaturePOPCNT,
+ FeatureBMI,
+ FeatureBMI2,
+ FeatureTBM,
+ FeatureFMA,
+ FeatureSSE4A,
+ FeatureFSGSBase
+]>;
def : Proc<"geode", [FeatureSlowUAMem16, Feature3DNowA]>;
def : Proc<"winchip-c6", [FeatureSlowUAMem16, FeatureMMX]>;
def : Proc<"winchip2", [FeatureSlowUAMem16, Feature3DNow]>;
def : Proc<"c3", [FeatureSlowUAMem16, Feature3DNow]>;
-def : Proc<"c3-2", [FeatureSlowUAMem16, FeatureSSE1]>;
+def : Proc<"c3-2", [ FeatureSlowUAMem16, FeatureMMX, FeatureSSE1 ]>;
// We also provide a generic 64-bit specific x86 processor model which tries to
// be good for modern chips without enabling instruction set encodings past the
@@ -504,8 +629,9 @@ def : Proc<"c3-2", [FeatureSlowUAMem16, FeatureSSE1]>;
// covers a huge swath of x86 processors. If there are specific scheduling
// knobs which need to be tuned differently for AMD chips, we might consider
// forming a common base for them.
-def : ProcessorModel<"x86-64", SandyBridgeModel,
- [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>;
+def : ProcessorModel<
+ "x86-64", SandyBridgeModel,
+ [ FeatureMMX, FeatureSSE2, Feature64Bit, FeatureSlowBTMem ]>;
//===----------------------------------------------------------------------===//
// Register File Description
diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index 5b53ca93399..a4db4e60280 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -228,9 +228,10 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
}
void X86Subtarget::initializeEnvironment() {
- X86SSELevel = NoMMXSSE;
+ X86SSELevel = NoSSE;
X863DNowLevel = NoThreeDNow;
HasCMov = false;
+ HasMMX = false;
HasX86_64 = false;
HasPOPCNT = false;
HasSSE4A = false;
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 52e68c08180..198e7fbf135 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -47,7 +47,7 @@ class X86Subtarget final : public X86GenSubtargetInfo {
protected:
enum X86SSEEnum {
- NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
+ NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
};
enum X863DNowEnum {
@@ -64,7 +64,7 @@ protected:
/// Which PIC style to use
PICStyles::Style PICStyle;
- /// MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.
+ /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.
X86SSEEnum X86SSELevel;
/// 3DNow, 3DNow Athlon, or none supported.
@@ -74,6 +74,9 @@ protected:
/// (generally pentium pro+).
bool HasCMov;
+ /// True if this processor supports MMX instructions.
+ bool HasMMX;
+
/// True if the processor supports X86-64 instructions.
bool HasX86_64;
@@ -319,7 +322,7 @@ public:
void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }
bool hasCMov() const { return HasCMov; }
- bool hasMMX() const { return X86SSELevel >= MMX; }
+ bool hasMMX() const { return HasMMX; }
bool hasSSE1() const { return X86SSELevel >= SSE1; }
bool hasSSE2() const { return X86SSELevel >= SSE2; }
bool hasSSE3() const { return X86SSELevel >= SSE3; }
diff --git a/llvm/test/CodeGen/X86/mmx-intrinsics.ll b/llvm/test/CodeGen/X86/mmx-intrinsics.ll
index d9bcdc4effd..7647fccb580 100644
--- a/llvm/test/CodeGen/X86/mmx-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/mmx-intrinsics.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X86
-; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=X86
+; RUN: llc < %s -march=x86 -mattr=+mmx,+avx | FileCheck %s --check-prefix=ALL --check-prefix=X86
; RUN: llc < %s -march=x86-64 -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X64
-; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=X64
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+avx | FileCheck %s --check-prefix=ALL --check-prefix=X64
declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/mmx-only.ll b/llvm/test/CodeGen/X86/mmx-only.ll
new file mode 100644
index 00000000000..35598d5f6e1
--- /dev/null
+++ b/llvm/test/CodeGen/X86/mmx-only.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=x86 -mattr=+mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+mmx,-sse | FileCheck %s
+
+; Test that turning off sse doesn't turn off mmx.
+
+declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone {
+; CHECK-LABEL: @test88
+; CHECK: pcmpgtd
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
diff --git a/llvm/test/CodeGen/X86/mult-alt-x86.ll b/llvm/test/CodeGen/X86/mult-alt-x86.ll
index 5174f85adb9..1c83fedad3c 100644
--- a/llvm/test/CodeGen/X86/mult-alt-x86.ll
+++ b/llvm/test/CodeGen/X86/mult-alt-x86.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -no-integrated-as
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 -no-integrated-as
; ModuleID = 'mult-alt-x86.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
target triple = "i686-pc-win32"
diff --git a/llvm/test/CodeGen/X86/sse-only.ll b/llvm/test/CodeGen/X86/sse-only.ll
new file mode 100644
index 00000000000..a4fdf963b48
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sse-only.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2,-mmx | FileCheck %s
+
+; Test that turning off mmx doesn't turn off sse
+
+define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
+; CHECK-LABEL: test1:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movapd (%ecx), %xmm0
+; CHECK-NEXT: movlpd {{[0-9]+}}(%esp), %xmm0
+; CHECK-NEXT: movapd %xmm0, (%eax)
+; CHECK-NEXT: retl
+ %tmp3 = load <2 x double>, <2 x double>* %A, align 16
+ %tmp7 = insertelement <2 x double> undef, double %B, i32 0
+ %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
+ store <2 x double> %tmp9, <2 x double>* %r, align 16
+ ret void
+}
OpenPOWER on IntegriCloud