-rw-r--r--  llvm/lib/Target/X86/X86.td                                                                          | 14
-rw-r--r--  llvm/lib/Target/X86/X86InstrFragmentsSIMD.td                                                        |  2
-rw-r--r--  llvm/lib/Target/X86/X86Subtarget.cpp                                                                |  2
-rw-r--r--  llvm/lib/Target/X86/X86Subtarget.h                                                                  |  8
-rw-r--r--  llvm/test/CodeGen/X86/fold-vex.ll                                                                   | 17
-rw-r--r--  llvm/test/CodeGen/X86/sse-unaligned-mem-feature.ll (renamed from llvm/test/CodeGen/X86/2010-01-07-UAMemFeature.ll) |  6
6 files changed, 29 insertions, 20 deletions
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index ab3319afe93..30b3b2876b8 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -132,9 +132,9 @@ def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
 def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
                                   "Enable XOP instructions",
                                   [FeatureFMA4]>;
-def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
-          "HasVectorUAMem", "true",
-          "Allow unaligned memory operands on vector/SIMD instructions">;
+def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
+          "HasSSEUnalignedMem", "true",
+          "Allow unaligned memory operands with SSE instructions">;
 def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
                                   "Enable AES instructions",
                                   [FeatureSSE2]>;
@@ -309,7 +309,6 @@ class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
   FeatureCMPXCHG16B,
   FeatureFastUAMem,
   FeatureSlowUAMem32,
-  FeatureVectorUAMem,
   FeaturePOPCNT,
   FeatureAES,
   FeaturePCLMUL
@@ -322,7 +321,6 @@ class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
   FeatureCMPXCHG16B,
   FeatureFastUAMem,
   FeatureSlowUAMem32,
-  FeatureVectorUAMem,
   FeaturePOPCNT,
   FeatureAES,
   FeaturePCLMUL,
@@ -337,7 +335,6 @@ class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel, [
   FeatureAVX2,
   FeatureCMPXCHG16B,
   FeatureFastUAMem,
-  FeatureVectorUAMem,
   FeaturePOPCNT,
   FeatureAES,
   FeaturePCLMUL,
@@ -360,7 +357,6 @@ class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [
   FeatureAVX2,
   FeatureCMPXCHG16B,
   FeatureFastUAMem,
-  FeatureVectorUAMem,
   FeaturePOPCNT,
   FeatureAES,
   FeaturePCLMUL,
@@ -388,7 +384,7 @@ class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel,
                       FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
                       FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
                       FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
-                      FeatureSlowIncDec, FeatureVectorUAMem]>;
+                      FeatureSlowIncDec]>;
 def : KnightsLandingProc<"knl">;
 
 // FIXME: define SKX model
@@ -399,7 +395,7 @@ class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel,
                       FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
                       FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
                       FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
-                      FeatureSlowIncDec, FeatureSGX, FeatureVectorUAMem]>;
+                      FeatureSlowIncDec, FeatureSGX]>;
 def : SkylakeProc<"skylake">;
 def : SkylakeProc<"skx">; // Legacy alias.
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 85768aeb4e4..e5de404ed21 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -433,7 +433,7 @@ def alignedloadv8i64 : PatFrag<(ops node:$ptr),
 // setting a feature bit in the processor (on startup, for example).
 // Opteron 10h and later implement such a feature.
 def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  return Subtarget->hasVectorUAMem()
+  return Subtarget->hasSSEUnalignedMem()
          || cast<LoadSDNode>(N)->getAlignment() >= 16;
 }]>;
diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index 01889e887c2..e90da0f98a6 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -265,7 +265,7 @@ void X86Subtarget::initializeEnvironment() {
   IsSHLDSlow = false;
   IsUAMemFast = false;
   IsUAMem32Slow = false;
-  HasVectorUAMem = false;
+  HasSSEUnalignedMem = false;
   HasCmpxchg16b = false;
   UseLeaForSP = false;
   HasSlowDivide32 = false;
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 27dec659643..417f1332c54 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -162,9 +162,9 @@ protected:
   /// True if unaligned 32-byte memory accesses are slow.
   bool IsUAMem32Slow;
 
-  /// HasVectorUAMem - True if SIMD operations can have unaligned memory
-  /// operands. This may require setting a feature bit in the processor.
-  bool HasVectorUAMem;
+  /// True if SSE operations can have unaligned memory operands.
+  /// This may require setting a configuration bit in the processor.
+  bool HasSSEUnalignedMem;
 
   /// HasCmpxchg16b - True if this processor has the CMPXCHG16B instruction;
   /// this is true for most x86-64 chips, but not the first AMD chips.
@@ -375,7 +375,7 @@ public:
   bool isSHLDSlow() const { return IsSHLDSlow; }
   bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
   bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
-  bool hasVectorUAMem() const { return HasVectorUAMem; }
+  bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
   bool hasCmpxchg16b() const { return HasCmpxchg16b; }
   bool useLeaForSP() const { return UseLeaForSP; }
   bool hasSlowDivide32() const { return HasSlowDivide32; }
diff --git a/llvm/test/CodeGen/X86/fold-vex.ll b/llvm/test/CodeGen/X86/fold-vex.ll
index a0c5e22b1c0..5a8b1d8cbfd 100644
--- a/llvm/test/CodeGen/X86/fold-vex.ll
+++ b/llvm/test/CodeGen/X86/fold-vex.ll
@@ -1,12 +1,18 @@
 ; Use CPU parameters to ensure that a CPU-specific attribute is not overriding the AVX definition.
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-avx | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx -mattr=-avx | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -mattr=-avx | FileCheck %s --check-prefix=SSE
 
 ; No need to load unaligned operand from memory using an explicit instruction with AVX.
 ; The operand should be folded into the AND instr.
+; With SSE, folding memory operands into math/logic ops requires 16-byte alignment
+; unless specially configured on some CPUs such as AMD Family 10H.
+
 define <4 x i32> @test1(<4 x i32>* %p0, <4 x i32> %in1) nounwind {
   %in0 = load <4 x i32>* %p0, align 2
   %a = and <4 x i32> %in0, %in1
@@ -16,5 +22,10 @@ define <4 x i32> @test1(<4 x i32>* %p0, <4 x i32> %in1) nounwind {
 ; CHECK-NOT: vmovups
 ; CHECK: vandps (%rdi), %xmm0, %xmm0
 ; CHECK-NEXT: ret
+
+; SSE-LABEL: @test1
+; SSE: movups (%rdi), %xmm1
+; SSE-NEXT: andps %xmm1, %xmm0
+; SSE-NEXT: ret
 }
diff --git a/llvm/test/CodeGen/X86/2010-01-07-UAMemFeature.ll b/llvm/test/CodeGen/X86/sse-unaligned-mem-feature.ll
index bb24adb4181..15f91ee04ea 100644
--- a/llvm/test/CodeGen/X86/2010-01-07-UAMemFeature.ll
+++ b/llvm/test/CodeGen/X86/sse-unaligned-mem-feature.ll
@@ -1,5 +1,4 @@
-; RUN: llc -mcpu=yonah -mattr=vector-unaligned-mem -march=x86 < %s | FileCheck %s
-; CHECK: addps (
+; RUN: llc -mcpu=yonah -mattr=sse-unaligned-mem -march=x86 < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -8,4 +7,7 @@ define <4 x float> @foo(<4 x float>* %P, <4 x float> %In) nounwind {
 	%A = load <4 x float>* %P, align 4
 	%B = fadd <4 x float> %A, %In
 	ret <4 x float> %B
+
+; CHECK-LABEL: @foo
+; CHECK: addps (
 }
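
Usage note (not part of the commit): after this change the feature is spelled "sse-unaligned-mem" on the llc command line and queried as hasSSEUnalignedMem() in the subtarget, as the hunks above show. The sketch below restates the renamed test and adds a second, hypothetical RUN line with a DEFAULT prefix; the extra RUN line, the UAMEM/DEFAULT prefix names, and the expected movups in the default case are assumptions in the spirit of the tests above, not output taken from this patch.

; Sketch only: contrast the folded and unfolded forms of an under-aligned SSE load.
; RUN: llc -mcpu=yonah -mattr=sse-unaligned-mem -march=x86 < %s | FileCheck %s --check-prefix=UAMEM
; RUN: llc -mcpu=yonah -march=x86 < %s | FileCheck %s --check-prefix=DEFAULT

define <4 x float> @foo(<4 x float>* %P, <4 x float> %In) nounwind {
	%A = load <4 x float>* %P, align 4     ; only 4-byte aligned, not 16
	%B = fadd <4 x float> %A, %In
	ret <4 x float> %B

; With sse-unaligned-mem, the memop fragment accepts the unaligned load,
; so it can be folded into the add:
; UAMEM-LABEL: @foo
; UAMEM: addps (

; By default, an unaligned 16-byte load cannot be an SSE memory operand,
; so an explicit unaligned load is expected first:
; DEFAULT-LABEL: @foo
; DEFAULT: movups (
; DEFAULT: addps %
}

Note that this sketch exercises only the SSE path; the processor-definition hunks above, which drop the old feature from the AVX-class CPU models, are what the new -mattr=-avx runs in fold-vex.ll are checking.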