-rw-r--r--  llvm/lib/Target/X86/X86.td                          | 14
-rw-r--r--  llvm/lib/Target/X86/X86InstrFragmentsSIMD.td        |  2
-rw-r--r--  llvm/lib/Target/X86/X86Subtarget.cpp                |  2
-rw-r--r--  llvm/lib/Target/X86/X86Subtarget.h                  |  8
-rw-r--r--  llvm/test/CodeGen/X86/fold-vex.ll                   | 17
-rw-r--r--  llvm/test/CodeGen/X86/sse-unaligned-mem-feature.ll (renamed from llvm/test/CodeGen/X86/2010-01-07-UAMemFeature.ll) |  6
6 files changed, 29 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index ab3319afe93..30b3b2876b8 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -132,9 +132,9 @@ def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
"Enable XOP instructions",
[FeatureFMA4]>;
-def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
- "HasVectorUAMem", "true",
- "Allow unaligned memory operands on vector/SIMD instructions">;
+def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
+ "HasSSEUnalignedMem", "true",
+ "Allow unaligned memory operands with SSE instructions">;
def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
"Enable AES instructions",
[FeatureSSE2]>;
@@ -309,7 +309,6 @@ class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureCMPXCHG16B,
FeatureFastUAMem,
FeatureSlowUAMem32,
- FeatureVectorUAMem,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL
@@ -322,7 +321,6 @@ class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureCMPXCHG16B,
FeatureFastUAMem,
FeatureSlowUAMem32,
- FeatureVectorUAMem,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
@@ -337,7 +335,6 @@ class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel, [
FeatureAVX2,
FeatureCMPXCHG16B,
FeatureFastUAMem,
- FeatureVectorUAMem,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
@@ -360,7 +357,6 @@ class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [
FeatureAVX2,
FeatureCMPXCHG16B,
FeatureFastUAMem,
- FeatureVectorUAMem,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
@@ -388,7 +384,7 @@ class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel,
FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
- FeatureSlowIncDec, FeatureVectorUAMem]>;
+ FeatureSlowIncDec]>;
def : KnightsLandingProc<"knl">;
// FIXME: define SKX model
@@ -399,7 +395,7 @@ class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel,
FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
- FeatureSlowIncDec, FeatureSGX, FeatureVectorUAMem]>;
+ FeatureSlowIncDec, FeatureSGX]>;
def : SkylakeProc<"skylake">;
def : SkylakeProc<"skx">; // Legacy alias.
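(For reference: the renamed attribute can also be toggled explicitly on the llc command line. A minimal sketch in the style of the updated tests below, assuming the usual +/- feature spelling; the ALIGNED check prefix is hypothetical, not part of this commit.)

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse-unaligned-mem | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse-unaligned-mem | FileCheck %s --check-prefix=ALIGNED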
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 85768aeb4e4..e5de404ed21 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -433,7 +433,7 @@ def alignedloadv8i64 : PatFrag<(ops node:$ptr),
// setting a feature bit in the processor (on startup, for example).
// Opteron 10h and later implement such a feature.
def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return Subtarget->hasVectorUAMem()
+ return Subtarget->hasSSEUnalignedMem()
|| cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;
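(To make the predicate concrete: a hypothetical IR fragment, in the typed-pointer syntax of this era, showing which loads memop accepts. The align 4 load matches only when the subtarget reports hasSSEUnalignedMem(); the align 16 load matches unconditionally.)

  %v = load <4 x float>* %p, align 4    ; matches memop only with sse-unaligned-mem
  %w = load <4 x float>* %q, align 16   ; matches memop unconditionally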
diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index 01889e887c2..e90da0f98a6 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -265,7 +265,7 @@ void X86Subtarget::initializeEnvironment() {
IsSHLDSlow = false;
IsUAMemFast = false;
IsUAMem32Slow = false;
- HasVectorUAMem = false;
+ HasSSEUnalignedMem = false;
HasCmpxchg16b = false;
UseLeaForSP = false;
HasSlowDivide32 = false;
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 27dec659643..417f1332c54 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -162,9 +162,9 @@ protected:
/// True if unaligned 32-byte memory accesses are slow.
bool IsUAMem32Slow;
- /// HasVectorUAMem - True if SIMD operations can have unaligned memory
- /// operands. This may require setting a feature bit in the processor.
- bool HasVectorUAMem;
+ /// True if SSE operations can have unaligned memory operands.
+ /// This may require setting a configuration bit in the processor.
+ bool HasSSEUnalignedMem;
/// HasCmpxchg16b - True if this processor has the CMPXCHG16B instruction;
/// this is true for most x86-64 chips, but not the first AMD chips.
@@ -375,7 +375,7 @@ public:
bool isSHLDSlow() const { return IsSHLDSlow; }
bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
- bool hasVectorUAMem() const { return HasVectorUAMem; }
+ bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
bool hasCmpxchg16b() const { return HasCmpxchg16b; }
bool useLeaForSP() const { return UseLeaForSP; }
bool hasSlowDivide32() const { return HasSlowDivide32; }
diff --git a/llvm/test/CodeGen/X86/fold-vex.ll b/llvm/test/CodeGen/X86/fold-vex.ll
index a0c5e22b1c0..5a8b1d8cbfd 100644
--- a/llvm/test/CodeGen/X86/fold-vex.ll
+++ b/llvm/test/CodeGen/X86/fold-vex.ll
@@ -1,12 +1,18 @@
; Use CPU parameters to ensure that a CPU-specific attribute is not overriding the AVX definition.
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-avx | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx -mattr=-avx | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -mattr=-avx | FileCheck %s --check-prefix=SSE
; No need to load unaligned operand from memory using an explicit instruction with AVX.
; The operand should be folded into the AND instr.
+; With SSE, folding memory operands into math/logic ops requires 16-byte alignment
+; unless specially configured on some CPUs such as AMD Family 10H.
+
define <4 x i32> @test1(<4 x i32>* %p0, <4 x i32> %in1) nounwind {
%in0 = load <4 x i32>* %p0, align 2
%a = and <4 x i32> %in0, %in1
@@ -16,5 +22,10 @@ define <4 x i32> @test1(<4 x i32>* %p0, <4 x i32> %in1) nounwind {
; CHECK-NOT: vmovups
; CHECK: vandps (%rdi), %xmm0, %xmm0
; CHECK-NEXT: ret
+
+; SSE-LABEL: @test1
+; SSE: movups (%rdi), %xmm1
+; SSE-NEXT: andps %xmm1, %xmm0
+; SSE-NEXT: ret
}
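(For contrast with the movups+andps sequence checked above, a sketch of the aligned case: if the load used align 16 instead of align 2, the SSE run lines would be expected to fold the memory operand directly. This is assumed output, not checked by the test; the exact mnemonic may vary with execution-domain fixing.)

  %in0 = load <4 x i32>* %p0, align 16
  ; expected: andps (%rdi), %xmm0  (or the pand equivalent)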
diff --git a/llvm/test/CodeGen/X86/2010-01-07-UAMemFeature.ll b/llvm/test/CodeGen/X86/sse-unaligned-mem-feature.ll
index bb24adb4181..15f91ee04ea 100644
--- a/llvm/test/CodeGen/X86/2010-01-07-UAMemFeature.ll
+++ b/llvm/test/CodeGen/X86/sse-unaligned-mem-feature.ll
@@ -1,5 +1,4 @@
-; RUN: llc -mcpu=yonah -mattr=vector-unaligned-mem -march=x86 < %s | FileCheck %s
-; CHECK: addps (
+; RUN: llc -mcpu=yonah -mattr=sse-unaligned-mem -march=x86 < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
@@ -8,4 +7,7 @@ define <4 x float> @foo(<4 x float>* %P, <4 x float> %In) nounwind {
%A = load <4 x float>* %P, align 4
%B = fadd <4 x float> %A, %In
ret <4 x float> %B
+
+; CHECK-LABEL: @foo
+; CHECK: addps (
}
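(For the inverse case, a sketch of the assumed output when the attribute is left off with -mcpu=yonah: the align 4 load cannot fold into addps, so a separate unaligned move is emitted first. Register names are illustrative only.)

  movups (%eax), %xmm1
  addps  %xmm1, %xmm0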