summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Elena Demikhovsky <elena.demikhovsky@intel.com> 2014-12-16 09:10:08 +0000
committer: Elena Demikhovsky <elena.demikhovsky@intel.com> 2014-12-16 09:10:08 +0000
commit: a79fc16bb05a37eb9b42fb3ab349b37b9c58e034 (patch)
tree: 7d0cb10879d8bbabac871989249cd4e80cb57af7 /llvm/lib
parent: 07649fb7c5d7dec2e35b5c1c1c7907bafabb6a90 (diff)
downloadbcm5719-llvm-a79fc16bb05a37eb9b42fb3ab349b37b9c58e034.tar.gz
bcm5719-llvm-a79fc16bb05a37eb9b42fb3ab349b37b9c58e034.zip
X86: Added FeatureVectorUAMem for all AVX architectures.
According to the AVX specification: "Most arithmetic and data processing instructions encoded using the VEX prefix and performing memory accesses have more flexible memory alignment requirements than instructions that are encoded without the VEX prefix. Specifically, With the exception of explicitly aligned 16 or 32 byte SIMD load/store instructions, most VEX-encoded, arithmetic and data processing instructions operate in a flexible environment regarding memory address alignment, i.e. VEX-encoded instruction with 32-byte or 16-byte load semantics will support unaligned load operation by default. Memory arguments for most instructions with VEX prefix operate normally without causing #GP(0) on any byte-granularity alignment (unlike Legacy SSE instructions)." The same applies to AVX-512. This change does not affect anything right now, because only the "memop" pattern fragment depends on FeatureVectorUAMem, and it is not used in AVX patterns. All AVX patterns are based on the "unaligned load" anyway. llvm-svn: 224330
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/X86/X86.td | 8
-rw-r--r-- llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 18
2 files changed, 10 insertions, 16 deletions
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 0bb597b3207..ab3319afe93 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -309,6 +309,7 @@ class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureCMPXCHG16B,
FeatureFastUAMem,
FeatureSlowUAMem32,
+ FeatureVectorUAMem,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL
@@ -321,6 +322,7 @@ class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureCMPXCHG16B,
FeatureFastUAMem,
FeatureSlowUAMem32,
+ FeatureVectorUAMem,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
@@ -335,6 +337,7 @@ class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel, [
FeatureAVX2,
FeatureCMPXCHG16B,
FeatureFastUAMem,
+ FeatureVectorUAMem,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
@@ -357,6 +360,7 @@ class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [
FeatureAVX2,
FeatureCMPXCHG16B,
FeatureFastUAMem,
+ FeatureVectorUAMem,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
@@ -384,7 +388,7 @@ class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel,
FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
- FeatureSlowIncDec]>;
+ FeatureSlowIncDec, FeatureVectorUAMem]>;
def : KnightsLandingProc<"knl">;
// FIXME: define SKX model
@@ -395,7 +399,7 @@ class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel,
FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
- FeatureSlowIncDec, FeatureSGX]>;
+ FeatureSlowIncDec, FeatureSGX, FeatureVectorUAMem]>;
def : SkylakeProc<"skylake">;
def : SkylakeProc<"skx">; // Legacy alias.
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 5f695c02cfc..76e8fad78de 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -428,16 +428,6 @@ def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|| cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;
-def memop4 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return Subtarget->hasVectorUAMem()
- || cast<LoadSDNode>(N)->getAlignment() >= 4;
-}]>;
-
-def memop8 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return Subtarget->hasVectorUAMem()
- || cast<LoadSDNode>(N)->getAlignment() >= 8;
-}]>;
-
def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;
@@ -454,10 +444,10 @@ def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>;
// 512-bit memop pattern fragments
-def memopv16f32 : PatFrag<(ops node:$ptr), (v16f32 (memop4 node:$ptr))>;
-def memopv8f64 : PatFrag<(ops node:$ptr), (v8f64 (memop8 node:$ptr))>;
-def memopv16i32 : PatFrag<(ops node:$ptr), (v16i32 (memop4 node:$ptr))>;
-def memopv8i64 : PatFrag<(ops node:$ptr), (v8i64 (memop8 node:$ptr))>;
+def memopv16f32 : PatFrag<(ops node:$ptr), (v16f32 (memop node:$ptr))>;
+def memopv8f64 : PatFrag<(ops node:$ptr), (v8f64 (memop node:$ptr))>;
+def memopv16i32 : PatFrag<(ops node:$ptr), (v16i32 (memop node:$ptr))>;
+def memopv8i64 : PatFrag<(ops node:$ptr), (v8i64 (memop node:$ptr))>;
// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
// 16-byte boundary.
OpenPOWER on IntegriCloud