summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.td9
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td14
-rw-r--r--llvm/test/CodeGen/X86/extract-store.ll4
-rw-r--r--llvm/test/CodeGen/X86/fp128-select.ll2
4 files changed, 21 insertions, 8 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 81ab1e0dbb8..25a41a67ad5 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -1093,6 +1093,15 @@ def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
def loadf128 : PatFrag<(ops node:$ptr), (f128 (load node:$ptr))>;
+def alignedloadf128 : PatFrag<(ops node:$ptr), (f128 (load node:$ptr)), [{
+ LoadSDNode *Ld = cast<LoadSDNode>(N);
+ return Ld->getAlignment() >= Ld->getMemoryVT().getStoreSize();
+}]>;
+def memopf128 : PatFrag<(ops node:$ptr), (f128 (load node:$ptr)), [{
+ LoadSDNode *Ld = cast<LoadSDNode>(N);
+ return Subtarget->hasSSEUnalignedMem() ||
+ Ld->getAlignment() >= Ld->getMemoryVT().getStoreSize();
+}]>;
def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 71265b848f9..75c7e32c7f8 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -8228,14 +8228,18 @@ let Predicates = [UseAVX2] in {
// Extra selection patterns for FR128, f128, f128mem
// movaps is shorter than movdqa. movaps is in SSE and movdqa is in SSE2.
-def : Pat<(store (f128 FR128:$src), addr:$dst),
+def : Pat<(alignedstore (f128 FR128:$src), addr:$dst),
(MOVAPSmr addr:$dst, (COPY_TO_REGCLASS (f128 FR128:$src), VR128))>;
+def : Pat<(store (f128 FR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, (COPY_TO_REGCLASS (f128 FR128:$src), VR128))>;
-def : Pat<(loadf128 addr:$src),
+def : Pat<(alignedloadf128 addr:$src),
(COPY_TO_REGCLASS (MOVAPSrm addr:$src), FR128)>;
+def : Pat<(loadf128 addr:$src),
+ (COPY_TO_REGCLASS (MOVUPSrm addr:$src), FR128)>;
// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
-def : Pat<(X86fand FR128:$src1, (loadf128 addr:$src2)),
+def : Pat<(X86fand FR128:$src1, (memopf128 addr:$src2)),
(COPY_TO_REGCLASS
(ANDPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2),
FR128)>;
@@ -8250,7 +8254,7 @@ def : Pat<(and FR128:$src1, FR128:$src2),
(ANDPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
(COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
-def : Pat<(X86for FR128:$src1, (loadf128 addr:$src2)),
+def : Pat<(X86for FR128:$src1, (memopf128 addr:$src2)),
(COPY_TO_REGCLASS
(ORPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2),
FR128)>;
@@ -8265,7 +8269,7 @@ def : Pat<(or FR128:$src1, FR128:$src2),
(ORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
(COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
-def : Pat<(X86fxor FR128:$src1, (loadf128 addr:$src2)),
+def : Pat<(X86fxor FR128:$src1, (memopf128 addr:$src2)),
(COPY_TO_REGCLASS
(XORPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2),
FR128)>;
diff --git a/llvm/test/CodeGen/X86/extract-store.ll b/llvm/test/CodeGen/X86/extract-store.ll
index 871d66d6e84..41c2f5c495b 100644
--- a/llvm/test/CodeGen/X86/extract-store.ll
+++ b/llvm/test/CodeGen/X86/extract-store.ll
@@ -554,7 +554,7 @@ define void @extract_f128_0(fp128* nocapture %dst, <2 x fp128> %foo) nounwind {
;
; SSE-F128-LABEL: extract_f128_0:
; SSE-F128: # %bb.0:
-; SSE-F128-NEXT: movaps %xmm0, (%rdi)
+; SSE-F128-NEXT: movups %xmm0, (%rdi)
; SSE-F128-NEXT: retq
%vecext = extractelement <2 x fp128> %foo, i32 0
store fp128 %vecext, fp128* %dst, align 1
@@ -606,7 +606,7 @@ define void @extract_f128_1(fp128* nocapture %dst, <2 x fp128> %foo) nounwind {
;
; SSE-F128-LABEL: extract_f128_1:
; SSE-F128: # %bb.0:
-; SSE-F128-NEXT: movaps %xmm1, (%rdi)
+; SSE-F128-NEXT: movups %xmm1, (%rdi)
; SSE-F128-NEXT: retq
%vecext = extractelement <2 x fp128> %foo, i32 1
store fp128 %vecext, fp128* %dst, align 1
diff --git a/llvm/test/CodeGen/X86/fp128-select.ll b/llvm/test/CodeGen/X86/fp128-select.ll
index 85f7d97c985..503c7a9291e 100644
--- a/llvm/test/CodeGen/X86/fp128-select.ll
+++ b/llvm/test/CodeGen/X86/fp128-select.ll
@@ -18,7 +18,7 @@ define void @test_select(fp128* %p, fp128* %q, i1 zeroext %c) {
; MMX-NEXT: movaps %xmm0, (%rsi)
; MMX-NEXT: retq
; MMX-NEXT: .LBB0_1:
-; MMX-NEXT: movaps (%rdi), %xmm0
+; MMX-NEXT: movups (%rdi), %xmm0
; MMX-NEXT: movaps %xmm0, (%rsi)
; MMX-NEXT: retq
;
OpenPOWER on IntegriCloud