diff options
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.td | 9 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 14 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/extract-store.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/fp128-select.ll | 2 |
4 files changed, 21 insertions, 8 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 81ab1e0dbb8..25a41a67ad5 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -1093,6 +1093,15 @@ def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>; def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>; def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>; def loadf128 : PatFrag<(ops node:$ptr), (f128 (load node:$ptr))>; +def alignedloadf128 : PatFrag<(ops node:$ptr), (f128 (load node:$ptr)), [{ + LoadSDNode *Ld = cast<LoadSDNode>(N); + return Ld->getAlignment() >= Ld->getMemoryVT().getStoreSize(); +}]>; +def memopf128 : PatFrag<(ops node:$ptr), (f128 (load node:$ptr)), [{ + LoadSDNode *Ld = cast<LoadSDNode>(N); + return Subtarget->hasSSEUnalignedMem() || + Ld->getAlignment() >= Ld->getMemoryVT().getStoreSize(); +}]>; def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>; def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 71265b848f9..75c7e32c7f8 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -8228,14 +8228,18 @@ let Predicates = [UseAVX2] in { // Extra selection patterns for FR128, f128, f128mem // movaps is shorter than movdqa. movaps is in SSE and movdqa is in SSE2. -def : Pat<(store (f128 FR128:$src), addr:$dst), +def : Pat<(alignedstore (f128 FR128:$src), addr:$dst), (MOVAPSmr addr:$dst, (COPY_TO_REGCLASS (f128 FR128:$src), VR128))>; +def : Pat<(store (f128 FR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, (COPY_TO_REGCLASS (f128 FR128:$src), VR128))>; -def : Pat<(loadf128 addr:$src), +def : Pat<(alignedloadf128 addr:$src), (COPY_TO_REGCLASS (MOVAPSrm addr:$src), FR128)>; +def : Pat<(loadf128 addr:$src), + (COPY_TO_REGCLASS (MOVUPSrm addr:$src), FR128)>; // andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2 -def : Pat<(X86fand FR128:$src1, (loadf128 addr:$src2)), +def : Pat<(X86fand FR128:$src1, (memopf128 addr:$src2)), (COPY_TO_REGCLASS (ANDPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2), FR128)>; @@ -8250,7 +8254,7 @@ def : Pat<(and FR128:$src1, FR128:$src2), (ANDPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; -def : Pat<(X86for FR128:$src1, (loadf128 addr:$src2)), +def : Pat<(X86for FR128:$src1, (memopf128 addr:$src2)), (COPY_TO_REGCLASS (ORPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2), FR128)>; @@ -8265,7 +8269,7 @@ def : Pat<(or FR128:$src1, FR128:$src2), (ORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; -def : Pat<(X86fxor FR128:$src1, (loadf128 addr:$src2)), +def : Pat<(X86fxor FR128:$src1, (memopf128 addr:$src2)), (COPY_TO_REGCLASS (XORPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2), FR128)>; diff --git a/llvm/test/CodeGen/X86/extract-store.ll b/llvm/test/CodeGen/X86/extract-store.ll index 871d66d6e84..41c2f5c495b 100644 --- a/llvm/test/CodeGen/X86/extract-store.ll +++ b/llvm/test/CodeGen/X86/extract-store.ll @@ -554,7 +554,7 @@ define void @extract_f128_0(fp128* nocapture %dst, <2 x fp128> %foo) nounwind { ; ; SSE-F128-LABEL: extract_f128_0: ; SSE-F128: # %bb.0: -; SSE-F128-NEXT: movaps %xmm0, (%rdi) +; SSE-F128-NEXT: movups %xmm0, (%rdi) ; SSE-F128-NEXT: retq %vecext = extractelement <2 x fp128> %foo, i32 0 store fp128 %vecext, fp128* %dst, align 1 @@ -606,7 +606,7 @@ define void @extract_f128_1(fp128* nocapture %dst, <2 x fp128> %foo) nounwind { ; ; SSE-F128-LABEL: extract_f128_1: ; SSE-F128: # %bb.0: -; SSE-F128-NEXT: movaps %xmm1, (%rdi) +; SSE-F128-NEXT: movups %xmm1, (%rdi) ; SSE-F128-NEXT: retq %vecext = extractelement <2 x fp128> %foo, i32 1 store fp128 %vecext, fp128* %dst, align 1 diff --git a/llvm/test/CodeGen/X86/fp128-select.ll b/llvm/test/CodeGen/X86/fp128-select.ll index 85f7d97c985..503c7a9291e 100644 --- a/llvm/test/CodeGen/X86/fp128-select.ll +++ b/llvm/test/CodeGen/X86/fp128-select.ll @@ -18,7 +18,7 @@ define void @test_select(fp128* %p, fp128* %q, i1 zeroext %c) { ; MMX-NEXT: movaps %xmm0, (%rsi) ; MMX-NEXT: retq ; MMX-NEXT: .LBB0_1: -; MMX-NEXT: movaps (%rdi), %xmm0 +; MMX-NEXT: movups (%rdi), %xmm0 ; MMX-NEXT: movaps %xmm0, (%rsi) ; MMX-NEXT: retq ; |