From 9ef92865ecca5bbe3bf9ef10a3bb569c16af8bdd Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 17 Jul 2018 20:16:18 +0000 Subject: [X86] Add patterns for folding full vector load into MOVHPS and MOVLPS with SSE1 only. llvm-svn: 337320 --- llvm/lib/Target/X86/X86InstrAVX512.td | 4 +++- llvm/lib/Target/X86/X86InstrSSE.td | 37 +++++++++++++++++++++-------------- 2 files changed, 25 insertions(+), 16 deletions(-) (limited to 'llvm/lib/Target/X86') diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 3a1f840139e..2035e49720f 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -6452,7 +6452,9 @@ multiclass avx512_mov_hilo_packed opc, string OpcodeStr, Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>, EVEX_4V; } -defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps, +// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in +// SSE1. And MOVLPS pattern is even more complex. +defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag, v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS; defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl, v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 448be0eda0e..3797d91fb31 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -661,19 +661,16 @@ let Predicates = [UseSSE1] in { // SSE 1 & 2 - Move Low packed FP Instructions //===----------------------------------------------------------------------===// -multiclass sse12_mov_hilo_packed_baseopc, SDNode psnode, SDNode pdnode, +multiclass sse12_mov_hilo_packed_baseopc, SDNode pdnode, string base_opc, string asm_opr> { + // No pattern as they need to be special cased between high and low. 
let hasSideEffects = 0, mayLoad = 1 in def PSrm : PI, PS, - Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>; + (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), + !strconcat(base_opc, "s", asm_opr), + [], SSEPackedSingle>, PS, + Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>; - let hasSideEffects = 0, mayLoad = 1 in def PDrm : PIopc, SDNode psnode, SDNode pdnode, Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>; } -multiclass sse12_mov_hilo_packedopc, SDPatternOperator psnode, - SDPatternOperator pdnode, string base_opc> { +multiclass sse12_mov_hilo_packedopc, SDPatternOperator pdnode, + string base_opc> { let Predicates = [UseAVX] in - defm V#NAME : sse12_mov_hilo_packed_base, VEX_4V, VEX_WIG; let Constraints = "$src1 = $dst" in - defm NAME : sse12_mov_hilo_packed_base; } -defm MOVL : sse12_mov_hilo_packed<0x12, null_frag, X86Movsd, "movlp">; +defm MOVL : sse12_mov_hilo_packed<0x12, X86Movsd, "movlp">; let SchedRW = [WriteFStore] in { let Predicates = [UseAVX] in { @@ -725,13 +722,18 @@ let Predicates = [UseSSE1] in { def : Pat<(store (i64 (extractelt (bc_v2i64 (v4f32 VR128:$src2)), (iPTR 0))), addr:$src1), (MOVLPSmr addr:$src1, VR128:$src2)>; + + // This pattern helps select MOVLPS on SSE1 only targets. With SSE2 we'll + // end up with a movsd or blend instead of shufp. 
+ def : Pat<(X86Shufp (memopv4f32 addr:$src2), VR128:$src1, (i8 -28)), + (MOVLPSrm VR128:$src1, addr:$src2)>; } //===----------------------------------------------------------------------===// // SSE 1 & 2 - Move Hi packed FP Instructions //===----------------------------------------------------------------------===// -defm MOVH : sse12_mov_hilo_packed<0x16, X86Movlhps, X86Unpckl, "movhp">; +defm MOVH : sse12_mov_hilo_packed<0x16, X86Unpckl, "movhp">; let SchedRW = [WriteFStore] in { // v2f64 extract element 1 is always custom lowered to unpack high to low @@ -796,6 +798,11 @@ let Predicates = [UseSSE1] in { def : Pat<(X86Movlhps VR128:$src1, (bc_v4f32 (v2i64 (X86vzload addr:$src2)))), (MOVHPSrm VR128:$src1, addr:$src2)>; + + // This pattern helps select MOVHPS on SSE1 only targets. With SSE2 we'll + // end up with a movsd or blend instead of shufp. + def : Pat<(X86Movlhps VR128:$src1, (memopv4f32 addr:$src2)), + (MOVHPSrm VR128:$src1, addr:$src2)>; } let Predicates = [UseSSE2] in { -- cgit v1.2.3