diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 38 | 
1 file changed, 22 insertions, 16 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 814247899ce..52b288926ef 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -965,18 +965,10 @@ let Predicates = [HasAVX, NoVLX] in {
             (VMOVAPSmr addr:$dst, VR128:$src)>;
   def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
             (VMOVAPSmr addr:$dst, VR128:$src)>;
-  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
-            (VMOVAPSmr addr:$dst, VR128:$src)>;
-  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
-            (VMOVAPSmr addr:$dst, VR128:$src)>;
   def : Pat<(store (v2i64 VR128:$src), addr:$dst),
             (VMOVUPSmr addr:$dst, VR128:$src)>;
   def : Pat<(store (v4i32 VR128:$src), addr:$dst),
             (VMOVUPSmr addr:$dst, VR128:$src)>;
-  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
-            (VMOVUPSmr addr:$dst, VR128:$src)>;
-  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
-            (VMOVUPSmr addr:$dst, VR128:$src)>;
 
   // 256-bit load/store
   def : Pat<(alignedloadv4i64 addr:$src),
@@ -987,18 +979,10 @@ let Predicates = [HasAVX, NoVLX] in {
             (VMOVAPSYmr addr:$dst, VR256:$src)>;
   def : Pat<(alignedstore256 (v8i32 VR256:$src), addr:$dst),
             (VMOVAPSYmr addr:$dst, VR256:$src)>;
-  def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst),
-            (VMOVAPSYmr addr:$dst, VR256:$src)>;
-  def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst),
-            (VMOVAPSYmr addr:$dst, VR256:$src)>;
   def : Pat<(store (v4i64 VR256:$src), addr:$dst),
             (VMOVUPSYmr addr:$dst, VR256:$src)>;
   def : Pat<(store (v8i32 VR256:$src), addr:$dst),
             (VMOVUPSYmr addr:$dst, VR256:$src)>;
-  def : Pat<(store (v16i16 VR256:$src), addr:$dst),
-            (VMOVUPSYmr addr:$dst, VR256:$src)>;
-  def : Pat<(store (v32i8 VR256:$src), addr:$dst),
-            (VMOVUPSYmr addr:$dst, VR256:$src)>;
 
   // Special patterns for storing subvector extracts of lower 128-bits
   // Its cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
@@ -1041,6 +1025,28 @@ let Predicates = [HasAVX, NoVLX] in {
             (VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
 }
 
+let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
+  // 128-bit load/store
+  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+            (VMOVAPSmr addr:$dst, VR128:$src)>;
+  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+            (VMOVAPSmr addr:$dst, VR128:$src)>;
+  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+            (VMOVUPSmr addr:$dst, VR128:$src)>;
+  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+            (VMOVUPSmr addr:$dst, VR128:$src)>;
+
+  // 256-bit load/store
+  def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst),
+            (VMOVAPSYmr addr:$dst, VR256:$src)>;
+  def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst),
+            (VMOVAPSYmr addr:$dst, VR256:$src)>;
+  def : Pat<(store (v16i16 VR256:$src), addr:$dst),
+            (VMOVUPSYmr addr:$dst, VR256:$src)>;
+  def : Pat<(store (v32i8 VR256:$src), addr:$dst),
+            (VMOVUPSYmr addr:$dst, VR256:$src)>;
+}
+
 // Use movaps / movups for SSE integer load / store (one byte shorter).
 // The instructions selected below are then converted to MOVDQA/MOVDQU
 // during the SSE domain pass.

