diff options
| author | Craig Topper <craig.topper@intel.com> | 2019-05-11 04:19:33 +0000 | 
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2019-05-11 04:19:33 +0000 | 
| commit | 31f7adb94f6a46862fc3cadb75967121b31083ed (patch) | |
| tree | 3e993a31c04f627b4ed03823147ea6c571a89cc3 /llvm | |
| parent | bdef12df8d6f0cf7ddb0a626cef377cbcd6d8e30 (diff) | |
| download | bcm5719-llvm-31f7adb94f6a46862fc3cadb75967121b31083ed.tar.gz bcm5719-llvm-31f7adb94f6a46862fc3cadb75967121b31083ed.zip  | |
[X86] Don't emit MOVNTDQA loads from fast-isel without SSE4.1.
We were checking for SSE4.1 for FP types, but not integer 128-bit types.
Fixes PR41837.
llvm-svn: 360512
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/X86/X86FastISel.cpp | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/fast-isel-nontemporal.ll | 72 | 
2 files changed, 57 insertions, 17 deletions
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 6fca1acb009..74464f28c02 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -396,7 +396,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
   case MVT::v2i64:
   case MVT::v8i16:
   case MVT::v16i8:
-    if (IsNonTemporal && Alignment >= 16)
+    if (IsNonTemporal && Alignment >= 16 && HasSSE41)
       Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
             HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
     else if (Alignment >= 16)
diff --git a/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll b/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll
index db1ebfe6060..37e380b2b48 100644
--- a/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll
@@ -300,10 +300,20 @@ entry:
 }
 
 define <16 x i8> @test_load_nt16xi8(<16 x i8>* nocapture %ptr) {
-; SSE-LABEL: test_load_nt16xi8:
-; SSE:       # %bb.0: # %entry
-; SSE-NEXT:    movntdqa (%rdi), %xmm0
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_load_nt16xi8:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    movdqa (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_load_nt16xi8:
+; SSE4A:       # %bb.0: # %entry
+; SSE4A-NEXT:    movdqa (%rdi), %xmm0
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_load_nt16xi8:
+; SSE41:       # %bb.0: # %entry
+; SSE41-NEXT:    movntdqa (%rdi), %xmm0
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: test_load_nt16xi8:
 ; AVX:       # %bb.0: # %entry
@@ -320,10 +330,20 @@ entry:
 }
 
 define <8 x i16> @test_load_nt8xi16(<8 x i16>* nocapture %ptr) {
-; SSE-LABEL: test_load_nt8xi16:
-; SSE:       # %bb.0: # %entry
-; SSE-NEXT:    movntdqa (%rdi), %xmm0
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_load_nt8xi16:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    movdqa (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_load_nt8xi16:
+; SSE4A:       # %bb.0: # %entry
+; SSE4A-NEXT:    movdqa (%rdi), %xmm0
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_load_nt8xi16:
+; SSE41:       # %bb.0: # %entry
+; SSE41-NEXT:    movntdqa (%rdi), %xmm0
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: test_load_nt8xi16:
 ; AVX:       # %bb.0: # %entry
@@ -340,10 +360,20 @@ entry:
 }
 
 define <4 x i32> @test_load_nt4xi32(<4 x i32>* nocapture %ptr) {
-; SSE-LABEL: test_load_nt4xi32:
-; SSE:       # %bb.0: # %entry
-; SSE-NEXT:    movntdqa (%rdi), %xmm0
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_load_nt4xi32:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    movdqa (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_load_nt4xi32:
+; SSE4A:       # %bb.0: # %entry
+; SSE4A-NEXT:    movdqa (%rdi), %xmm0
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_load_nt4xi32:
+; SSE41:       # %bb.0: # %entry
+; SSE41-NEXT:    movntdqa (%rdi), %xmm0
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: test_load_nt4xi32:
 ; AVX:       # %bb.0: # %entry
@@ -360,10 +390,20 @@ entry:
 }
 
 define <2 x i64> @test_load_nt2xi64(<2 x i64>* nocapture %ptr) {
-; SSE-LABEL: test_load_nt2xi64:
-; SSE:       # %bb.0: # %entry
-; SSE-NEXT:    movntdqa (%rdi), %xmm0
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_load_nt2xi64:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    movdqa (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_load_nt2xi64:
+; SSE4A:       # %bb.0: # %entry
+; SSE4A-NEXT:    movdqa (%rdi), %xmm0
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_load_nt2xi64:
+; SSE41:       # %bb.0: # %entry
+; SSE41-NEXT:    movntdqa (%rdi), %xmm0
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: test_load_nt2xi64:
 ; AVX:       # %bb.0: # %entry

