diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 16 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/pr39733.ll | 44 |
2 files changed, 52 insertions, 8 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 794c7971e44..472ee710310 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -5202,12 +5202,12 @@ defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem, NoVLX>; // Any_extend_vector_inreg is currently legalized to zero_extend_vector_inreg. multiclass SS41I_pmovx_avx2_patterns_base<string OpcPrefix, SDNode ExtOp> { // Register-Register patterns - let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { + let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))), (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>; } - let Predicates = [HasAVX, NoVLX] in { + let Predicates = [HasAVX2, NoVLX] in { def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))), (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>; @@ -5216,7 +5216,7 @@ multiclass SS41I_pmovx_avx2_patterns_base<string OpcPrefix, SDNode ExtOp> { } // AVX2 Register-Memory patterns - let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { + let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))), (!cast<I>(OpcPrefix#BWYrm) addr:$src)>; def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))), @@ -5225,7 +5225,7 @@ multiclass SS41I_pmovx_avx2_patterns_base<string OpcPrefix, SDNode ExtOp> { (!cast<I>(OpcPrefix#BWYrm) addr:$src)>; } - let Predicates = [HasAVX, NoVLX] in { + let Predicates = [HasAVX2, NoVLX] in { def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))), (!cast<I>(OpcPrefix#WDYrm) addr:$src)>; def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))), @@ -5248,7 +5248,7 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, SS41I_pmovx_avx2_patterns_base<OpcPrefix, ExtOp> { // Register-Register patterns - let Predicates = [HasAVX, NoVLX] in { + let Predicates = [HasAVX2, NoVLX] in { def : Pat<(v8i32 (InVecOp (v16i8 VR128:$src))), (!cast<I>(OpcPrefix#BDYrr) VR128:$src)>; def : Pat<(v4i64 (InVecOp (v16i8 VR128:$src))), @@ -5259,11 +5259,11 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, } // Simple Register-Memory patterns - let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { + let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { def : Pat<(v16i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)), (!cast<I>(OpcPrefix#BWYrm) addr:$src)>; } - let Predicates = [HasAVX, NoVLX] in { + let Predicates = [HasAVX2, NoVLX] in { def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)), (!cast<I>(OpcPrefix#BDYrm) addr:$src)>; def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)), @@ -5279,7 +5279,7 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, } // AVX2 Register-Memory patterns - let Predicates = [HasAVX, NoVLX] in { + let Predicates = [HasAVX2, NoVLX] in { def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (!cast<I>(OpcPrefix#BDYrm) addr:$src)>; def : Pat<(v8i32 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))), diff --git a/llvm/test/CodeGen/X86/pr39733.ll b/llvm/test/CodeGen/X86/pr39733.ll new file mode 100644 index 00000000000..4c7153852d2 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr39733.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx -O0 | FileCheck %s + +; We should not be emitting a sign extend using a %ymm register. + +define void @test55() { +; CHECK-LABEL: test55: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: andq $-32, %rsp +; CHECK-NEXT: subq $96, %rsp +; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [26680,34632,63774,2423,35015,60307,6240,1951] +; CHECK-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm0 +; CHECK-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm0 +; CHECK-NEXT: vpmovsxwd %xmm0, %xmm1 +; CHECK-NEXT: # implicit-def: $ymm2 +; CHECK-NEXT: vmovaps %xmm1, %xmm2 +; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0 +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm2 +; CHECK-NEXT: vmovdqa %ymm2, (%rsp) +; CHECK-NEXT: movq %rbp, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %id11762 = alloca <8 x i16>, align 16 + %.compoundliteral = alloca <8 x i16>, align 16 + %id11761 = alloca <8 x i32>, align 32 + store <8 x i16> <i16 26680, i16 -30904, i16 -1762, i16 2423, i16 -30521, i16 -5229, i16 6240, i16 1951>, <8 x i16>* %.compoundliteral, align 16 + %0 = load <8 x i16>, <8 x i16>* %.compoundliteral, align 16 + store <8 x i16> %0, <8 x i16>* %id11762, align 16 + %1 = load <8 x i16>, <8 x i16>* %id11762, align 16 + %conv = sext <8 x i16> %1 to <8 x i32> + store <8 x i32> %conv, <8 x i32>* %id11761, align 32 + ret void +} |

