diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-06-18 09:50:13 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-06-18 09:50:13 +0000 |
commit | 7dd529e54da8daf143a359cac16dbaeafe9a7835 (patch) | |
tree | 2a0b010ea41fdc94aa252cb2cb736892e98c84b6 | |
parent | a1a4f5f12cc579be30d8eb61a4257d908c13e55a (diff) | |
download | bcm5719-llvm-7dd529e54da8daf143a359cac16dbaeafe9a7835.tar.gz bcm5719-llvm-7dd529e54da8daf143a359cac16dbaeafe9a7835.zip |
[X86] Replace any_extend* vector extensions with zero_extend* equivalents
First step toward addressing the vector-reduce-mul-widen.ll regression in D63281 - we should replace ANY_EXTEND/ANY_EXTEND_VECTOR_INREG in X86ISelDAGToDAG to avoid having to add duplicate patterns when treating any extensions as legal.
In future patches this will also allow us to keep any extension nodes around a lot longer in the DAG, which should mean that we can keep better track of undef elements that otherwise become zeros that we think we have to keep.
Differential Revision: https://reviews.llvm.org/D63326
llvm-svn: 363655
-rw-r--r-- | llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 20 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 36 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 81 |
3 files changed, 53 insertions, 84 deletions
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 414b6bcdab4..e06d36e3500 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -815,6 +815,26 @@ void X86DAGToDAGISel::PreprocessISelDAG() { CurDAG->DeleteNode(N); continue; } + case ISD::ANY_EXTEND: + case ISD::ANY_EXTEND_VECTOR_INREG: { + // Replace vector any extend with the zero extend equivalents so we don't + // need 2 sets of patterns. Ignore vXi1 extensions. + if (!N->getValueType(0).isVector() || + N->getOperand(0).getScalarValueSizeInBits() == 1) + break; + + unsigned NewOpc = N->getOpcode() == ISD::ANY_EXTEND + ? ISD::ZERO_EXTEND + : ISD::ZERO_EXTEND_VECTOR_INREG; + + SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0), + N->getOperand(0)); + --I; + CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); + ++I; + CurDAG->DeleteNode(N); + continue; + } case ISD::FCEIL: case ISD::FFLOOR: case ISD::FTRUNC: diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 991c790d750..9f4a75c6689 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -9732,41 +9732,6 @@ multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> { } } -multiclass AVX512_pmovx_patterns_aext<string OpcPrefix, SDNode ExtOp> : - AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> { - let Predicates = [HasVLX, HasBWI] in { - def : Pat<(v16i16 (ExtOp (v16i8 VR128X:$src))), - (!cast<I>(OpcPrefix#BWZ256rr) VR128X:$src)>; - } - - let Predicates = [HasVLX] in { - def : Pat<(v8i32 (ExtOp (v8i16 VR128X:$src))), - (!cast<I>(OpcPrefix#WDZ256rr) VR128X:$src)>; - - def : Pat<(v4i64 (ExtOp (v4i32 VR128X:$src))), - (!cast<I>(OpcPrefix#DQZ256rr) VR128X:$src)>; - } - - // 512-bit patterns - let Predicates = [HasBWI] in { - def : Pat<(v32i16 (ExtOp (v32i8 VR256X:$src))), - (!cast<I>(OpcPrefix#BWZrr) VR256X:$src)>; - } - let Predicates = [HasAVX512] in { - def 
: Pat<(v16i32 (ExtOp (v16i8 VR128X:$src))), - (!cast<I>(OpcPrefix#BDZrr) VR128X:$src)>; - def : Pat<(v16i32 (ExtOp (v16i16 VR256X:$src))), - (!cast<I>(OpcPrefix#WDZrr) VR256X:$src)>; - - def : Pat<(v8i64 (ExtOp (v8i16 VR128X:$src))), - (!cast<I>(OpcPrefix#WQZrr) VR128X:$src)>; - - def : Pat<(v8i64 (ExtOp (v8i32 VR256X:$src))), - (!cast<I>(OpcPrefix#DQZrr) VR256X:$src)>; - } -} - - multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp, SDNode InVecOp> : AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> { @@ -9872,7 +9837,6 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp, defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>; defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>; -defm : AVX512_pmovx_patterns_aext<"VPMOVZX", anyext>; // Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge // ext+trunc aggresively making it impossible to legalize the DAG to this diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 250f6819a1f..215c66166e2 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -4895,6 +4895,7 @@ def : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITOR64rrr)>, //===----------------------------------------------------------------------===// // SSE4.1 - Packed Move with Sign/Zero Extend +// NOTE: Any Extend is promoted to Zero Extend in X86ISelDAGToDAG.cpp //===----------------------------------------------------------------------===// multiclass SS41I_pmovx_rrrm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp, @@ -4942,71 +4943,42 @@ defm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem, NoVLX>; defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem, NoVLX>; -// Patterns that we also need for any_extend. -// Any_extend_vector_inreg is currently legalized to zero_extend_vector_inreg. 
-multiclass SS41I_pmovx_avx2_patterns_base<string OpcPrefix, SDNode ExtOp> { - // Register-Register patterns - let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { - def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))), - (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>; - } - - let Predicates = [HasAVX2, NoVLX] in { - def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))), - (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>; - - def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))), - (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>; - } - - // AVX2 Register-Memory patterns - let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { - def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))), - (!cast<I>(OpcPrefix#BWYrm) addr:$src)>; - def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))), - (!cast<I>(OpcPrefix#BWYrm) addr:$src)>; - def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))), - (!cast<I>(OpcPrefix#BWYrm) addr:$src)>; - } - - let Predicates = [HasAVX2, NoVLX] in { - def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))), - (!cast<I>(OpcPrefix#WDYrm) addr:$src)>; - def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))), - (!cast<I>(OpcPrefix#WDYrm) addr:$src)>; - def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))), - (!cast<I>(OpcPrefix#WDYrm) addr:$src)>; - - def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))), - (!cast<I>(OpcPrefix#DQYrm) addr:$src)>; - def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))), - (!cast<I>(OpcPrefix#DQYrm) addr:$src)>; - def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))), - (!cast<I>(OpcPrefix#DQYrm) addr:$src)>; - } -} - // AVX2 Patterns multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, - SDNode ExtOp, SDNode InVecOp> : - SS41I_pmovx_avx2_patterns_base<OpcPrefix, ExtOp> { - + SDNode ExtOp, SDNode InVecOp> { // Register-Register patterns + let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { + def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))), + (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>; + } let Predicates = [HasAVX2, NoVLX] in { def : Pat<(v8i32 (InVecOp (v16i8 
VR128:$src))), (!cast<I>(OpcPrefix#BDYrr) VR128:$src)>; def : Pat<(v4i64 (InVecOp (v16i8 VR128:$src))), (!cast<I>(OpcPrefix#BQYrr) VR128:$src)>; + def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))), + (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>; def : Pat<(v4i64 (InVecOp (v8i16 VR128:$src))), (!cast<I>(OpcPrefix#WQYrr) VR128:$src)>; + + def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))), + (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>; } // Simple Register-Memory patterns let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { def : Pat<(v16i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)), (!cast<I>(OpcPrefix#BWYrm) addr:$src)>; + + def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))), + (!cast<I>(OpcPrefix#BWYrm) addr:$src)>; + def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))), + (!cast<I>(OpcPrefix#BWYrm) addr:$src)>; + def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))), + (!cast<I>(OpcPrefix#BWYrm) addr:$src)>; } + let Predicates = [HasAVX2, NoVLX] in { def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)), (!cast<I>(OpcPrefix#BDYrm) addr:$src)>; @@ -5024,6 +4996,13 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, // AVX2 Register-Memory patterns let Predicates = [HasAVX2, NoVLX] in { + def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))), + (!cast<I>(OpcPrefix#WDYrm) addr:$src)>; + def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))), + (!cast<I>(OpcPrefix#WDYrm) addr:$src)>; + def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))), + (!cast<I>(OpcPrefix#WDYrm) addr:$src)>; + def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (!cast<I>(OpcPrefix#BDYrm) addr:$src)>; def : Pat<(v8i32 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))), @@ -5033,6 +5012,13 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, def : Pat<(v8i32 (InVecOp (loadv16i8 addr:$src))), (!cast<I>(OpcPrefix#BDYrm) addr:$src)>; + def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))), + (!cast<I>(OpcPrefix#DQYrm) addr:$src)>; + 
def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))), + (!cast<I>(OpcPrefix#DQYrm) addr:$src)>; + def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))), + (!cast<I>(OpcPrefix#DQYrm) addr:$src)>; + def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))), (!cast<I>(OpcPrefix#BQYrm) addr:$src)>; def : Pat<(v4i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))), @@ -5055,7 +5041,6 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", sext, sext_invec>; defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", zext, zext_invec>; -defm : SS41I_pmovx_avx2_patterns_base<"VPMOVZX", anyext>; // SSE4.1/AVX patterns. multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy, |