diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-07-22 13:58:44 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-07-22 13:58:44 +0000 |
| commit | ea0d4f9962fbc1741a730ec74b655940ea15424b (patch) | |
| tree | 47e47fd7351dc3615c9c33add8a694638cec173d /llvm/lib/Target/X86/X86InstrAVX512.td | |
| parent | 22c9e931470fea2e25bef1f52128e54ec96da403 (diff) | |
| download | bcm5719-llvm-ea0d4f9962fbc1741a730ec74b655940ea15424b.tar.gz bcm5719-llvm-ea0d4f9962fbc1741a730ec74b655940ea15424b.zip | |
[X86][AVX] Added support for lowering to VBROADCASTF128/VBROADCASTI128 (reapplied)
As reported on PR26235, we don't currently make use of the VBROADCASTF128/VBROADCASTI128 instructions (or the AVX512 equivalents) to load+splat a 128-bit vector to both lanes of a 256-bit vector.
This patch enables lowering from subvector insertion/concatenation patterns and auto-upgrades the llvm.x86.avx.vbroadcastf128.pd.256 / llvm.x86.avx.vbroadcastf128.ps.256 intrinsics to match.
We could possibly investigate using VBROADCASTF128/VBROADCASTI128 to load repeated constants as well (similar to how we already do for scalar broadcasts).
Reapplied with fix for PR28657 - removed intrinsic definitions (clang companion patch to be be submitted shortly).
Differential Revision: https://reviews.llvm.org/D22460
llvm-svn: 276416
Diffstat (limited to 'llvm/lib/Target/X86/X86InstrAVX512.td')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 18 |
1 files changed, 18 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 8b6673277bd..890a5235d72 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -986,6 +986,10 @@ multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr, AVX5128IBase, EVEX; } +//===----------------------------------------------------------------------===// +// AVX-512 BROADCAST SUBVECTORS +// + defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", v16i32_info, v4i32x_info>, EVEX_V512, EVEX_CD8<32, CD8VT4>; @@ -1006,7 +1010,13 @@ defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4", v8f32x_info, v4f32x_info>, EVEX_V256, EVEX_CD8<32, CD8VT4>; + +def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))), + (VBROADCASTI32X4Z256rm addr:$src)>; +def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))), + (VBROADCASTI32X4Z256rm addr:$src)>; } + let Predicates = [HasVLX, HasDQI] in { defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2", v4i64x_info, v2i64x_info>, VEX_W, @@ -1015,6 +1025,14 @@ defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2", v4f64x_info, v2f64x_info>, VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>; } + +let Predicates = [HasVLX, NoDQI] in { +def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))), + (VBROADCASTF32X4Z256rm addr:$src)>; +def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))), + (VBROADCASTI32X4Z256rm addr:$src)>; +} + let Predicates = [HasDQI] in { defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2", v8i64_info, v2i64x_info>, VEX_W, |

