diff options
author | Craig Topper <craig.topper@intel.com> | 2017-10-10 22:40:31 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2017-10-10 22:40:31 +0000 |
commit | bb0e316dc749d0cfc62504d46d4fb4ee6936c8bc (patch) | |
tree | 63820a2d991867b78d3b5b869c22fb57e6cde26f | |
parent | d97d35e1500d98f4f50938c5ecae98a1301a9a10 (diff) | |
download | bcm5719-llvm-bb0e316dc749d0cfc62504d46d4fb4ee6936c8bc.tar.gz bcm5719-llvm-bb0e316dc749d0cfc62504d46d4fb4ee6936c8bc.zip |
[X86] Add broadcast patterns that allow a scalar_to_vector between the broadcast and the load.
We already have these patterns for AVX512VL, but not for AVX1 or AVX2.
llvm-svn: 315382
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 18 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx2-vbroadcast.ll | 3 |
2 files changed, 19 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index c2f08642851..3bbe31071e0 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -7381,6 +7381,15 @@ let ExeDomain = SSEPackedDouble, Predicates = [HasAVX2, NoVLX] in def VBROADCASTSDYrr : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256, v4f64, v2f64, WriteFShuffle256>, VEX_L; +let Predicates = [HasAVX, NoVLX] in { + def : Pat<(v4f32 (X86VBroadcast (v4f32 (scalar_to_vector (loadf32 addr:$src))))), + (VBROADCASTSSrm addr:$src)>; + def : Pat<(v8f32 (X86VBroadcast (v4f32 (scalar_to_vector (loadf32 addr:$src))))), + (VBROADCASTSSYrm addr:$src)>; + def : Pat<(v4f64 (X86VBroadcast (v2f64 (scalar_to_vector (loadf64 addr:$src))))), + (VBROADCASTSDYrm addr:$src)>; +} + //===----------------------------------------------------------------------===// // VBROADCAST*128 - Load from memory and broadcast 128-bit vector to both // halves of a 256-bit vector. @@ -7861,6 +7870,15 @@ let Predicates = [HasAVX2, NoVLX] in { (VPBROADCASTQrm addr:$src)>; def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))), (VPBROADCASTQYrm addr:$src)>; + + def : Pat<(v4i32 (X86VBroadcast (v4i32 (scalar_to_vector (loadi32 addr:$src))))), + (VPBROADCASTDrm addr:$src)>; + def : Pat<(v8i32 (X86VBroadcast (v4i32 (scalar_to_vector (loadi32 addr:$src))))), + (VPBROADCASTDYrm addr:$src)>; + def : Pat<(v2i64 (X86VBroadcast (v2i64 (scalar_to_vector (loadi64 addr:$src))))), + (VPBROADCASTQrm addr:$src)>; + def : Pat<(v4i64 (X86VBroadcast (v2i64 (scalar_to_vector (loadi64 addr:$src))))), + (VPBROADCASTQYrm addr:$src)>; } let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. 
diff --git a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll index 90e1232a8b9..08a11607eed 100644 --- a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll +++ b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll @@ -273,8 +273,7 @@ define <16 x i16> @broadcast_mem_v4i16_v16i16(<4 x i16>* %ptr) { ; ; X64-AVX2-LABEL: broadcast_mem_v4i16_v16i16: ; X64-AVX2: ## BB#0: -; X64-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X64-AVX2-NEXT: vbroadcastsd %xmm0, %ymm0 +; X64-AVX2-NEXT: vbroadcastsd (%rdi), %ymm0 ; X64-AVX2-NEXT: retq ; ; X32-AVX512VL-LABEL: broadcast_mem_v4i16_v16i16: |