| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-02-21 16:41:44 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-02-21 16:41:44 +0000 |
| commit | 791955819c85b956c9f509ed0f274fbe59781d4f | |
| tree | 24eb7c0e87ffed8568b1e493b016d8db956914d0 | |
| parent | a6e95e1652bf42239562f86a665898530becd83f | |
[X86][AVX2] Fix VPBROADCASTQ folding on 32-bit targets.
As i64 isn't a legal value type on 32-bit targets, a broadcast of a scalar i64 is lowered through a zero-extending vector load (VZEXT_LOAD / X86vzload) rather than a plain i64 load, so the existing load-folding broadcast patterns never match. Add patterns that fold the VZEXT_LOAD directly into the memory forms of VPBROADCASTQ.
llvm-svn: 295733
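For illustration, here is a minimal IR sketch of the broadcast shape this patch targets (the function name is hypothetical; the pattern mirrors the v2i64 test case updated below):

```llvm
; Broadcast a scalar i64 across a <2 x i64> vector. On 32-bit targets the
; i64 argument lives on the stack, so the backend sees a zero-extending
; vector load (X86vzload) of the stack slot rather than a plain i64 load.
define <2 x i64> @broadcast_i64(i64 %a0) {
  %ins = insertelement <2 x i64> undef, i64 %a0, i32 0
  %bc = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %bc
}
```

Compiled with something like `llc -mtriple=i686-unknown -mattr=+avx2`, this previously emitted a `vmovq` load followed by a register-to-register `vpbroadcastq`; with the patterns below, the load folds into a single `vpbroadcastq` from the stack slot, as the updated CHECK lines show.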
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 11 |
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 5 |
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll | 6 |

3 files changed, 18 insertions(+), 4 deletions(-)
```diff
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 5c0be19f0cc..527e86d94b9 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -1030,7 +1030,18 @@ multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                            AVX5128IBase, EVEX;
 }
 
+let Predicates = [HasAVX512] in {
+  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
+  def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
+            (VPBROADCASTQZm addr:$src)>;
+}
+
 let Predicates = [HasVLX, HasBWI] in {
+  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
+  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
+            (VPBROADCASTQZ128m addr:$src)>;
+  def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
+            (VPBROADCASTQZ256m addr:$src)>;
   // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
   // This means we'll encounter truncated i32 loads; match that here.
   def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 7c7fa496e0b..2417cdc1904 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -8265,6 +8265,11 @@ defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
                                   v2i64, v4i64, NoVLX>;
 
 let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
+  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
+  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
+            (VPBROADCASTQrm addr:$src)>;
+  def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
+            (VPBROADCASTQYrm addr:$src)>;
   // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
   // This means we'll encounter truncated i32 loads; match that here.
   def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
index a705527fc23..e1a52e9d3ac 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -803,8 +803,7 @@ define <32 x i8> @combine_unpack_unpack_pshufb(<32 x i8> %a0) {
 define <16 x i8> @combine_broadcast_pshufb_insertion_v2i64(i64 %a0) {
 ; X32-LABEL: combine_broadcast_pshufb_insertion_v2i64:
 ; X32:       # BB#0:
-; X32-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT:    vpbroadcastq %xmm0, %xmm0
+; X32-NEXT:    vpbroadcastq {{[0-9]+}}(%esp), %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: combine_broadcast_pshufb_insertion_v2i64:
@@ -821,8 +820,7 @@ define <16 x i8> @combine_broadcast_pshufb_insertion_v2i64(i64 %a0) {
 define <8 x i32> @combine_broadcast_permd_insertion_v4i64(i64 %a0) {
 ; X32-LABEL: combine_broadcast_permd_insertion_v4i64:
 ; X32:       # BB#0:
-; X32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT:    vbroadcastsd %xmm0, %ymm0
+; X32-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: combine_broadcast_permd_insertion_v4i64:
```

