| author | Craig Topper <craig.topper@intel.com> | 2017-07-31 17:35:44 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2017-07-31 17:35:44 +0000 |
| commit | cb0e74975a516d1fbc83e674c36e4cca3fee7731 (patch) | |
| tree | cdb0c6e1ea2ac97a840ff0ee08c7f7594e21c33f /llvm/lib | |
| parent | ed99e4c5b2b881ec906e748dbd579d7ee75bc6eb (diff) | |
[AVX-512] Remove patterns that select vmovdqu8/16 for unmasked loads. Prefer vmovdqa64/vmovdqu64 instead.
These were taking priority over the aligned load instructions, since there is no vmovdqa8/16. I don't think there is really a difference between aligned and unaligned loads on newer CPUs, so I don't think it matters which instructions we use.
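As an illustration, here is a minimal sketch of the kind of load this affects (a hypothetical example, not taken from the commit's tests; the before/after instructions are my reading of the patch):

```llvm
; An unmasked, sufficiently aligned byte-vector load. The old
; bitcast-rooted pattern matched this as vmovdqu8; with that pattern
; removed, isel can fall through to the aligned vmovdqa64.
define <64 x i8> @load_v64i8(<64 x i8>* %p) {
  %v = load <64 x i8>, <64 x i8>* %p, align 64
  ret <64 x i8> %v
}
; before (sketch): vmovdqu8  (%rdi), %zmm0
; after  (sketch): vmovdqa64 (%rdi), %zmm0
```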
But with this change we reduce the size of the isel table a little, and we allow the alignment information to pass through to the EVEX->VEX pass and produce the same output as AVX/AVX2 in some cases.
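The EVEX->VEX point, sketched on a 256-bit load (again hypothetical, assuming a target with AVX512BW and AVX512VL):

```llvm
; With the unaligned-byte pattern gone, this selects vmovdqa64, which
; the EVEX->VEX compression pass can shrink to the plain VEX vmovdqa
; that AVX/AVX2 codegen would emit. vmovdqu8 has no VEX form, so it
; would have stayed EVEX-encoded.
define <32 x i8> @load_v32i8(<32 x i8>* %p) {
  %v = load <32 x i8>, <32 x i8>* %p, align 32
  ret <32 x i8> %v
}
; expected (sketch): vmovdqa (%rdi), %ymm0
```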
I also generally dislike patterns rooted in a bitcast, which these were.
Differential Revision: https://reviews.llvm.org/D35977
llvm-svn: 309589
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 29 |
1 file changed, 18 insertions(+), 11 deletions(-)
```diff
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 079116353bc..c0f49e45fe8 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -3249,6 +3249,7 @@ defm : mask_shift_lowering<VK2, v2i1>, Requires<[HasAVX512]>;
 
 multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        PatFrag ld_frag, PatFrag mload,
+                       bit NoRMPattern = 0,
                        SDPatternOperator SelectOprr = vselect> {
   let hasSideEffects = 0 in {
   def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
@@ -3263,11 +3264,13 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                       _.ImmAllZerosV)))], _.ExeDomain>,
                    EVEX, EVEX_KZ;
 
-  let canFoldAsLoad = 1, isReMaterializable = 1,
+  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1,
       SchedRW = [WriteLoad] in
   def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                    [(set _.RC:$dst, (_.VT (bitconvert (ld_frag addr:$src))))],
+                    !if(NoRMPattern, [],
+                        [(set _.RC:$dst,
+                          (_.VT (bitconvert (ld_frag addr:$src))))]),
                     _.ExeDomain>, EVEX;
 
   let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
@@ -3327,16 +3330,20 @@ multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
 
 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo _,
                           Predicate prd,
+                          bit NoRMPattern = 0,
                           SDPatternOperator SelectOprr = vselect> {
   let Predicates = [prd] in
   defm Z : avx512_load<opc, OpcodeStr, _.info512, _.info512.LdFrag,
-                       masked_load_unaligned, SelectOprr>, EVEX_V512;
+                       masked_load_unaligned, NoRMPattern,
+                       SelectOprr>, EVEX_V512;
 
   let Predicates = [prd, HasVLX] in {
   defm Z256 : avx512_load<opc, OpcodeStr, _.info256, _.info256.LdFrag,
-                          masked_load_unaligned, SelectOprr>, EVEX_V256;
+                          masked_load_unaligned, NoRMPattern,
+                          SelectOprr>, EVEX_V256;
   defm Z128 : avx512_load<opc, OpcodeStr, _.info128, _.info128.LdFrag,
-                          masked_load_unaligned, SelectOprr>, EVEX_V128;
+                          masked_load_unaligned, NoRMPattern,
+                          SelectOprr>, EVEX_V128;
   }
 }
@@ -3416,13 +3423,13 @@ defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
                PD, VEX_W, EVEX_CD8<64, CD8VF>;
 
 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
-                              null_frag>,
+                              0, null_frag>,
                avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
                                "VMOVUPS">,
                               PS, EVEX_CD8<32, CD8VF>;
 
 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
-                              null_frag>,
+                              0, null_frag>,
                avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
                                "VMOVUPD">,
                PD, VEX_W, EVEX_CD8<64, CD8VF>;
@@ -3439,24 +3446,24 @@ defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
                                        HasAVX512, "VMOVDQA64">,
                  PD, VEX_W, EVEX_CD8<64, CD8VF>;
 
-defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI>,
+defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI, 1>,
                 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info,
                                 HasBWI, "VMOVDQU8">,
                 XD, EVEX_CD8<8, CD8VF>;
 
-defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI>,
+defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, 1>,
                  avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info,
                                  HasBWI, "VMOVDQU16">,
                  XD, VEX_W, EVEX_CD8<16, CD8VF>;
 
 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
-                                null_frag>,
+                                0, null_frag>,
                  avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info,
                                  HasAVX512, "VMOVDQU32">,
                  XS, EVEX_CD8<32, CD8VF>;
 
 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
-                                null_frag>,
+                                0, null_frag>,
                  avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info,
                                  HasAVX512, "VMOVDQU64">,
                  XS, VEX_W, EVEX_CD8<64, CD8VF>;
```
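Note that NoRMPattern = 1 only disables the unmasked rm patterns; the masked patterns and the vmovdqu8/16 instructions themselves remain, since byte/word-granularity masking genuinely needs them. A hypothetical masked load for contrast (intrinsic signature as in LLVM of this era, with typed pointers):

```llvm
; Masked byte loads should still select vmovdqu8; only the plain
; unmasked-load pattern was removed by this commit.
declare <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>*, i32, <64 x i1>, <64 x i8>)

define <64 x i8> @masked_load_v64i8(<64 x i8>* %p, <64 x i1> %m, <64 x i8> %pt) {
  %v = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* %p, i32 64, <64 x i1> %m, <64 x i8> %pt)
  ret <64 x i8> %v
}
; expected (sketch): vmovdqu8 (%rdi), %zmm0 {%k1}
```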