diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 38 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 19 |
2 files changed, 56 insertions, 1 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 2616f117731..48287f40f83 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -651,6 +651,44 @@ let ExeDomain = SSEPackedDouble in { avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VT1>; } +// SrcRC_v and SrcRC_s are RegisterClasses for vector and scalar +// representations of source +multiclass avx512_broadcast_pat<string InstName, SDNode OpNode, + X86VectorVTInfo _, RegisterClass SrcRC_v, + RegisterClass SrcRC_s> { + def : Pat<(_.VT (OpNode (!cast<ValueType>(_.EltTypeName) SrcRC_s:$src))), + (!cast<Instruction>(InstName##"r") + (COPY_TO_REGCLASS SrcRC_s:$src, SrcRC_v))>; + + let AddedComplexity = 30 in { + def : Pat<(_.VT (vselect _.KRCWM:$mask, + (OpNode (!cast<ValueType>(_.EltTypeName) SrcRC_s:$src)), + _.RC:$src0)), + (!cast<Instruction>(InstName##"rk") _.RC:$src0, _.KRCWM:$mask, + (COPY_TO_REGCLASS SrcRC_s:$src, SrcRC_v))>; + + def : Pat<(_.VT(vselect _.KRCWM:$mask, + (OpNode (!cast<ValueType>(_.EltTypeName) SrcRC_s:$src)), + _.ImmAllZerosV)), + (!cast<Instruction>(InstName##"rkz") _.KRCWM:$mask, + (COPY_TO_REGCLASS SrcRC_s:$src, SrcRC_v))>; + } +} + +defm : avx512_broadcast_pat<"VBROADCASTSSZ", X86VBroadcast, v16f32_info, + VR128X, FR32X>; +defm : avx512_broadcast_pat<"VBROADCASTSDZ", X86VBroadcast, v8f64_info, + VR128X, FR64X>; + +let Predicates = [HasVLX] in { + defm : avx512_broadcast_pat<"VBROADCASTSSZ256", X86VBroadcast, + v8f32x_info, VR128X, FR32X>; + defm : avx512_broadcast_pat<"VBROADCASTSSZ128", X86VBroadcast, + v4f32x_info, VR128X, FR32X>; + defm : avx512_broadcast_pat<"VBROADCASTSDZ256", X86VBroadcast, + v4f64x_info, VR128X, FR64X>; +} + def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))), (VBROADCASTSSZm addr:$src)>; def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))), diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 785e5ec10bb..02195a35318 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -659,6 +659,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0 }, { X86::VPABSDZrr, X86::VPABSDZrm, 0 }, { X86::VPABSQZrr, X86::VPABSQZrm, 0 }, + { X86::VBROADCASTSSZr, X86::VBROADCASTSSZm, TB_NO_REVERSE }, + { X86::VBROADCASTSDZr, X86::VBROADCASTSDZm, TB_NO_REVERSE }, // AVX-512 foldable instructions (256-bit versions) { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 }, { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256rm, TB_ALIGN_32 }, @@ -670,6 +672,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256rm, 0 }, { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256rm, 0 }, { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256rm, 0 }, + { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256m, TB_NO_REVERSE }, + { X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256m, TB_NO_REVERSE }, // AVX-512 foldable instructions (256-bit versions) { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 }, { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128rm, TB_ALIGN_16 }, @@ -681,6 +685,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128rm, 0 }, { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128rm, 0 }, { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128rm, 0 }, + { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128m, TB_NO_REVERSE }, // AES foldable instructions { X86::AESIMCrr, X86::AESIMCrm, TB_ALIGN_16 }, @@ -1321,6 +1326,13 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VALIGNQrri, X86::VALIGNQrmi, 0 }, { X86::VALIGNDrri, X86::VALIGNDrmi, 0 }, { X86::VPMULUDQZrr, X86::VPMULUDQZrm, 0 }, + { X86::VBROADCASTSSZrkz, X86::VBROADCASTSSZmkz, TB_NO_REVERSE }, + { X86::VBROADCASTSDZrkz, X86::VBROADCASTSDZmkz, TB_NO_REVERSE }, + + // AVX-512{F,VL} foldable instructions + { X86::VBROADCASTSSZ256rkz, X86::VBROADCASTSSZ256mkz, TB_NO_REVERSE }, + { X86::VBROADCASTSDZ256rkz, X86::VBROADCASTSDZ256mkz, TB_NO_REVERSE }, + { X86::VBROADCASTSSZ128rkz, X86::VBROADCASTSSZ128mkz, TB_NO_REVERSE }, // AES foldable instructions { X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 }, @@ -1501,7 +1513,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VBLENDMPDZrr, X86::VBLENDMPDZrm, 0 }, { X86::VBLENDMPSZrr, X86::VBLENDMPSZrm, 0 }, { X86::VPBLENDMDZrr, X86::VPBLENDMDZrm, 0 }, - { X86::VPBLENDMQZrr, X86::VPBLENDMQZrm, 0 } + { X86::VPBLENDMQZrr, X86::VPBLENDMQZrm, 0 }, + { X86::VBROADCASTSSZrk, X86::VBROADCASTSSZmk, TB_NO_REVERSE }, + { X86::VBROADCASTSDZrk, X86::VBROADCASTSDZmk, TB_NO_REVERSE }, + { X86::VBROADCASTSSZ256rk, X86::VBROADCASTSSZ256mk, TB_NO_REVERSE }, + { X86::VBROADCASTSDZ256rk, X86::VBROADCASTSDZ256mk, TB_NO_REVERSE }, + { X86::VBROADCASTSSZ128rk, X86::VBROADCASTSSZ128mk, TB_NO_REVERSE } }; for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) { |

