diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86InstrInfo.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 640 |
1 files changed, 105 insertions, 535 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 9a83c09dfdb..9df179d566c 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -1855,281 +1855,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) } static const X86MemoryFoldTableEntry MemoryFoldTable3[] = { - // FMA foldable instructions - { X86::VFMADD231SSr, X86::VFMADD231SSm, TB_ALIGN_NONE }, - { X86::VFMADD231SSr_Int, X86::VFMADD231SSm_Int, TB_ALIGN_NONE }, - { X86::VFMADD231SDr, X86::VFMADD231SDm, TB_ALIGN_NONE }, - { X86::VFMADD231SDr_Int, X86::VFMADD231SDm_Int, TB_ALIGN_NONE }, - { X86::VFMADD132SSr, X86::VFMADD132SSm, TB_ALIGN_NONE }, - { X86::VFMADD132SSr_Int, X86::VFMADD132SSm_Int, TB_ALIGN_NONE }, - { X86::VFMADD132SDr, X86::VFMADD132SDm, TB_ALIGN_NONE }, - { X86::VFMADD132SDr_Int, X86::VFMADD132SDm_Int, TB_ALIGN_NONE }, - { X86::VFMADD213SSr, X86::VFMADD213SSm, TB_ALIGN_NONE }, - { X86::VFMADD213SSr_Int, X86::VFMADD213SSm_Int, TB_ALIGN_NONE }, - { X86::VFMADD213SDr, X86::VFMADD213SDm, TB_ALIGN_NONE }, - { X86::VFMADD213SDr_Int, X86::VFMADD213SDm_Int, TB_ALIGN_NONE }, - { X86::VFMADD231SSZr, X86::VFMADD231SSZm, TB_ALIGN_NONE }, - { X86::VFMADD231SSZr_Int, X86::VFMADD231SSZm_Int, TB_ALIGN_NONE }, - { X86::VFMADD231SDZr, X86::VFMADD231SDZm, TB_ALIGN_NONE }, - { X86::VFMADD231SDZr_Int, X86::VFMADD231SDZm_Int, TB_ALIGN_NONE }, - { X86::VFMADD132SSZr, X86::VFMADD132SSZm, TB_ALIGN_NONE }, - { X86::VFMADD132SSZr_Int, X86::VFMADD132SSZm_Int, TB_ALIGN_NONE }, - { X86::VFMADD132SDZr, X86::VFMADD132SDZm, TB_ALIGN_NONE }, - { X86::VFMADD132SDZr_Int, X86::VFMADD132SDZm_Int, TB_ALIGN_NONE }, - { X86::VFMADD213SSZr, X86::VFMADD213SSZm, TB_ALIGN_NONE }, - { X86::VFMADD213SSZr_Int, X86::VFMADD213SSZm_Int, TB_ALIGN_NONE }, - { X86::VFMADD213SDZr, X86::VFMADD213SDZm, TB_ALIGN_NONE }, - { X86::VFMADD213SDZr_Int, X86::VFMADD213SDZm_Int, TB_ALIGN_NONE }, - - { X86::VFMADD231PSr, X86::VFMADD231PSm, TB_ALIGN_NONE }, - { X86::VFMADD231PDr, X86::VFMADD231PDm, TB_ALIGN_NONE }, - { X86::VFMADD132PSr, X86::VFMADD132PSm, TB_ALIGN_NONE }, - { X86::VFMADD132PDr, X86::VFMADD132PDm, TB_ALIGN_NONE }, - { X86::VFMADD213PSr, X86::VFMADD213PSm, TB_ALIGN_NONE }, - { X86::VFMADD213PDr, X86::VFMADD213PDm, TB_ALIGN_NONE }, - { X86::VFMADD231PSYr, X86::VFMADD231PSYm, TB_ALIGN_NONE }, - { X86::VFMADD231PDYr, X86::VFMADD231PDYm, TB_ALIGN_NONE }, - { X86::VFMADD132PSYr, X86::VFMADD132PSYm, TB_ALIGN_NONE }, - { X86::VFMADD132PDYr, X86::VFMADD132PDYm, TB_ALIGN_NONE }, - { X86::VFMADD213PSYr, X86::VFMADD213PSYm, TB_ALIGN_NONE }, - { X86::VFMADD213PDYr, X86::VFMADD213PDYm, TB_ALIGN_NONE }, - { X86::VFMADD231PSZr, X86::VFMADD231PSZm, TB_ALIGN_NONE }, - { X86::VFMADD231PDZr, X86::VFMADD231PDZm, TB_ALIGN_NONE }, - { X86::VFMADD132PSZr, X86::VFMADD132PSZm, TB_ALIGN_NONE }, - { X86::VFMADD132PDZr, X86::VFMADD132PDZm, TB_ALIGN_NONE }, - { X86::VFMADD213PSZr, X86::VFMADD213PSZm, TB_ALIGN_NONE }, - { X86::VFMADD213PDZr, X86::VFMADD213PDZm, TB_ALIGN_NONE }, - { X86::VFMADD231PSZ128r, X86::VFMADD231PSZ128m, TB_ALIGN_NONE }, - { X86::VFMADD231PDZ128r, X86::VFMADD231PDZ128m, TB_ALIGN_NONE }, - { X86::VFMADD132PSZ128r, X86::VFMADD132PSZ128m, TB_ALIGN_NONE }, - { X86::VFMADD132PDZ128r, X86::VFMADD132PDZ128m, TB_ALIGN_NONE }, - { X86::VFMADD213PSZ128r, X86::VFMADD213PSZ128m, TB_ALIGN_NONE }, - { X86::VFMADD213PDZ128r, X86::VFMADD213PDZ128m, TB_ALIGN_NONE }, - { X86::VFMADD231PSZ256r, X86::VFMADD231PSZ256m, TB_ALIGN_NONE }, - { X86::VFMADD231PDZ256r, X86::VFMADD231PDZ256m, TB_ALIGN_NONE }, - { X86::VFMADD132PSZ256r, X86::VFMADD132PSZ256m, TB_ALIGN_NONE }, - { X86::VFMADD132PDZ256r, X86::VFMADD132PDZ256m, TB_ALIGN_NONE }, - { X86::VFMADD213PSZ256r, X86::VFMADD213PSZ256m, TB_ALIGN_NONE }, - { X86::VFMADD213PDZ256r, X86::VFMADD213PDZ256m, TB_ALIGN_NONE }, - - { X86::VFNMADD231SSr, X86::VFNMADD231SSm, TB_ALIGN_NONE }, - { X86::VFNMADD231SSr_Int, X86::VFNMADD231SSm_Int, TB_ALIGN_NONE }, - { X86::VFNMADD231SDr, X86::VFNMADD231SDm, TB_ALIGN_NONE }, - { X86::VFNMADD231SDr_Int, X86::VFNMADD231SDm_Int, TB_ALIGN_NONE }, - { X86::VFNMADD132SSr, X86::VFNMADD132SSm, TB_ALIGN_NONE }, - { X86::VFNMADD132SSr_Int, X86::VFNMADD132SSm_Int, TB_ALIGN_NONE }, - { X86::VFNMADD132SDr, X86::VFNMADD132SDm, TB_ALIGN_NONE }, - { X86::VFNMADD132SDr_Int, X86::VFNMADD132SDm_Int, TB_ALIGN_NONE }, - { X86::VFNMADD213SSr, X86::VFNMADD213SSm, TB_ALIGN_NONE }, - { X86::VFNMADD213SSr_Int, X86::VFNMADD213SSm_Int, TB_ALIGN_NONE }, - { X86::VFNMADD213SDr, X86::VFNMADD213SDm, TB_ALIGN_NONE }, - { X86::VFNMADD213SDr_Int, X86::VFNMADD213SDm_Int, TB_ALIGN_NONE }, - { X86::VFNMADD231SSZr, X86::VFNMADD231SSZm, TB_ALIGN_NONE }, - { X86::VFNMADD231SSZr_Int, X86::VFNMADD231SSZm_Int, TB_ALIGN_NONE }, - { X86::VFNMADD231SDZr, X86::VFNMADD231SDZm, TB_ALIGN_NONE }, - { X86::VFNMADD231SDZr_Int, X86::VFNMADD231SDZm_Int, TB_ALIGN_NONE }, - { X86::VFNMADD132SSZr, X86::VFNMADD132SSZm, TB_ALIGN_NONE }, - { X86::VFNMADD132SSZr_Int, X86::VFNMADD132SSZm_Int, TB_ALIGN_NONE }, - { X86::VFNMADD132SDZr, X86::VFNMADD132SDZm, TB_ALIGN_NONE }, - { X86::VFNMADD132SDZr_Int, X86::VFNMADD132SDZm_Int, TB_ALIGN_NONE }, - { X86::VFNMADD213SSZr, X86::VFNMADD213SSZm, TB_ALIGN_NONE }, - { X86::VFNMADD213SSZr_Int, X86::VFNMADD213SSZm_Int, TB_ALIGN_NONE }, - { X86::VFNMADD213SDZr, X86::VFNMADD213SDZm, TB_ALIGN_NONE }, - { X86::VFNMADD213SDZr_Int, X86::VFNMADD213SDZm_Int, TB_ALIGN_NONE }, - - { X86::VFNMADD231PSr, X86::VFNMADD231PSm, TB_ALIGN_NONE }, - { X86::VFNMADD231PDr, X86::VFNMADD231PDm, TB_ALIGN_NONE }, - { X86::VFNMADD132PSr, X86::VFNMADD132PSm, TB_ALIGN_NONE }, - { X86::VFNMADD132PDr, X86::VFNMADD132PDm, TB_ALIGN_NONE }, - { X86::VFNMADD213PSr, X86::VFNMADD213PSm, TB_ALIGN_NONE }, - { X86::VFNMADD213PDr, X86::VFNMADD213PDm, TB_ALIGN_NONE }, - { X86::VFNMADD231PSYr, X86::VFNMADD231PSYm, TB_ALIGN_NONE }, - { X86::VFNMADD231PDYr, X86::VFNMADD231PDYm, TB_ALIGN_NONE }, - { X86::VFNMADD132PSYr, X86::VFNMADD132PSYm, TB_ALIGN_NONE }, - { X86::VFNMADD132PDYr, X86::VFNMADD132PDYm, TB_ALIGN_NONE }, - { X86::VFNMADD213PSYr, X86::VFNMADD213PSYm, TB_ALIGN_NONE }, - { X86::VFNMADD213PDYr, X86::VFNMADD213PDYm, TB_ALIGN_NONE }, - { X86::VFNMADD231PSZr, X86::VFNMADD231PSZm, TB_ALIGN_NONE }, - { X86::VFNMADD231PDZr, X86::VFNMADD231PDZm, TB_ALIGN_NONE }, - { X86::VFNMADD132PSZr, X86::VFNMADD132PSZm, TB_ALIGN_NONE }, - { X86::VFNMADD132PDZr, X86::VFNMADD132PDZm, TB_ALIGN_NONE }, - { X86::VFNMADD213PSZr, X86::VFNMADD213PSZm, TB_ALIGN_NONE }, - { X86::VFNMADD213PDZr, X86::VFNMADD213PDZm, TB_ALIGN_NONE }, - { X86::VFNMADD231PSZ128r, X86::VFNMADD231PSZ128m, TB_ALIGN_NONE }, - { X86::VFNMADD231PDZ128r, X86::VFNMADD231PDZ128m, TB_ALIGN_NONE }, - { X86::VFNMADD132PSZ128r, X86::VFNMADD132PSZ128m, TB_ALIGN_NONE }, - { X86::VFNMADD132PDZ128r, X86::VFNMADD132PDZ128m, TB_ALIGN_NONE }, - { X86::VFNMADD213PSZ128r, X86::VFNMADD213PSZ128m, TB_ALIGN_NONE }, - { X86::VFNMADD213PDZ128r, X86::VFNMADD213PDZ128m, TB_ALIGN_NONE }, - { X86::VFNMADD231PSZ256r, X86::VFNMADD231PSZ256m, TB_ALIGN_NONE }, - { X86::VFNMADD231PDZ256r, X86::VFNMADD231PDZ256m, TB_ALIGN_NONE }, - { X86::VFNMADD132PSZ256r, X86::VFNMADD132PSZ256m, TB_ALIGN_NONE }, - { X86::VFNMADD132PDZ256r, X86::VFNMADD132PDZ256m, TB_ALIGN_NONE }, - { X86::VFNMADD213PSZ256r, X86::VFNMADD213PSZ256m, TB_ALIGN_NONE }, - { X86::VFNMADD213PDZ256r, X86::VFNMADD213PDZ256m, TB_ALIGN_NONE }, - - { X86::VFMSUB231SSr, X86::VFMSUB231SSm, TB_ALIGN_NONE }, - { X86::VFMSUB231SSr_Int, X86::VFMSUB231SSm_Int, TB_ALIGN_NONE }, - { X86::VFMSUB231SDr, X86::VFMSUB231SDm, TB_ALIGN_NONE }, - { X86::VFMSUB231SDr_Int, X86::VFMSUB231SDm_Int, TB_ALIGN_NONE }, - { X86::VFMSUB132SSr, X86::VFMSUB132SSm, TB_ALIGN_NONE }, - { X86::VFMSUB132SSr_Int, X86::VFMSUB132SSm_Int, TB_ALIGN_NONE }, - { X86::VFMSUB132SDr, X86::VFMSUB132SDm, TB_ALIGN_NONE }, - { X86::VFMSUB132SDr_Int, X86::VFMSUB132SDm_Int, TB_ALIGN_NONE }, - { X86::VFMSUB213SSr, X86::VFMSUB213SSm, TB_ALIGN_NONE }, - { X86::VFMSUB213SSr_Int, X86::VFMSUB213SSm_Int, TB_ALIGN_NONE }, - { X86::VFMSUB213SDr, X86::VFMSUB213SDm, TB_ALIGN_NONE }, - { X86::VFMSUB213SDr_Int, X86::VFMSUB213SDm_Int, TB_ALIGN_NONE }, - { X86::VFMSUB231SSZr, X86::VFMSUB231SSZm, TB_ALIGN_NONE }, - { X86::VFMSUB231SSZr_Int, X86::VFMSUB231SSZm_Int, TB_ALIGN_NONE }, - { X86::VFMSUB231SDZr, X86::VFMSUB231SDZm, TB_ALIGN_NONE }, - { X86::VFMSUB231SDZr_Int, X86::VFMSUB231SDZm_Int, TB_ALIGN_NONE }, - { X86::VFMSUB132SSZr, X86::VFMSUB132SSZm, TB_ALIGN_NONE }, - { X86::VFMSUB132SSZr_Int, X86::VFMSUB132SSZm_Int, TB_ALIGN_NONE }, - { X86::VFMSUB132SDZr, X86::VFMSUB132SDZm, TB_ALIGN_NONE }, - { X86::VFMSUB132SDZr_Int, X86::VFMSUB132SDZm_Int, TB_ALIGN_NONE }, - { X86::VFMSUB213SSZr, X86::VFMSUB213SSZm, TB_ALIGN_NONE }, - { X86::VFMSUB213SSZr_Int, X86::VFMSUB213SSZm_Int, TB_ALIGN_NONE }, - { X86::VFMSUB213SDZr, X86::VFMSUB213SDZm, TB_ALIGN_NONE }, - { X86::VFMSUB213SDZr_Int, X86::VFMSUB213SDZm_Int, TB_ALIGN_NONE }, - - { X86::VFMSUB231PSr, X86::VFMSUB231PSm, TB_ALIGN_NONE }, - { X86::VFMSUB231PDr, X86::VFMSUB231PDm, TB_ALIGN_NONE }, - { X86::VFMSUB132PSr, X86::VFMSUB132PSm, TB_ALIGN_NONE }, - { X86::VFMSUB132PDr, X86::VFMSUB132PDm, TB_ALIGN_NONE }, - { X86::VFMSUB213PSr, X86::VFMSUB213PSm, TB_ALIGN_NONE }, - { X86::VFMSUB213PDr, X86::VFMSUB213PDm, TB_ALIGN_NONE }, - { X86::VFMSUB231PSYr, X86::VFMSUB231PSYm, TB_ALIGN_NONE }, - { X86::VFMSUB231PDYr, X86::VFMSUB231PDYm, TB_ALIGN_NONE }, - { X86::VFMSUB132PSYr, X86::VFMSUB132PSYm, TB_ALIGN_NONE }, - { X86::VFMSUB132PDYr, X86::VFMSUB132PDYm, TB_ALIGN_NONE }, - { X86::VFMSUB213PSYr, X86::VFMSUB213PSYm, TB_ALIGN_NONE }, - { X86::VFMSUB213PDYr, X86::VFMSUB213PDYm, TB_ALIGN_NONE }, - { X86::VFMSUB231PSZr, X86::VFMSUB231PSZm, TB_ALIGN_NONE }, - { X86::VFMSUB231PDZr, X86::VFMSUB231PDZm, TB_ALIGN_NONE }, - { X86::VFMSUB132PSZr, X86::VFMSUB132PSZm, TB_ALIGN_NONE }, - { X86::VFMSUB132PDZr, X86::VFMSUB132PDZm, TB_ALIGN_NONE }, - { X86::VFMSUB213PSZr, X86::VFMSUB213PSZm, TB_ALIGN_NONE }, - { X86::VFMSUB213PDZr, X86::VFMSUB213PDZm, TB_ALIGN_NONE }, - { X86::VFMSUB231PSZ128r, X86::VFMSUB231PSZ128m, TB_ALIGN_NONE }, - { X86::VFMSUB231PDZ128r, X86::VFMSUB231PDZ128m, TB_ALIGN_NONE }, - { X86::VFMSUB132PSZ128r, X86::VFMSUB132PSZ128m, TB_ALIGN_NONE }, - { X86::VFMSUB132PDZ128r, X86::VFMSUB132PDZ128m, TB_ALIGN_NONE }, - { X86::VFMSUB213PSZ128r, X86::VFMSUB213PSZ128m, TB_ALIGN_NONE }, - { X86::VFMSUB213PDZ128r, X86::VFMSUB213PDZ128m, TB_ALIGN_NONE }, - { X86::VFMSUB231PSZ256r, X86::VFMSUB231PSZ256m, TB_ALIGN_NONE }, - { X86::VFMSUB231PDZ256r, X86::VFMSUB231PDZ256m, TB_ALIGN_NONE }, - { X86::VFMSUB132PSZ256r, X86::VFMSUB132PSZ256m, TB_ALIGN_NONE }, - { X86::VFMSUB132PDZ256r, X86::VFMSUB132PDZ256m, TB_ALIGN_NONE }, - { X86::VFMSUB213PSZ256r, X86::VFMSUB213PSZ256m, TB_ALIGN_NONE }, - { X86::VFMSUB213PDZ256r, X86::VFMSUB213PDZ256m, TB_ALIGN_NONE }, - - { X86::VFNMSUB231SSr, X86::VFNMSUB231SSm, TB_ALIGN_NONE }, - { X86::VFNMSUB231SSr_Int, X86::VFNMSUB231SSm_Int, TB_ALIGN_NONE }, - { X86::VFNMSUB231SDr, X86::VFNMSUB231SDm, TB_ALIGN_NONE }, - { X86::VFNMSUB231SDr_Int, X86::VFNMSUB231SDm_Int, TB_ALIGN_NONE }, - { X86::VFNMSUB132SSr, X86::VFNMSUB132SSm, TB_ALIGN_NONE }, - { X86::VFNMSUB132SSr_Int, X86::VFNMSUB132SSm_Int, TB_ALIGN_NONE }, - { X86::VFNMSUB132SDr, X86::VFNMSUB132SDm, TB_ALIGN_NONE }, - { X86::VFNMSUB132SDr_Int, X86::VFNMSUB132SDm_Int, TB_ALIGN_NONE }, - { X86::VFNMSUB213SSr, X86::VFNMSUB213SSm, TB_ALIGN_NONE }, - { X86::VFNMSUB213SSr_Int, X86::VFNMSUB213SSm_Int, TB_ALIGN_NONE }, - { X86::VFNMSUB213SDr, X86::VFNMSUB213SDm, TB_ALIGN_NONE }, - { X86::VFNMSUB213SDr_Int, X86::VFNMSUB213SDm_Int, TB_ALIGN_NONE }, - - { X86::VFNMSUB231PSr, X86::VFNMSUB231PSm, TB_ALIGN_NONE }, - { X86::VFNMSUB231PDr, X86::VFNMSUB231PDm, TB_ALIGN_NONE }, - { X86::VFNMSUB132PSr, X86::VFNMSUB132PSm, TB_ALIGN_NONE }, - { X86::VFNMSUB132PDr, X86::VFNMSUB132PDm, TB_ALIGN_NONE }, - { X86::VFNMSUB213PSr, X86::VFNMSUB213PSm, TB_ALIGN_NONE }, - { X86::VFNMSUB213PDr, X86::VFNMSUB213PDm, TB_ALIGN_NONE }, - { X86::VFNMSUB231PSYr, X86::VFNMSUB231PSYm, TB_ALIGN_NONE }, - { X86::VFNMSUB231PDYr, X86::VFNMSUB231PDYm, TB_ALIGN_NONE }, - { X86::VFNMSUB132PSYr, X86::VFNMSUB132PSYm, TB_ALIGN_NONE }, - { X86::VFNMSUB132PDYr, X86::VFNMSUB132PDYm, TB_ALIGN_NONE }, - { X86::VFNMSUB213PSYr, X86::VFNMSUB213PSYm, TB_ALIGN_NONE }, - { X86::VFNMSUB213PDYr, X86::VFNMSUB213PDYm, TB_ALIGN_NONE }, - { X86::VFNMSUB231PSZr, X86::VFNMSUB231PSZm, TB_ALIGN_NONE }, - { X86::VFNMSUB231PDZr, X86::VFNMSUB231PDZm, TB_ALIGN_NONE }, - { X86::VFNMSUB132PSZr, X86::VFNMSUB132PSZm, TB_ALIGN_NONE }, - { X86::VFNMSUB132PDZr, X86::VFNMSUB132PDZm, TB_ALIGN_NONE }, - { X86::VFNMSUB213PSZr, X86::VFNMSUB213PSZm, TB_ALIGN_NONE }, - { X86::VFNMSUB213PDZr, X86::VFNMSUB213PDZm, TB_ALIGN_NONE }, - { X86::VFNMSUB231PSZ128r, X86::VFNMSUB231PSZ128m, TB_ALIGN_NONE }, - { X86::VFNMSUB231PDZ128r, X86::VFNMSUB231PDZ128m, TB_ALIGN_NONE }, - { X86::VFNMSUB132PSZ128r, X86::VFNMSUB132PSZ128m, TB_ALIGN_NONE }, - { X86::VFNMSUB132PDZ128r, X86::VFNMSUB132PDZ128m, TB_ALIGN_NONE }, - { X86::VFNMSUB213PSZ128r, X86::VFNMSUB213PSZ128m, TB_ALIGN_NONE }, - { X86::VFNMSUB213PDZ128r, X86::VFNMSUB213PDZ128m, TB_ALIGN_NONE }, - { X86::VFNMSUB231PSZ256r, X86::VFNMSUB231PSZ256m, TB_ALIGN_NONE }, - { X86::VFNMSUB231PDZ256r, X86::VFNMSUB231PDZ256m, TB_ALIGN_NONE }, - { X86::VFNMSUB132PSZ256r, X86::VFNMSUB132PSZ256m, TB_ALIGN_NONE }, - { X86::VFNMSUB132PDZ256r, X86::VFNMSUB132PDZ256m, TB_ALIGN_NONE }, - { X86::VFNMSUB213PSZ256r, X86::VFNMSUB213PSZ256m, TB_ALIGN_NONE }, - { X86::VFNMSUB213PDZ256r, X86::VFNMSUB213PDZ256m, TB_ALIGN_NONE }, - - { X86::VFMADDSUB231PSr, X86::VFMADDSUB231PSm, TB_ALIGN_NONE }, - { X86::VFMADDSUB231PDr, X86::VFMADDSUB231PDm, TB_ALIGN_NONE }, - { X86::VFMADDSUB132PSr, X86::VFMADDSUB132PSm, TB_ALIGN_NONE }, - { X86::VFMADDSUB132PDr, X86::VFMADDSUB132PDm, TB_ALIGN_NONE }, - { X86::VFMADDSUB213PSr, X86::VFMADDSUB213PSm, TB_ALIGN_NONE }, - { X86::VFMADDSUB213PDr, X86::VFMADDSUB213PDm, TB_ALIGN_NONE }, - { X86::VFMADDSUB231PSYr, X86::VFMADDSUB231PSYm, TB_ALIGN_NONE }, - { X86::VFMADDSUB231PDYr, X86::VFMADDSUB231PDYm, TB_ALIGN_NONE }, - { X86::VFMADDSUB132PSYr, X86::VFMADDSUB132PSYm, TB_ALIGN_NONE }, - { X86::VFMADDSUB132PDYr, X86::VFMADDSUB132PDYm, TB_ALIGN_NONE }, - { X86::VFMADDSUB213PSYr, X86::VFMADDSUB213PSYm, TB_ALIGN_NONE }, - { X86::VFMADDSUB213PDYr, X86::VFMADDSUB213PDYm, TB_ALIGN_NONE }, - { X86::VFMADDSUB231PSZr, X86::VFMADDSUB231PSZm, TB_ALIGN_NONE }, - { X86::VFMADDSUB231PDZr, X86::VFMADDSUB231PDZm, TB_ALIGN_NONE }, - { X86::VFMADDSUB132PSZr, X86::VFMADDSUB132PSZm, TB_ALIGN_NONE }, - { X86::VFMADDSUB132PDZr, X86::VFMADDSUB132PDZm, TB_ALIGN_NONE }, - { X86::VFMADDSUB213PSZr, X86::VFMADDSUB213PSZm, TB_ALIGN_NONE }, - { X86::VFMADDSUB213PDZr, X86::VFMADDSUB213PDZm, TB_ALIGN_NONE }, - { X86::VFMADDSUB231PSZ128r, X86::VFMADDSUB231PSZ128m, TB_ALIGN_NONE }, - { X86::VFMADDSUB231PDZ128r, X86::VFMADDSUB231PDZ128m, TB_ALIGN_NONE }, - { X86::VFMADDSUB132PSZ128r, X86::VFMADDSUB132PSZ128m, TB_ALIGN_NONE }, - { X86::VFMADDSUB132PDZ128r, X86::VFMADDSUB132PDZ128m, TB_ALIGN_NONE }, - { X86::VFMADDSUB213PSZ128r, X86::VFMADDSUB213PSZ128m, TB_ALIGN_NONE }, - { X86::VFMADDSUB213PDZ128r, X86::VFMADDSUB213PDZ128m, TB_ALIGN_NONE }, - { X86::VFMADDSUB231PSZ256r, X86::VFMADDSUB231PSZ256m, TB_ALIGN_NONE }, - { X86::VFMADDSUB231PDZ256r, X86::VFMADDSUB231PDZ256m, TB_ALIGN_NONE }, - { X86::VFMADDSUB132PSZ256r, X86::VFMADDSUB132PSZ256m, TB_ALIGN_NONE }, - { X86::VFMADDSUB132PDZ256r, X86::VFMADDSUB132PDZ256m, TB_ALIGN_NONE }, - { X86::VFMADDSUB213PSZ256r, X86::VFMADDSUB213PSZ256m, TB_ALIGN_NONE }, - { X86::VFMADDSUB213PDZ256r, X86::VFMADDSUB213PDZ256m, TB_ALIGN_NONE }, - - { X86::VFMSUBADD231PSr, X86::VFMSUBADD231PSm, TB_ALIGN_NONE }, - { X86::VFMSUBADD231PDr, X86::VFMSUBADD231PDm, TB_ALIGN_NONE }, - { X86::VFMSUBADD132PSr, X86::VFMSUBADD132PSm, TB_ALIGN_NONE }, - { X86::VFMSUBADD132PDr, X86::VFMSUBADD132PDm, TB_ALIGN_NONE }, - { X86::VFMSUBADD213PSr, X86::VFMSUBADD213PSm, TB_ALIGN_NONE }, - { X86::VFMSUBADD213PDr, X86::VFMSUBADD213PDm, TB_ALIGN_NONE }, - { X86::VFMSUBADD231PSYr, X86::VFMSUBADD231PSYm, TB_ALIGN_NONE }, - { X86::VFMSUBADD231PDYr, X86::VFMSUBADD231PDYm, TB_ALIGN_NONE }, - { X86::VFMSUBADD132PSYr, X86::VFMSUBADD132PSYm, TB_ALIGN_NONE }, - { X86::VFMSUBADD132PDYr, X86::VFMSUBADD132PDYm, TB_ALIGN_NONE }, - { X86::VFMSUBADD213PSYr, X86::VFMSUBADD213PSYm, TB_ALIGN_NONE }, - { X86::VFMSUBADD213PDYr, X86::VFMSUBADD213PDYm, TB_ALIGN_NONE }, - { X86::VFMSUBADD231PSZr, X86::VFMSUBADD231PSZm, TB_ALIGN_NONE }, - { X86::VFMSUBADD231PDZr, X86::VFMSUBADD231PDZm, TB_ALIGN_NONE }, - { X86::VFMSUBADD132PSZr, X86::VFMSUBADD132PSZm, TB_ALIGN_NONE }, - { X86::VFMSUBADD132PDZr, X86::VFMSUBADD132PDZm, TB_ALIGN_NONE }, - { X86::VFMSUBADD213PSZr, X86::VFMSUBADD213PSZm, TB_ALIGN_NONE }, - { X86::VFMSUBADD213PDZr, X86::VFMSUBADD213PDZm, TB_ALIGN_NONE }, - { X86::VFMSUBADD231PSZ128r, X86::VFMSUBADD231PSZ128m, TB_ALIGN_NONE }, - { X86::VFMSUBADD231PDZ128r, X86::VFMSUBADD231PDZ128m, TB_ALIGN_NONE }, - { X86::VFMSUBADD132PSZ128r, X86::VFMSUBADD132PSZ128m, TB_ALIGN_NONE }, - { X86::VFMSUBADD132PDZ128r, X86::VFMSUBADD132PDZ128m, TB_ALIGN_NONE }, - { X86::VFMSUBADD213PSZ128r, X86::VFMSUBADD213PSZ128m, TB_ALIGN_NONE }, - { X86::VFMSUBADD213PDZ128r, X86::VFMSUBADD213PDZ128m, TB_ALIGN_NONE }, - { X86::VFMSUBADD231PSZ256r, X86::VFMSUBADD231PSZ256m, TB_ALIGN_NONE }, - { X86::VFMSUBADD231PDZ256r, X86::VFMSUBADD231PDZ256m, TB_ALIGN_NONE }, - { X86::VFMSUBADD132PSZ256r, X86::VFMSUBADD132PSZ256m, TB_ALIGN_NONE }, - { X86::VFMSUBADD132PDZ256r, X86::VFMSUBADD132PDZ256m, TB_ALIGN_NONE }, - { X86::VFMSUBADD213PSZ256r, X86::VFMSUBADD213PSZ256m, TB_ALIGN_NONE }, - { X86::VFMSUBADD213PDZ256r, X86::VFMSUBADD213PDZ256m, TB_ALIGN_NONE }, - // FMA4 foldable patterns { X86::VFMADDSS4rr, X86::VFMADDSS4rm, TB_ALIGN_NONE }, { X86::VFMADDSD4rr, X86::VFMADDSD4rm, TB_ALIGN_NONE }, @@ -2234,6 +1959,13 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) // Index 3, folded load Entry.Flags | TB_INDEX_3 | TB_FOLDED_LOAD); } + auto I = X86InstrFMA3Info::rm_begin(); + auto E = X86InstrFMA3Info::rm_end(); + for (; I != E; ++I) + if (!I.getGroup()->isKMasked()) + AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable, + I.getRegOpcode(), I.getMemOpcode(), + TB_ALIGN_NONE | TB_INDEX_3 | TB_FOLDED_LOAD); static const X86MemoryFoldTableEntry MemoryFoldTable4[] = { // AVX-512 foldable instructions @@ -2283,6 +2015,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) // Index 4, folded load Entry.Flags | TB_INDEX_4 | TB_FOLDED_LOAD); } + for (I = X86InstrFMA3Info::rm_begin(); I != E; ++I) + if (I.getGroup()->isKMasked()) + AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable, + I.getRegOpcode(), I.getMemOpcode(), + TB_ALIGN_NONE | TB_INDEX_4 | TB_FOLDED_LOAD); } void @@ -3345,241 +3082,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, return NewMI; } -/// Returns true if the given instruction opcode is FMA3. -/// Otherwise, returns false. -/// The second parameter is optional and is used as the second return from -/// the function. It is set to true if the given instruction has FMA3 opcode -/// that is used for lowering of scalar FMA intrinsics, and it is set to false -/// otherwise. -static bool isFMA3(unsigned Opcode, bool &IsIntrinsic) { - IsIntrinsic = false; - -#define FMA3_CASE(Name, Modifier) \ -case X86::Name##r##Modifier: case X86::Name##m##Modifier: - -#define FMA3_SCALAR_PAIR(Name, Size, Modifier) \ - FMA3_CASE(Name##SD##Size, Modifier) \ - FMA3_CASE(Name##SS##Size, Modifier) - -#define FMA3_PACKED_PAIR(Name, Size) \ - FMA3_CASE(Name##PD##Size, ) \ - FMA3_CASE(Name##PS##Size, ) - -#define FMA3_PACKED_SET(Form, Size) \ - FMA3_PACKED_PAIR(VFMADD##Form, Size) \ - FMA3_PACKED_PAIR(VFMSUB##Form, Size) \ - FMA3_PACKED_PAIR(VFNMADD##Form, Size) \ - FMA3_PACKED_PAIR(VFNMSUB##Form, Size) \ - FMA3_PACKED_PAIR(VFMADDSUB##Form, Size) \ - FMA3_PACKED_PAIR(VFMSUBADD##Form, Size) - -#define FMA3_CASES(Form) \ - FMA3_SCALAR_PAIR(VFMADD##Form, ,) \ - FMA3_SCALAR_PAIR(VFMSUB##Form, ,) \ - FMA3_SCALAR_PAIR(VFNMADD##Form, ,) \ - FMA3_SCALAR_PAIR(VFNMSUB##Form, ,) \ - FMA3_PACKED_SET(Form, ) \ - FMA3_PACKED_SET(Form, Y) \ - -#define FMA3_CASES_AVX512(Form) \ - FMA3_SCALAR_PAIR(VFMADD##Form, Z, ) \ - FMA3_SCALAR_PAIR(VFMSUB##Form, Z, ) \ - FMA3_SCALAR_PAIR(VFNMADD##Form, Z, ) \ - FMA3_SCALAR_PAIR(VFNMSUB##Form, Z, ) \ - FMA3_PACKED_SET(Form, Z128) \ - FMA3_PACKED_SET(Form, Z256) \ - FMA3_PACKED_SET(Form, Z) - -#define FMA3_CASES_SCALAR_INT(Form) \ - FMA3_SCALAR_PAIR(VFMADD##Form, , _Int) \ - FMA3_SCALAR_PAIR(VFMSUB##Form, , _Int) \ - FMA3_SCALAR_PAIR(VFNMADD##Form, , _Int) \ - FMA3_SCALAR_PAIR(VFNMSUB##Form, , _Int) - -#define FMA3_CASES_SCALAR_INT_AVX512(Form) \ - FMA3_SCALAR_PAIR(VFMADD##Form, Z, _Int) \ - FMA3_SCALAR_PAIR(VFMSUB##Form, Z, _Int) \ - FMA3_SCALAR_PAIR(VFNMADD##Form, Z, _Int) \ - FMA3_SCALAR_PAIR(VFNMSUB##Form, Z, _Int) - - switch (Opcode) { - FMA3_CASES(132) - FMA3_CASES(213) - FMA3_CASES(231) - - // AVX-512 instructions - FMA3_CASES_AVX512(132) - FMA3_CASES_AVX512(213) - FMA3_CASES_AVX512(231) - return true; - - FMA3_CASES_SCALAR_INT(132) - FMA3_CASES_SCALAR_INT(213) - FMA3_CASES_SCALAR_INT(231) - - // AVX-512 instructions - FMA3_CASES_SCALAR_INT_AVX512(132) - FMA3_CASES_SCALAR_INT_AVX512(213) - FMA3_CASES_SCALAR_INT_AVX512(231) - IsIntrinsic = true; - return true; - default: - return false; - } - llvm_unreachable("Opcode not handled by the switch"); - -#undef FMA3_CASE -#undef FMA3_SCALAR_PAIR -#undef FMA3_PACKED_PAIR -#undef FMA3_PACKED_SET -#undef FMA3_CASES -#undef FMA3_CASES_AVX512 -#undef FMA3_CASES_SCALAR_INT -#undef FMA3_CASES_SCALAR_INT_AVX512 -} - -/// Returns an adjusted FMA opcode that must be used in FMA instruction that -/// performs the same computations as the given MI but which has the operands -/// \p SrcOpIdx1 and \p SrcOpIdx2 commuted. -/// It may return 0 if it is unsafe to commute the operands. -/// -/// The returned FMA opcode may differ from the opcode in the given \p MI. -/// For example, commuting the operands #1 and #3 in the following FMA -/// FMA213 #1, #2, #3 -/// results into instruction with adjusted opcode: -/// FMA231 #3, #2, #1 -static unsigned getFMA3OpcodeToCommuteOperands(unsigned Opc, - bool IsIntrinOpcode, - unsigned SrcOpIdx1, - unsigned SrcOpIdx2) { -#define FMA3_ENTRY(Name, Suffix) \ - { X86::Name##132##Suffix, X86::Name##213##Suffix, X86::Name##231##Suffix }, - -#define FMA3_SCALAR_PAIR(Name, Suffix) \ - FMA3_ENTRY(Name, SS##Suffix) \ - FMA3_ENTRY(Name, SD##Suffix) - -#define FMA3_PACKED_PAIR(Name, Suffix) \ - FMA3_ENTRY(Name, PS##Suffix) \ - FMA3_ENTRY(Name, PD##Suffix) - -#define FMA3_PACKED_SIZES(Name, Suffix) \ - FMA3_PACKED_PAIR(Name, Suffix) \ - FMA3_PACKED_PAIR(Name, Y##Suffix) - -#define FMA3_TABLE_ALL(Name) \ - FMA3_SCALAR_PAIR(Name, r) \ - FMA3_PACKED_SIZES(Name, r) \ - FMA3_SCALAR_PAIR(Name, m) \ - FMA3_PACKED_SIZES(Name, m) - -#define FMA3_TABLE_PACKED(Name) \ - FMA3_PACKED_SIZES(Name, r) \ - FMA3_PACKED_SIZES(Name, m) - -#define FMA3_TABLE_SCALAR_INT(Name) \ - FMA3_SCALAR_PAIR(Name, r_Int) \ - FMA3_SCALAR_PAIR(Name, m_Int) - -#define FMA3_PACKED_SIZES_AVX512(Name, Suffix) \ - FMA3_PACKED_PAIR(Name, Z128##Suffix) \ - FMA3_PACKED_PAIR(Name, Z256##Suffix) \ - FMA3_PACKED_PAIR(Name, Z##Suffix) - -#define FMA3_TABLE_ALL_AVX512(Name) \ - FMA3_SCALAR_PAIR(Name, Zr) \ - FMA3_PACKED_SIZES_AVX512(Name, r) \ - FMA3_SCALAR_PAIR(Name, Zm) \ - FMA3_PACKED_SIZES_AVX512(Name, m) - -#define FMA3_TABLE_PACKED_AVX512(Name) \ - FMA3_PACKED_SIZES_AVX512(Name, r) \ - FMA3_PACKED_SIZES_AVX512(Name, m) - -#define FMA3_TABLE_SCALAR_INT_AVX512(Name) \ - FMA3_SCALAR_PAIR(Name, Zr_Int) \ - FMA3_SCALAR_PAIR(Name, Zm_Int) - - // Define the array that holds FMA opcodes in groups - // of 3 opcodes(132, 213, 231) in each group. - static const uint16_t RegularOpcodeGroups[][3] = { - FMA3_TABLE_ALL(VFMADD) - FMA3_TABLE_ALL(VFMSUB) - FMA3_TABLE_ALL(VFNMADD) - FMA3_TABLE_ALL(VFNMSUB) - FMA3_TABLE_PACKED(VFMADDSUB) - FMA3_TABLE_PACKED(VFMSUBADD) - - // AVX-512 instructions - FMA3_TABLE_ALL_AVX512(VFMADD) - FMA3_TABLE_ALL_AVX512(VFMSUB) - FMA3_TABLE_ALL_AVX512(VFNMADD) - FMA3_TABLE_ALL_AVX512(VFNMSUB) - FMA3_TABLE_PACKED_AVX512(VFMADDSUB) - FMA3_TABLE_PACKED_AVX512(VFMSUBADD) - }; - - // Define the array that holds FMA*_Int opcodes in groups - // of 3 opcodes(132, 213, 231) in each group. - static const uint16_t IntrinOpcodeGroups[][3] = { - FMA3_TABLE_SCALAR_INT(VFMADD) - FMA3_TABLE_SCALAR_INT(VFMSUB) - FMA3_TABLE_SCALAR_INT(VFNMADD) - FMA3_TABLE_SCALAR_INT(VFNMSUB) - - // AVX-512 instructions - FMA3_TABLE_SCALAR_INT_AVX512(VFMADD) - FMA3_TABLE_SCALAR_INT_AVX512(VFMSUB) - FMA3_TABLE_SCALAR_INT_AVX512(VFNMADD) - FMA3_TABLE_SCALAR_INT_AVX512(VFNMSUB) - }; - -#undef FMA3_ENTRY -#undef FMA3_SCALAR_PAIR -#undef FMA3_PACKED_PAIR -#undef FMA3_PACKED_SIZES -#undef FMA3_TABLE_ALL -#undef FMA3_TABLE_PACKED -#undef FMA3_TABLE_SCALAR_INT -#undef FMA3_SCALAR_PAIR_AVX512 -#undef FMA3_PACKED_SIZES_AVX512 -#undef FMA3_TABLE_ALL_AVX512 -#undef FMA3_TABLE_PACKED_AVX512 -#undef FMA3_TABLE_SCALAR_INT_AVX512 - - const unsigned Form132Index = 0; - const unsigned Form213Index = 1; - const unsigned Form231Index = 2; - const unsigned FormsNum = 3; - - size_t GroupsNum; - const uint16_t (*OpcodeGroups)[3]; - if (IsIntrinOpcode) { - GroupsNum = array_lengthof(IntrinOpcodeGroups); - OpcodeGroups = IntrinOpcodeGroups; - } else { - GroupsNum = array_lengthof(RegularOpcodeGroups); - OpcodeGroups = RegularOpcodeGroups; - } - - const uint16_t *FoundOpcodesGroup = nullptr; - size_t FormIndex; - - // Look for the input opcode in the corresponding opcodes table. - for (size_t GroupIndex = 0; GroupIndex < GroupsNum && !FoundOpcodesGroup; - ++GroupIndex) { - for (FormIndex = 0; FormIndex < FormsNum; ++FormIndex) { - if (OpcodeGroups[GroupIndex][FormIndex] == Opc) { - FoundOpcodesGroup = OpcodeGroups[GroupIndex]; - break; - } - } - } +unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands( + const MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2, + const X86InstrFMA3Group &FMA3Group) const { - // The input opcode does not match with any of the opcodes from the tables. - // The unsupported FMA opcode must be added to one of the two opcode groups - // defined above. - assert(FoundOpcodesGroup != nullptr && "Unexpected FMA3 opcode"); + unsigned Opc = MI.getOpcode(); // Put the lowest index to SrcOpIdx1 to simplify the checks below. if (SrcOpIdx1 > SrcOpIdx2) @@ -3591,15 +3098,40 @@ static unsigned getFMA3OpcodeToCommuteOperands(unsigned Opc, // not implemented yet. So, just return 0 in that case. // When such analysis are available this place will be the right place for // calling it. - if (IsIntrinOpcode && SrcOpIdx1 == 1) + if (FMA3Group.isIntrinsic() && SrcOpIdx1 == 1) return 0; + unsigned FMAOp1 = 1, FMAOp2 = 2, FMAOp3 = 3; + if (FMA3Group.isKMasked()) { + // The k-mask operand cannot be commuted. + if (SrcOpIdx1 == 2) + return 0; + + // For k-zero-masked operations it is Ok to commute the first vector + // operand. + // For regular k-masked operations a conservative choice is done as the + // elements of the first vector operand, for which the corresponding bit + // in the k-mask operand is set to 0, are copied to the result of FMA. + // TODO/FIXME: The commute still may be legal if it is known that the + // k-mask operand is set to either all ones or all zeroes. + // It is also Ok to commute the 1st operand if all users of MI use only + // the elements enabled by the k-mask operand. For example, + // v4 = VFMADD213PSZrk v1, k, v2, v3; // v1[i] = k[i] ? v2[i]*v1[i]+v3[i] + // : v1[i]; + // VMOVAPSZmrk <mem_addr>, k, v4; // this is the ONLY user of v4 -> + // // Ok, to commute v1 in FMADD213PSZrk. + if (FMA3Group.isKMergeMasked() && SrcOpIdx1 == FMAOp1) + return 0; + FMAOp2++; + FMAOp3++; + } + unsigned Case; - if (SrcOpIdx1 == 1 && SrcOpIdx2 == 2) + if (SrcOpIdx1 == FMAOp1 && SrcOpIdx2 == FMAOp2) Case = 0; - else if (SrcOpIdx1 == 1 && SrcOpIdx2 == 3) + else if (SrcOpIdx1 == FMAOp1 && SrcOpIdx2 == FMAOp3) Case = 1; - else if (SrcOpIdx1 == 2 && SrcOpIdx2 == 3) + else if (SrcOpIdx1 == FMAOp2 && SrcOpIdx2 == FMAOp3) Case = 2; else return 0; @@ -3607,6 +3139,9 @@ static unsigned getFMA3OpcodeToCommuteOperands(unsigned Opc, // Define the FMA forms mapping array that helps to map input FMA form // to output FMA form to preserve the operation semantics after // commuting the operands. + const unsigned Form132Index = 0; + const unsigned Form213Index = 1; + const unsigned Form231Index = 2; static const unsigned FormMapping[][3] = { // 0: SrcOpIdx1 == 1 && SrcOpIdx2 == 2; // FMA132 A, C, b; ==> FMA231 C, A, b; @@ -3625,9 +3160,24 @@ static unsigned getFMA3OpcodeToCommuteOperands(unsigned Opc, { Form213Index, Form132Index, Form231Index } }; + unsigned FMAForms[3]; + if (FMA3Group.isRegOpcodeFromGroup(Opc)) { + FMAForms[0] = FMA3Group.getReg132Opcode(); + FMAForms[1] = FMA3Group.getReg213Opcode(); + FMAForms[2] = FMA3Group.getReg231Opcode(); + } else { + FMAForms[0] = FMA3Group.getMem132Opcode(); + FMAForms[1] = FMA3Group.getMem213Opcode(); + FMAForms[2] = FMA3Group.getMem231Opcode(); + } + unsigned FormIndex; + for (FormIndex = 0; FormIndex < 3; FormIndex++) + if (Opc == FMAForms[FormIndex]) + break; + // Everything is ready, just adjust the FMA opcode and return it. FormIndex = FormMapping[Case][FormIndex]; - return FoundOpcodesGroup[FormIndex]; + return FMAForms[FormIndex]; } MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, @@ -3852,11 +3402,11 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, OpIdx1, OpIdx2); } default: - bool IsIntrinOpcode; - if (isFMA3(MI.getOpcode(), IsIntrinOpcode)) { - unsigned Opc = getFMA3OpcodeToCommuteOperands(MI.getOpcode(), - IsIntrinOpcode, - OpIdx1, OpIdx2); + const X86InstrFMA3Group *FMA3Group = + X86InstrFMA3Info::getFMA3Group(MI.getOpcode()); + if (FMA3Group) { + unsigned Opc = + getFMA3OpcodeToCommuteOperands(MI, OpIdx1, OpIdx2, *FMA3Group); if (Opc == 0) return nullptr; auto &WorkingMI = cloneIfNew(MI); @@ -3869,21 +3419,37 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, } } -bool X86InstrInfo::findFMA3CommutedOpIndices(MachineInstr &MI, - bool IsIntrinOpcode, - unsigned &SrcOpIdx1, - unsigned &SrcOpIdx2) const { +bool X86InstrInfo::findFMA3CommutedOpIndices( + const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2, + const X86InstrFMA3Group &FMA3Group) const { + unsigned FirstCommutableVecOp = 1; + unsigned LastCommutableVecOp = 3; + unsigned KMaskOp = 0; + if (FMA3Group.isKMasked()) { + // The k-mask operand has index = 2 for masked and zero-masked operations. + KMaskOp = 2; + + // The operand with index = 1 is used as a source for those elements for + // which the corresponding bit in the k-mask is set to 0. + if (FMA3Group.isKMergeMasked()) + FirstCommutableVecOp = 3; + + LastCommutableVecOp++; + } - unsigned RegOpsNum = isMem(MI, 3) ? 2 : 3; + if (isMem(MI, LastCommutableVecOp)) + LastCommutableVecOp--; // Only the first RegOpsNum operands are commutable. // Also, the value 'CommuteAnyOperandIndex' is valid here as it means // that the operand is not specified/fixed. if (SrcOpIdx1 != CommuteAnyOperandIndex && - (SrcOpIdx1 < 1 || SrcOpIdx1 > RegOpsNum)) + (SrcOpIdx1 < FirstCommutableVecOp || SrcOpIdx1 > LastCommutableVecOp || + SrcOpIdx1 == KMaskOp)) return false; if (SrcOpIdx2 != CommuteAnyOperandIndex && - (SrcOpIdx2 < 1 || SrcOpIdx2 > RegOpsNum)) + (SrcOpIdx2 < FirstCommutableVecOp || SrcOpIdx2 > LastCommutableVecOp || + SrcOpIdx2 == KMaskOp)) return false; // Look for two different register operands assumed to be commutable @@ -3898,7 +3464,7 @@ bool X86InstrInfo::findFMA3CommutedOpIndices(MachineInstr &MI, if (SrcOpIdx1 == SrcOpIdx2) // Both of operands are not fixed. By default set one of commutable // operands to the last register operand of the instruction. - CommutableOpIdx2 = RegOpsNum; + CommutableOpIdx2 = LastCommutableVecOp; else if (SrcOpIdx2 == CommuteAnyOperandIndex) // Only one of operands is not fixed. CommutableOpIdx2 = SrcOpIdx1; @@ -3906,7 +3472,12 @@ bool X86InstrInfo::findFMA3CommutedOpIndices(MachineInstr &MI, // CommutableOpIdx2 is well defined now. Let's choose another commutable // operand and assign its index to CommutableOpIdx1. unsigned Op2Reg = MI.getOperand(CommutableOpIdx2).getReg(); - for (CommutableOpIdx1 = RegOpsNum; CommutableOpIdx1 > 0; CommutableOpIdx1--) { + for (CommutableOpIdx1 = LastCommutableVecOp; + CommutableOpIdx1 >= FirstCommutableVecOp; CommutableOpIdx1--) { + // Just ignore and skip the k-mask operand. + if (CommutableOpIdx1 == KMaskOp) + continue; + // The commuted operands must have different registers. // Otherwise, the commute transformation does not change anything and // is useless then. @@ -3915,7 +3486,7 @@ bool X86InstrInfo::findFMA3CommutedOpIndices(MachineInstr &MI, } // No appropriate commutable operands were found. - if (CommutableOpIdx1 == 0) + if (CommutableOpIdx1 < FirstCommutableVecOp) return false; // Assign the found pair of commutable indices to SrcOpIdx1 and SrcOpidx2 @@ -3927,8 +3498,7 @@ bool X86InstrInfo::findFMA3CommutedOpIndices(MachineInstr &MI, // Check if we can adjust the opcode to preserve the semantics when // commute the register operands. - return getFMA3OpcodeToCommuteOperands(MI.getOpcode(), IsIntrinOpcode, - SrcOpIdx1, SrcOpIdx2) != 0; + return getFMA3OpcodeToCommuteOperands(MI, SrcOpIdx1, SrcOpIdx2, FMA3Group) != 0; } bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, @@ -3955,10 +3525,10 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, return false; } default: - bool IsIntrinOpcode; - if (isFMA3(MI.getOpcode(), IsIntrinOpcode)) - return findFMA3CommutedOpIndices(MI, IsIntrinOpcode, - SrcOpIdx1, SrcOpIdx2); + const X86InstrFMA3Group *FMA3Group = + X86InstrFMA3Info::getFMA3Group(MI.getOpcode()); + if (FMA3Group) + return findFMA3CommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2, *FMA3Group); return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); } return false; |