diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 192 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 6 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrFMA.td | 134 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 458 |
4 files changed, 383 insertions, 407 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7e5f0ad09d7..108166b892f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -24257,102 +24257,102 @@ X86TargetLowering::emitFMA3Instr(MachineInstr &MI, // Found a matching instruction. unsigned NewFMAOpc = 0; switch (MI.getOpcode()) { - case X86::VFMADDPDr213r: - NewFMAOpc = X86::VFMADDPDr231r; + case X86::VFMADD213PDr: + NewFMAOpc = X86::VFMADD231PDr; break; - case X86::VFMADDPSr213r: - NewFMAOpc = X86::VFMADDPSr231r; + case X86::VFMADD213PSr: + NewFMAOpc = X86::VFMADD231PSr; break; - case X86::VFMADDSDr213r: - NewFMAOpc = X86::VFMADDSDr231r; + case X86::VFMADD213SDr: + NewFMAOpc = X86::VFMADD231SDr; break; - case X86::VFMADDSSr213r: - NewFMAOpc = X86::VFMADDSSr231r; + case X86::VFMADD213SSr: + NewFMAOpc = X86::VFMADD231SSr; break; - case X86::VFMSUBPDr213r: - NewFMAOpc = X86::VFMSUBPDr231r; + case X86::VFMSUB213PDr: + NewFMAOpc = X86::VFMSUB231PDr; break; - case X86::VFMSUBPSr213r: - NewFMAOpc = X86::VFMSUBPSr231r; + case X86::VFMSUB213PSr: + NewFMAOpc = X86::VFMSUB231PSr; break; - case X86::VFMSUBSDr213r: - NewFMAOpc = X86::VFMSUBSDr231r; + case X86::VFMSUB213SDr: + NewFMAOpc = X86::VFMSUB231SDr; break; - case X86::VFMSUBSSr213r: - NewFMAOpc = X86::VFMSUBSSr231r; + case X86::VFMSUB213SSr: + NewFMAOpc = X86::VFMSUB231SSr; break; - case X86::VFNMADDPDr213r: - NewFMAOpc = X86::VFNMADDPDr231r; + case X86::VFNMADD213PDr: + NewFMAOpc = X86::VFNMADD231PDr; break; - case X86::VFNMADDPSr213r: - NewFMAOpc = X86::VFNMADDPSr231r; + case X86::VFNMADD213PSr: + NewFMAOpc = X86::VFNMADD231PSr; break; - case X86::VFNMADDSDr213r: - NewFMAOpc = X86::VFNMADDSDr231r; + case X86::VFNMADD213SDr: + NewFMAOpc = X86::VFNMADD231SDr; break; - case X86::VFNMADDSSr213r: - NewFMAOpc = X86::VFNMADDSSr231r; + case X86::VFNMADD213SSr: + NewFMAOpc = X86::VFNMADD231SSr; break; - case X86::VFNMSUBPDr213r: - NewFMAOpc = X86::VFNMSUBPDr231r; + case X86::VFNMSUB213PDr: + NewFMAOpc = X86::VFNMSUB231PDr; break; - case X86::VFNMSUBPSr213r: - NewFMAOpc = X86::VFNMSUBPSr231r; + case X86::VFNMSUB213PSr: + NewFMAOpc = X86::VFNMSUB231PSr; break; - case X86::VFNMSUBSDr213r: - NewFMAOpc = X86::VFNMSUBSDr231r; + case X86::VFNMSUB213SDr: + NewFMAOpc = X86::VFNMSUB231SDr; break; - case X86::VFNMSUBSSr213r: - NewFMAOpc = X86::VFNMSUBSSr231r; + case X86::VFNMSUB213SSr: + NewFMAOpc = X86::VFNMSUB231SSr; break; - case X86::VFMADDSUBPDr213r: - NewFMAOpc = X86::VFMADDSUBPDr231r; + case X86::VFMADDSUB213PDr: + NewFMAOpc = X86::VFMADDSUB231PDr; break; - case X86::VFMADDSUBPSr213r: - NewFMAOpc = X86::VFMADDSUBPSr231r; + case X86::VFMADDSUB213PSr: + NewFMAOpc = X86::VFMADDSUB231PSr; break; - case X86::VFMSUBADDPDr213r: - NewFMAOpc = X86::VFMSUBADDPDr231r; + case X86::VFMSUBADD213PDr: + NewFMAOpc = X86::VFMSUBADD231PDr; break; - case X86::VFMSUBADDPSr213r: - NewFMAOpc = X86::VFMSUBADDPSr231r; + case X86::VFMSUBADD213PSr: + NewFMAOpc = X86::VFMSUBADD231PSr; break; - case X86::VFMADDPDr213rY: - NewFMAOpc = X86::VFMADDPDr231rY; + case X86::VFMADD213PDYr: + NewFMAOpc = X86::VFMADD231PDYr; break; - case X86::VFMADDPSr213rY: - NewFMAOpc = X86::VFMADDPSr231rY; + case X86::VFMADD213PSYr: + NewFMAOpc = X86::VFMADD231PSYr; break; - case X86::VFMSUBPDr213rY: - NewFMAOpc = X86::VFMSUBPDr231rY; + case X86::VFMSUB213PDYr: + NewFMAOpc = X86::VFMSUB231PDYr; break; - case X86::VFMSUBPSr213rY: - NewFMAOpc = X86::VFMSUBPSr231rY; + case X86::VFMSUB213PSYr: + NewFMAOpc = X86::VFMSUB231PSYr; break; - case X86::VFNMADDPDr213rY: - NewFMAOpc = X86::VFNMADDPDr231rY; + case X86::VFNMADD213PDYr: + NewFMAOpc = X86::VFNMADD231PDYr; break; - case X86::VFNMADDPSr213rY: - NewFMAOpc = X86::VFNMADDPSr231rY; + case X86::VFNMADD213PSYr: + NewFMAOpc = X86::VFNMADD231PSYr; break; - case X86::VFNMSUBPDr213rY: - NewFMAOpc = X86::VFNMSUBPDr231rY; + case X86::VFNMSUB213PDYr: + NewFMAOpc = X86::VFNMSUB231PDYr; break; - case X86::VFNMSUBPSr213rY: - NewFMAOpc = X86::VFNMSUBPSr231rY; + case X86::VFNMSUB213PSYr: + NewFMAOpc = X86::VFNMSUB231PSYr; break; - case X86::VFMADDSUBPDr213rY: - NewFMAOpc = X86::VFMADDSUBPDr231rY; + case X86::VFMADDSUB213PDYr: + NewFMAOpc = X86::VFMADDSUB231PDYr; break; - case X86::VFMADDSUBPSr213rY: - NewFMAOpc = X86::VFMADDSUBPSr231rY; + case X86::VFMADDSUB213PSYr: + NewFMAOpc = X86::VFMADDSUB231PSYr; break; - case X86::VFMSUBADDPDr213rY: - NewFMAOpc = X86::VFMSUBADDPDr231rY; + case X86::VFMSUBADD213PDYr: + NewFMAOpc = X86::VFMSUBADD231PDYr; break; - case X86::VFMSUBADDPSr213rY: - NewFMAOpc = X86::VFMSUBADDPSr231rY; + case X86::VFMSUBADD213PSYr: + NewFMAOpc = X86::VFMSUBADD231PSYr; break; default: llvm_unreachable("Unrecognized FMA variant."); @@ -24595,38 +24595,38 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case TargetOpcode::PATCHPOINT: return emitPatchPoint(MI, BB); - case X86::VFMADDPDr213r: - case X86::VFMADDPSr213r: - case X86::VFMADDSDr213r: - case X86::VFMADDSSr213r: - case X86::VFMSUBPDr213r: - case X86::VFMSUBPSr213r: - case X86::VFMSUBSDr213r: - case X86::VFMSUBSSr213r: - case X86::VFNMADDPDr213r: - case X86::VFNMADDPSr213r: - case X86::VFNMADDSDr213r: - case X86::VFNMADDSSr213r: - case X86::VFNMSUBPDr213r: - case X86::VFNMSUBPSr213r: - case X86::VFNMSUBSDr213r: - case X86::VFNMSUBSSr213r: - case X86::VFMADDSUBPDr213r: - case X86::VFMADDSUBPSr213r: - case X86::VFMSUBADDPDr213r: - case X86::VFMSUBADDPSr213r: - case X86::VFMADDPDr213rY: - case X86::VFMADDPSr213rY: - case X86::VFMSUBPDr213rY: - case X86::VFMSUBPSr213rY: - case X86::VFNMADDPDr213rY: - case X86::VFNMADDPSr213rY: - case X86::VFNMSUBPDr213rY: - case X86::VFNMSUBPSr213rY: - case X86::VFMADDSUBPDr213rY: - case X86::VFMADDSUBPSr213rY: - case X86::VFMSUBADDPDr213rY: - case X86::VFMSUBADDPSr213rY: + case X86::VFMADD213PDr: + case X86::VFMADD213PSr: + case X86::VFMADD213SDr: + case X86::VFMADD213SSr: + case X86::VFMSUB213PDr: + case X86::VFMSUB213PSr: + case X86::VFMSUB213SDr: + case X86::VFMSUB213SSr: + case X86::VFNMADD213PDr: + case X86::VFNMADD213PSr: + case X86::VFNMADD213SDr: + case X86::VFNMADD213SSr: + case X86::VFNMSUB213PDr: + case X86::VFNMSUB213PSr: + case X86::VFNMSUB213SDr: + case X86::VFNMSUB213SSr: + case X86::VFMADDSUB213PDr: + case X86::VFMADDSUB213PSr: + case X86::VFMSUBADD213PDr: + case X86::VFMSUBADD213PSr: + case X86::VFMADD213PDYr: + case X86::VFMADD213PSYr: + case X86::VFMSUB213PDYr: + case X86::VFMSUB213PSYr: + case X86::VFNMADD213PDYr: + case X86::VFNMADD213PSYr: + case X86::VFNMSUB213PDYr: + case X86::VFNMSUB213PSYr: + case X86::VFMADDSUB213PDYr: + case X86::VFMADDSUB213PSYr: + case X86::VFMSUBADD213PDYr: + case X86::VFMSUBADD213PSYr: return emitFMA3Instr(MI, BB); case X86::LCMPXCHG8B_SAVE_EBX: case X86::LCMPXCHG16B_SAVE_RBX: { diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index f9c20f11410..1129255b64b 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -4995,7 +4995,7 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, X86VectorVTInfo _ , string SUFF> { - defm NAME#213#SUFF: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix , _ , + defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix , _ , (_.VT (OpNodeRnd _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 FROUND_CURRENT))), (_.VT (OpNodeRnd _.RC:$src2, _.RC:$src1, (_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))), (i32 FROUND_CURRENT))), @@ -5006,7 +5006,7 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132, (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1, (_.ScalarLdFrag addr:$src3))))>; - defm NAME#231#SUFF: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix , _ , + defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix , _ , (_.VT (OpNodeRnd _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 FROUND_CURRENT))), (_.VT (OpNodeRnd _.RC:$src2, (_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))), @@ -5018,7 +5018,7 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132, (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, (_.ScalarLdFrag addr:$src3), _.FRC:$src1)))>; - defm NAME#132#SUFF: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix , _ , + defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix , _ , (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 FROUND_CURRENT))), (_.VT (OpNodeRnd _.RC:$src1, (_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))), diff --git a/llvm/lib/Target/X86/X86InstrFMA.td b/llvm/lib/Target/X86/X86InstrFMA.td index fd800cf077f..8b3dbbcd5e6 100644 --- a/llvm/lib/Target/X86/X86InstrFMA.td +++ b/llvm/lib/Target/X86/X86InstrFMA.td @@ -56,7 +56,7 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr, (MemFrag128 addr:$src3))))]>; let usesCustomInserter = 1 in - def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst), + def Yr : FMA3<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, VR256:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), @@ -64,7 +64,7 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr, VR256:$src3)))]>, VEX_L; let mayLoad = 1 in - def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst), + def Ym : FMA3<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, f256mem:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), @@ -74,60 +74,61 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr, } multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, - string OpcodeStr, string PackTy, + string OpcodeStr, string PackTy, string Suff, PatFrag MemFrag128, PatFrag MemFrag256, SDNode Op, ValueType OpTy128, ValueType OpTy256> { - defm r213 : fma3p_rm<opc213, - !strconcat(OpcodeStr, "213", PackTy), - MemFrag128, MemFrag256, OpTy128, OpTy256, Op>; - defm r132 : fma3p_rm<opc132, - !strconcat(OpcodeStr, "132", PackTy), - MemFrag128, MemFrag256, OpTy128, OpTy256>; - defm r231 : fma3p_rm<opc231, - !strconcat(OpcodeStr, "231", PackTy), - MemFrag128, MemFrag256, OpTy128, OpTy256>; + defm NAME#213#Suff : fma3p_rm<opc213, + !strconcat(OpcodeStr, "213", PackTy), + MemFrag128, MemFrag256, OpTy128, OpTy256, Op>; + defm NAME#132#Suff : fma3p_rm<opc132, + !strconcat(OpcodeStr, "132", PackTy), + MemFrag128, MemFrag256, OpTy128, OpTy256>; + defm NAME#231#Suff : fma3p_rm<opc231, + !strconcat(OpcodeStr, "231", PackTy), + MemFrag128, MemFrag256, OpTy128, OpTy256>; } // Fused Multiply-Add let ExeDomain = SSEPackedSingle in { - defm VFMADDPS : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", loadv4f32, - loadv8f32, X86Fmadd, v4f32, v8f32>; - defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", loadv4f32, - loadv8f32, X86Fmsub, v4f32, v8f32>; - defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps", - loadv4f32, loadv8f32, X86Fmaddsub, - v4f32, v8f32>; - defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps", - loadv4f32, loadv8f32, X86Fmsubadd, - v4f32, v8f32>; + defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", "PS", + loadv4f32, loadv8f32, X86Fmadd, v4f32, v8f32>; + defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", "PS", + loadv4f32, loadv8f32, X86Fmsub, v4f32, v8f32>; + defm VFMADDSUB : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps", "PS", + loadv4f32, loadv8f32, X86Fmaddsub, + v4f32, v8f32>; + defm VFMSUBADD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps", "PS", + loadv4f32, loadv8f32, X86Fmsubadd, + v4f32, v8f32>; } let ExeDomain = SSEPackedDouble in { - defm VFMADDPD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", loadv2f64, - loadv4f64, X86Fmadd, v2f64, v4f64>, VEX_W; - defm VFMSUBPD : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", loadv2f64, - loadv4f64, X86Fmsub, v2f64, v4f64>, VEX_W; - defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd", - loadv2f64, loadv4f64, X86Fmaddsub, - v2f64, v4f64>, VEX_W; - defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd", - loadv2f64, loadv4f64, X86Fmsubadd, - v2f64, v4f64>, VEX_W; + defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", "PD", + loadv2f64, loadv4f64, X86Fmadd, v2f64, + v4f64>, VEX_W; + defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", "PD", + loadv2f64, loadv4f64, X86Fmsub, v2f64, + v4f64>, VEX_W; + defm VFMADDSUB : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd", "PD", + loadv2f64, loadv4f64, X86Fmaddsub, + v2f64, v4f64>, VEX_W; + defm VFMSUBADD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd", "PD", + loadv2f64, loadv4f64, X86Fmsubadd, + v2f64, v4f64>, VEX_W; } // Fused Negative Multiply-Add let ExeDomain = SSEPackedSingle in { - defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", loadv4f32, - loadv8f32, X86Fnmadd, v4f32, v8f32>; - defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", loadv4f32, - loadv8f32, X86Fnmsub, v4f32, v8f32>; + defm VFNMADD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", "PS", loadv4f32, + loadv8f32, X86Fnmadd, v4f32, v8f32>; + defm VFNMSUB : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", "PS", loadv4f32, + loadv8f32, X86Fnmsub, v4f32, v8f32>; } let ExeDomain = SSEPackedDouble in { - defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", loadv2f64, - loadv4f64, X86Fnmadd, v2f64, v4f64>, VEX_W; - defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd", - loadv2f64, loadv4f64, X86Fnmsub, v2f64, - v4f64>, VEX_W; + defm VFNMADD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", "PD", loadv2f64, + loadv4f64, X86Fnmadd, v2f64, v4f64>, VEX_W; + defm VFNMSUB : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd", "PD", loadv2f64, + loadv4f64, X86Fnmsub, v2f64, v4f64>, VEX_W; } // All source register operands of FMA opcodes defined in fma3s_rm multiclass @@ -191,13 +192,15 @@ multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr, } multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, - string OpStr, string PackTy, + string OpStr, string PackTy, string Suff, SDNode OpNode, RegisterClass RC, X86MemOperand x86memop> { - defm r132 : fma3s_rm<opc132, !strconcat(OpStr, "132", PackTy), x86memop, RC>; - defm r213 : fma3s_rm<opc213, !strconcat(OpStr, "213", PackTy), x86memop, RC, - OpNode>; - defm r231 : fma3s_rm<opc231, !strconcat(OpStr, "231", PackTy), x86memop, RC>; + defm NAME#132#Suff : fma3s_rm<opc132, !strconcat(OpStr, "132", PackTy), + x86memop, RC>; + defm NAME#213#Suff : fma3s_rm<opc213, !strconcat(OpStr, "213", PackTy), + x86memop, RC, OpNode>; + defm NAME#231#Suff : fma3s_rm<opc231, !strconcat(OpStr, "231", PackTy), + x86memop, RC>; } // The FMA 213 form is created for lowering of scalar FMA intrinscis @@ -210,29 +213,30 @@ multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, // form of FMA*_Int instructions is done using an optimistic assumption that // such analysis will be implemented eventually. multiclass fma3s_int_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, - string OpStr, string PackTy, + string OpStr, string PackTy, string Suff, RegisterClass RC, Operand memop> { - defm r132 : fma3s_rm_int<opc132, !strconcat(OpStr, "132", PackTy), - memop, RC>; - defm r213 : fma3s_rm_int<opc213, !strconcat(OpStr, "213", PackTy), - memop, RC>; - defm r231 : fma3s_rm_int<opc231, !strconcat(OpStr, "231", PackTy), - memop, RC>; + defm NAME#132#Suff : fma3s_rm_int<opc132, !strconcat(OpStr, "132", PackTy), + memop, RC>; + defm NAME#213#Suff : fma3s_rm_int<opc213, !strconcat(OpStr, "213", PackTy), + memop, RC>; + defm NAME#231#Suff : fma3s_rm_int<opc231, !strconcat(OpStr, "231", PackTy), + memop, RC>; } multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231, string OpStr, Intrinsic IntF32, Intrinsic IntF64, SDNode OpNode> { let ExeDomain = SSEPackedSingle in - defm SS : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", OpNode, - FR32, f32mem>, - fma3s_int_forms<opc132, opc213, opc231, OpStr, "ss", VR128, ssmem>; + defm NAME : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", "SS", OpNode, + FR32, f32mem>, + fma3s_int_forms<opc132, opc213, opc231, OpStr, "ss", "SS", + VR128, ssmem>; let ExeDomain = SSEPackedDouble in - defm SD : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", OpNode, + defm NAME : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", "SD", OpNode, FR64, f64mem>, - fma3s_int_forms<opc132, opc213, opc231, OpStr, "sd", VR128, sdmem>, - VEX_W; + fma3s_int_forms<opc132, opc213, opc231, OpStr, "sd", "SD", + VR128, sdmem>, VEX_W; // These patterns use the 123 ordering, instead of 213, even though // they match the intrinsic to the 213 version of the instruction. @@ -240,11 +244,11 @@ multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231, // require the pass-through values to come from the first source // operand, not the second. def : Pat<(IntF32 VR128:$src1, VR128:$src2, VR128:$src3), - (COPY_TO_REGCLASS(!cast<Instruction>(NAME#"SSr213r_Int") + (COPY_TO_REGCLASS(!cast<Instruction>(NAME#"213SSr_Int") $src1, $src2, $src3), VR128)>; def : Pat<(IntF64 VR128:$src1, VR128:$src2, VR128:$src3), - (COPY_TO_REGCLASS(!cast<Instruction>(NAME#"SDr213r_Int") + (COPY_TO_REGCLASS(!cast<Instruction>(NAME#"213SDr_Int") $src1, $src2, $src3), VR128)>; } @@ -344,20 +348,20 @@ multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode, [(set VR128:$dst, (OpNode VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>; let isCommutable = 1 in - def rrY : FMA4<opc, MRMSrcReg, (outs VR256:$dst), + def Yrr : FMA4<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, VR256:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR256:$dst, (OpVT256 (OpNode VR256:$src1, VR256:$src2, VR256:$src3)))]>, VEX_W, MemOp4, VEX_L; - def rmY : FMA4<opc, MRMSrcMem, (outs VR256:$dst), + def Yrm : FMA4<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, f256mem:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR256:$dst, (OpNode VR256:$src1, VR256:$src2, (ld_frag256 addr:$src3)))]>, VEX_W, MemOp4, VEX_L; - def mrY : FMA4<opc, MRMSrcMem, (outs VR256:$dst), + def Ymr : FMA4<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, VR256:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), @@ -369,7 +373,7 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>; - def rrY_REV : FMA4<opc, MRMSrcReg, (outs VR256:$dst), + def Yrr_REV : FMA4<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, VR256:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>, diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 8401f40268b..9a0d3e9f7b8 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -1589,34 +1589,34 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VFMADDSD4rr, X86::VFMADDSD4mr, TB_ALIGN_NONE }, { X86::VFMADDPS4rr, X86::VFMADDPS4mr, TB_ALIGN_NONE }, { X86::VFMADDPD4rr, X86::VFMADDPD4mr, TB_ALIGN_NONE }, - { X86::VFMADDPS4rrY, X86::VFMADDPS4mrY, TB_ALIGN_NONE }, - { X86::VFMADDPD4rrY, X86::VFMADDPD4mrY, TB_ALIGN_NONE }, + { X86::VFMADDPS4Yrr, X86::VFMADDPS4Ymr, TB_ALIGN_NONE }, + { X86::VFMADDPD4Yrr, X86::VFMADDPD4Ymr, TB_ALIGN_NONE }, { X86::VFNMADDSS4rr, X86::VFNMADDSS4mr, TB_ALIGN_NONE }, { X86::VFNMADDSD4rr, X86::VFNMADDSD4mr, TB_ALIGN_NONE }, { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, TB_ALIGN_NONE }, { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, TB_ALIGN_NONE }, - { X86::VFNMADDPS4rrY, X86::VFNMADDPS4mrY, TB_ALIGN_NONE }, - { X86::VFNMADDPD4rrY, X86::VFNMADDPD4mrY, TB_ALIGN_NONE }, + { X86::VFNMADDPS4Yrr, X86::VFNMADDPS4Ymr, TB_ALIGN_NONE }, + { X86::VFNMADDPD4Yrr, X86::VFNMADDPD4Ymr, TB_ALIGN_NONE }, { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, TB_ALIGN_NONE }, { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, TB_ALIGN_NONE }, { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, TB_ALIGN_NONE }, { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, TB_ALIGN_NONE }, - { X86::VFMSUBPS4rrY, X86::VFMSUBPS4mrY, TB_ALIGN_NONE }, - { X86::VFMSUBPD4rrY, X86::VFMSUBPD4mrY, TB_ALIGN_NONE }, + { X86::VFMSUBPS4Yrr, X86::VFMSUBPS4Ymr, TB_ALIGN_NONE }, + { X86::VFMSUBPD4Yrr, X86::VFMSUBPD4Ymr, TB_ALIGN_NONE }, { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4mr, TB_ALIGN_NONE }, { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4mr, TB_ALIGN_NONE }, { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, TB_ALIGN_NONE }, { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, TB_ALIGN_NONE }, - { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4mrY, TB_ALIGN_NONE }, - { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4mrY, TB_ALIGN_NONE }, + { X86::VFNMSUBPS4Yrr, X86::VFNMSUBPS4Ymr, TB_ALIGN_NONE }, + { X86::VFNMSUBPD4Yrr, X86::VFNMSUBPD4Ymr, TB_ALIGN_NONE }, { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, TB_ALIGN_NONE }, { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, TB_ALIGN_NONE }, - { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4mrY, TB_ALIGN_NONE }, - { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4mrY, TB_ALIGN_NONE }, + { X86::VFMADDSUBPS4Yrr, X86::VFMADDSUBPS4Ymr, TB_ALIGN_NONE }, + { X86::VFMADDSUBPD4Yrr, X86::VFMADDSUBPD4Ymr, TB_ALIGN_NONE }, { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, TB_ALIGN_NONE }, { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_NONE }, - { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4mrY, TB_ALIGN_NONE }, - { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_NONE }, + { X86::VFMSUBADDPS4Yrr, X86::VFMSUBADDPS4Ymr, TB_ALIGN_NONE }, + { X86::VFMSUBADDPD4Yrr, X86::VFMSUBADDPD4Ymr, TB_ALIGN_NONE }, // XOP foldable instructions { X86::VPCMOVrrr, X86::VPCMOVrmr, 0 }, @@ -1808,169 +1808,169 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) static const X86MemoryFoldTableEntry MemoryFoldTable3[] = { // FMA foldable instructions - { X86::VFMADDSSr231r, X86::VFMADDSSr231m, TB_ALIGN_NONE }, - { X86::VFMADDSSr231r_Int, X86::VFMADDSSr231m_Int, TB_ALIGN_NONE }, - { X86::VFMADDSDr231r, X86::VFMADDSDr231m, TB_ALIGN_NONE }, - { X86::VFMADDSDr231r_Int, X86::VFMADDSDr231m_Int, TB_ALIGN_NONE }, - { X86::VFMADDSSr132r, X86::VFMADDSSr132m, TB_ALIGN_NONE }, - { X86::VFMADDSSr132r_Int, X86::VFMADDSSr132m_Int, TB_ALIGN_NONE }, - { X86::VFMADDSDr132r, X86::VFMADDSDr132m, TB_ALIGN_NONE }, - { X86::VFMADDSDr132r_Int, X86::VFMADDSDr132m_Int, TB_ALIGN_NONE }, - { X86::VFMADDSSr213r, X86::VFMADDSSr213m, TB_ALIGN_NONE }, - { X86::VFMADDSSr213r_Int, X86::VFMADDSSr213m_Int, TB_ALIGN_NONE }, - { X86::VFMADDSDr213r, X86::VFMADDSDr213m, TB_ALIGN_NONE }, - { X86::VFMADDSDr213r_Int, X86::VFMADDSDr213m_Int, TB_ALIGN_NONE }, - - { X86::VFMADDPSr231r, X86::VFMADDPSr231m, TB_ALIGN_NONE }, - { X86::VFMADDPDr231r, X86::VFMADDPDr231m, TB_ALIGN_NONE }, - { X86::VFMADDPSr132r, X86::VFMADDPSr132m, TB_ALIGN_NONE }, - { X86::VFMADDPDr132r, X86::VFMADDPDr132m, TB_ALIGN_NONE }, - { X86::VFMADDPSr213r, X86::VFMADDPSr213m, TB_ALIGN_NONE }, - { X86::VFMADDPDr213r, X86::VFMADDPDr213m, TB_ALIGN_NONE }, - { X86::VFMADDPSr231rY, X86::VFMADDPSr231mY, TB_ALIGN_NONE }, - { X86::VFMADDPDr231rY, X86::VFMADDPDr231mY, TB_ALIGN_NONE }, - { X86::VFMADDPSr132rY, X86::VFMADDPSr132mY, TB_ALIGN_NONE }, - { X86::VFMADDPDr132rY, X86::VFMADDPDr132mY, TB_ALIGN_NONE }, - { X86::VFMADDPSr213rY, X86::VFMADDPSr213mY, TB_ALIGN_NONE }, - { X86::VFMADDPDr213rY, X86::VFMADDPDr213mY, TB_ALIGN_NONE }, - - { X86::VFNMADDSSr231r, X86::VFNMADDSSr231m, TB_ALIGN_NONE }, - { X86::VFNMADDSSr231r_Int, X86::VFNMADDSSr231m_Int, TB_ALIGN_NONE }, - { X86::VFNMADDSDr231r, X86::VFNMADDSDr231m, TB_ALIGN_NONE }, - { X86::VFNMADDSDr231r_Int, X86::VFNMADDSDr231m_Int, TB_ALIGN_NONE }, - { X86::VFNMADDSSr132r, X86::VFNMADDSSr132m, TB_ALIGN_NONE }, - { X86::VFNMADDSSr132r_Int, X86::VFNMADDSSr132m_Int, TB_ALIGN_NONE }, - { X86::VFNMADDSDr132r, X86::VFNMADDSDr132m, TB_ALIGN_NONE }, - { X86::VFNMADDSDr132r_Int, X86::VFNMADDSDr132m_Int, TB_ALIGN_NONE }, - { X86::VFNMADDSSr213r, X86::VFNMADDSSr213m, TB_ALIGN_NONE }, - { X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr213m_Int, TB_ALIGN_NONE }, - { X86::VFNMADDSDr213r, X86::VFNMADDSDr213m, TB_ALIGN_NONE }, - { X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr213m_Int, TB_ALIGN_NONE }, - - { X86::VFNMADDPSr231r, X86::VFNMADDPSr231m, TB_ALIGN_NONE }, - { X86::VFNMADDPDr231r, X86::VFNMADDPDr231m, TB_ALIGN_NONE }, - { X86::VFNMADDPSr132r, X86::VFNMADDPSr132m, TB_ALIGN_NONE }, - { X86::VFNMADDPDr132r, X86::VFNMADDPDr132m, TB_ALIGN_NONE }, - { X86::VFNMADDPSr213r, X86::VFNMADDPSr213m, TB_ALIGN_NONE }, - { X86::VFNMADDPDr213r, X86::VFNMADDPDr213m, TB_ALIGN_NONE }, - { X86::VFNMADDPSr231rY, X86::VFNMADDPSr231mY, TB_ALIGN_NONE }, - { X86::VFNMADDPDr231rY, X86::VFNMADDPDr231mY, TB_ALIGN_NONE }, - { X86::VFNMADDPSr132rY, X86::VFNMADDPSr132mY, TB_ALIGN_NONE }, - { X86::VFNMADDPDr132rY, X86::VFNMADDPDr132mY, TB_ALIGN_NONE }, - { X86::VFNMADDPSr213rY, X86::VFNMADDPSr213mY, TB_ALIGN_NONE }, - { X86::VFNMADDPDr213rY, X86::VFNMADDPDr213mY, TB_ALIGN_NONE }, - - { X86::VFMSUBSSr231r, X86::VFMSUBSSr231m, TB_ALIGN_NONE }, - { X86::VFMSUBSSr231r_Int, X86::VFMSUBSSr231m_Int, TB_ALIGN_NONE }, - { X86::VFMSUBSDr231r, X86::VFMSUBSDr231m, TB_ALIGN_NONE }, - { X86::VFMSUBSDr231r_Int, X86::VFMSUBSDr231m_Int, TB_ALIGN_NONE }, - { X86::VFMSUBSSr132r, X86::VFMSUBSSr132m, TB_ALIGN_NONE }, - { X86::VFMSUBSSr132r_Int, X86::VFMSUBSSr132m_Int, TB_ALIGN_NONE }, - { X86::VFMSUBSDr132r, X86::VFMSUBSDr132m, TB_ALIGN_NONE }, - { X86::VFMSUBSDr132r_Int, X86::VFMSUBSDr132m_Int, TB_ALIGN_NONE }, - { X86::VFMSUBSSr213r, X86::VFMSUBSSr213m, TB_ALIGN_NONE }, - { X86::VFMSUBSSr213r_Int, X86::VFMSUBSSr213m_Int, TB_ALIGN_NONE }, - { X86::VFMSUBSDr213r, X86::VFMSUBSDr213m, TB_ALIGN_NONE }, - { X86::VFMSUBSDr213r_Int, X86::VFMSUBSDr213m_Int, TB_ALIGN_NONE }, - - { X86::VFMSUBPSr231r, X86::VFMSUBPSr231m, TB_ALIGN_NONE }, - { X86::VFMSUBPDr231r, X86::VFMSUBPDr231m, TB_ALIGN_NONE }, - { X86::VFMSUBPSr132r, X86::VFMSUBPSr132m, TB_ALIGN_NONE }, - { X86::VFMSUBPDr132r, X86::VFMSUBPDr132m, TB_ALIGN_NONE }, - { X86::VFMSUBPSr213r, X86::VFMSUBPSr213m, TB_ALIGN_NONE }, - { X86::VFMSUBPDr213r, X86::VFMSUBPDr213m, TB_ALIGN_NONE }, - { X86::VFMSUBPSr231rY, X86::VFMSUBPSr231mY, TB_ALIGN_NONE }, - { X86::VFMSUBPDr231rY, X86::VFMSUBPDr231mY, TB_ALIGN_NONE }, - { X86::VFMSUBPSr132rY, X86::VFMSUBPSr132mY, TB_ALIGN_NONE }, - { X86::VFMSUBPDr132rY, X86::VFMSUBPDr132mY, TB_ALIGN_NONE }, - { X86::VFMSUBPSr213rY, X86::VFMSUBPSr213mY, TB_ALIGN_NONE }, - { X86::VFMSUBPDr213rY, X86::VFMSUBPDr213mY, TB_ALIGN_NONE }, - - { X86::VFNMSUBSSr231r, X86::VFNMSUBSSr231m, TB_ALIGN_NONE }, - { X86::VFNMSUBSSr231r_Int, X86::VFNMSUBSSr231m_Int, TB_ALIGN_NONE }, - { X86::VFNMSUBSDr231r, X86::VFNMSUBSDr231m, TB_ALIGN_NONE }, - { X86::VFNMSUBSDr231r_Int, X86::VFNMSUBSDr231m_Int, TB_ALIGN_NONE }, - { X86::VFNMSUBSSr132r, X86::VFNMSUBSSr132m, TB_ALIGN_NONE }, - { X86::VFNMSUBSSr132r_Int, X86::VFNMSUBSSr132m_Int, TB_ALIGN_NONE }, - { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr132m, TB_ALIGN_NONE }, - { X86::VFNMSUBSDr132r_Int, X86::VFNMSUBSDr132m_Int, TB_ALIGN_NONE }, - { X86::VFNMSUBSSr213r, X86::VFNMSUBSSr213m, TB_ALIGN_NONE }, - { X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr213m_Int, TB_ALIGN_NONE }, - { X86::VFNMSUBSDr213r, X86::VFNMSUBSDr213m, TB_ALIGN_NONE }, - { X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr213m_Int, TB_ALIGN_NONE }, - - { X86::VFNMSUBPSr231r, X86::VFNMSUBPSr231m, TB_ALIGN_NONE }, - { X86::VFNMSUBPDr231r, X86::VFNMSUBPDr231m, TB_ALIGN_NONE }, - { X86::VFNMSUBPSr132r, X86::VFNMSUBPSr132m, TB_ALIGN_NONE }, - { X86::VFNMSUBPDr132r, X86::VFNMSUBPDr132m, TB_ALIGN_NONE }, - { X86::VFNMSUBPSr213r, X86::VFNMSUBPSr213m, TB_ALIGN_NONE }, - { X86::VFNMSUBPDr213r, X86::VFNMSUBPDr213m, TB_ALIGN_NONE }, - { X86::VFNMSUBPSr231rY, X86::VFNMSUBPSr231mY, TB_ALIGN_NONE }, - { X86::VFNMSUBPDr231rY, X86::VFNMSUBPDr231mY, TB_ALIGN_NONE }, - { X86::VFNMSUBPSr132rY, X86::VFNMSUBPSr132mY, TB_ALIGN_NONE }, - { X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr132mY, TB_ALIGN_NONE }, - { X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr213mY, TB_ALIGN_NONE }, - { X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr213mY, TB_ALIGN_NONE }, - - { X86::VFMADDSUBPSr231r, X86::VFMADDSUBPSr231m, TB_ALIGN_NONE }, - { X86::VFMADDSUBPDr231r, X86::VFMADDSUBPDr231m, TB_ALIGN_NONE }, - { X86::VFMADDSUBPSr132r, X86::VFMADDSUBPSr132m, TB_ALIGN_NONE }, - { X86::VFMADDSUBPDr132r, X86::VFMADDSUBPDr132m, TB_ALIGN_NONE }, - { X86::VFMADDSUBPSr213r, X86::VFMADDSUBPSr213m, TB_ALIGN_NONE }, - { X86::VFMADDSUBPDr213r, X86::VFMADDSUBPDr213m, TB_ALIGN_NONE }, - { X86::VFMADDSUBPSr231rY, X86::VFMADDSUBPSr231mY, TB_ALIGN_NONE }, - { X86::VFMADDSUBPDr231rY, X86::VFMADDSUBPDr231mY, TB_ALIGN_NONE }, - { X86::VFMADDSUBPSr132rY, X86::VFMADDSUBPSr132mY, TB_ALIGN_NONE }, - { X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr132mY, TB_ALIGN_NONE }, - { X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr213mY, TB_ALIGN_NONE }, - { X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr213mY, TB_ALIGN_NONE }, - - { X86::VFMSUBADDPSr231r, X86::VFMSUBADDPSr231m, TB_ALIGN_NONE }, - { X86::VFMSUBADDPDr231r, X86::VFMSUBADDPDr231m, TB_ALIGN_NONE }, - { X86::VFMSUBADDPSr132r, X86::VFMSUBADDPSr132m, TB_ALIGN_NONE }, - { X86::VFMSUBADDPDr132r, X86::VFMSUBADDPDr132m, TB_ALIGN_NONE }, - { X86::VFMSUBADDPSr213r, X86::VFMSUBADDPSr213m, TB_ALIGN_NONE }, - { X86::VFMSUBADDPDr213r, X86::VFMSUBADDPDr213m, TB_ALIGN_NONE }, - { X86::VFMSUBADDPSr231rY, X86::VFMSUBADDPSr231mY, TB_ALIGN_NONE }, - { X86::VFMSUBADDPDr231rY, X86::VFMSUBADDPDr231mY, TB_ALIGN_NONE }, - { X86::VFMSUBADDPSr132rY, X86::VFMSUBADDPSr132mY, TB_ALIGN_NONE }, - { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_NONE }, - { X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_NONE }, - { X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_NONE }, + { X86::VFMADD231SSr, X86::VFMADD231SSm, TB_ALIGN_NONE }, + { X86::VFMADD231SSr_Int, X86::VFMADD231SSm_Int, TB_ALIGN_NONE }, + { X86::VFMADD231SDr, X86::VFMADD231SDm, TB_ALIGN_NONE }, + { X86::VFMADD231SDr_Int, X86::VFMADD231SDm_Int, TB_ALIGN_NONE }, + { X86::VFMADD132SSr, X86::VFMADD132SSm, TB_ALIGN_NONE }, + { X86::VFMADD132SSr_Int, X86::VFMADD132SSm_Int, TB_ALIGN_NONE }, + { X86::VFMADD132SDr, X86::VFMADD132SDm, TB_ALIGN_NONE }, + { X86::VFMADD132SDr_Int, X86::VFMADD132SDm_Int, TB_ALIGN_NONE }, + { X86::VFMADD213SSr, X86::VFMADD213SSm, TB_ALIGN_NONE }, + { X86::VFMADD213SSr_Int, X86::VFMADD213SSm_Int, TB_ALIGN_NONE }, + { X86::VFMADD213SDr, X86::VFMADD213SDm, TB_ALIGN_NONE }, + { X86::VFMADD213SDr_Int, X86::VFMADD213SDm_Int, TB_ALIGN_NONE }, + + { X86::VFMADD231PSr, X86::VFMADD231PSm, TB_ALIGN_NONE }, + { X86::VFMADD231PDr, X86::VFMADD231PDm, TB_ALIGN_NONE }, + { X86::VFMADD132PSr, X86::VFMADD132PSm, TB_ALIGN_NONE }, + { X86::VFMADD132PDr, X86::VFMADD132PDm, TB_ALIGN_NONE }, + { X86::VFMADD213PSr, X86::VFMADD213PSm, TB_ALIGN_NONE }, + { X86::VFMADD213PDr, X86::VFMADD213PDm, TB_ALIGN_NONE }, + { X86::VFMADD231PSYr, X86::VFMADD231PSYm, TB_ALIGN_NONE }, + { X86::VFMADD231PDYr, X86::VFMADD231PDYm, TB_ALIGN_NONE }, + { X86::VFMADD132PSYr, X86::VFMADD132PSYm, TB_ALIGN_NONE }, + { X86::VFMADD132PDYr, X86::VFMADD132PDYm, TB_ALIGN_NONE }, + { X86::VFMADD213PSYr, X86::VFMADD213PSYm, TB_ALIGN_NONE }, + { X86::VFMADD213PDYr, X86::VFMADD213PDYm, TB_ALIGN_NONE }, + + { X86::VFNMADD231SSr, X86::VFNMADD231SSm, TB_ALIGN_NONE }, + { X86::VFNMADD231SSr_Int, X86::VFNMADD231SSm_Int, TB_ALIGN_NONE }, + { X86::VFNMADD231SDr, X86::VFNMADD231SDm, TB_ALIGN_NONE }, + { X86::VFNMADD231SDr_Int, X86::VFNMADD231SDm_Int, TB_ALIGN_NONE }, + { X86::VFNMADD132SSr, X86::VFNMADD132SSm, TB_ALIGN_NONE }, + { X86::VFNMADD132SSr_Int, X86::VFNMADD132SSm_Int, TB_ALIGN_NONE }, + { X86::VFNMADD132SDr, X86::VFNMADD132SDm, TB_ALIGN_NONE }, + { X86::VFNMADD132SDr_Int, X86::VFNMADD132SDm_Int, TB_ALIGN_NONE }, + { X86::VFNMADD213SSr, X86::VFNMADD213SSm, TB_ALIGN_NONE }, + { X86::VFNMADD213SSr_Int, X86::VFNMADD213SSm_Int, TB_ALIGN_NONE }, + { X86::VFNMADD213SDr, X86::VFNMADD213SDm, TB_ALIGN_NONE }, + { X86::VFNMADD213SDr_Int, X86::VFNMADD213SDm_Int, TB_ALIGN_NONE }, + + { X86::VFNMADD231PSr, X86::VFNMADD231PSm, TB_ALIGN_NONE }, + { X86::VFNMADD231PDr, X86::VFNMADD231PDm, TB_ALIGN_NONE }, + { X86::VFNMADD132PSr, X86::VFNMADD132PSm, TB_ALIGN_NONE }, + { X86::VFNMADD132PDr, X86::VFNMADD132PDm, TB_ALIGN_NONE }, + { X86::VFNMADD213PSr, X86::VFNMADD213PSm, TB_ALIGN_NONE }, + { X86::VFNMADD213PDr, X86::VFNMADD213PDm, TB_ALIGN_NONE }, + { X86::VFNMADD231PSYr, X86::VFNMADD231PSYm, TB_ALIGN_NONE }, + { X86::VFNMADD231PDYr, X86::VFNMADD231PDYm, TB_ALIGN_NONE }, + { X86::VFNMADD132PSYr, X86::VFNMADD132PSYm, TB_ALIGN_NONE }, + { X86::VFNMADD132PDYr, X86::VFNMADD132PDYm, TB_ALIGN_NONE }, + { X86::VFNMADD213PSYr, X86::VFNMADD213PSYm, TB_ALIGN_NONE }, + { X86::VFNMADD213PDYr, X86::VFNMADD213PDYm, TB_ALIGN_NONE }, + + { X86::VFMSUB231SSr, X86::VFMSUB231SSm, TB_ALIGN_NONE }, + { X86::VFMSUB231SSr_Int, X86::VFMSUB231SSm_Int, TB_ALIGN_NONE }, + { X86::VFMSUB231SDr, X86::VFMSUB231SDm, TB_ALIGN_NONE }, + { X86::VFMSUB231SDr_Int, X86::VFMSUB231SDm_Int, TB_ALIGN_NONE }, + { X86::VFMSUB132SSr, X86::VFMSUB132SSm, TB_ALIGN_NONE }, + { X86::VFMSUB132SSr_Int, X86::VFMSUB132SSm_Int, TB_ALIGN_NONE }, + { X86::VFMSUB132SDr, X86::VFMSUB132SDm, TB_ALIGN_NONE }, + { X86::VFMSUB132SDr_Int, X86::VFMSUB132SDm_Int, TB_ALIGN_NONE }, + { X86::VFMSUB213SSr, X86::VFMSUB213SSm, TB_ALIGN_NONE }, + { X86::VFMSUB213SSr_Int, X86::VFMSUB213SSm_Int, TB_ALIGN_NONE }, + { X86::VFMSUB213SDr, X86::VFMSUB213SDm, TB_ALIGN_NONE }, + { X86::VFMSUB213SDr_Int, X86::VFMSUB213SDm_Int, TB_ALIGN_NONE }, + + { X86::VFMSUB231PSr, X86::VFMSUB231PSm, TB_ALIGN_NONE }, + { X86::VFMSUB231PDr, X86::VFMSUB231PDm, TB_ALIGN_NONE }, + { X86::VFMSUB132PSr, X86::VFMSUB132PSm, TB_ALIGN_NONE }, + { X86::VFMSUB132PDr, X86::VFMSUB132PDm, TB_ALIGN_NONE }, + { X86::VFMSUB213PSr, X86::VFMSUB213PSm, TB_ALIGN_NONE }, + { X86::VFMSUB213PDr, X86::VFMSUB213PDm, TB_ALIGN_NONE }, + { X86::VFMSUB231PSYr, X86::VFMSUB231PSYm, TB_ALIGN_NONE }, + { X86::VFMSUB231PDYr, X86::VFMSUB231PDYm, TB_ALIGN_NONE }, + { X86::VFMSUB132PSYr, X86::VFMSUB132PSYm, TB_ALIGN_NONE }, + { X86::VFMSUB132PDYr, X86::VFMSUB132PDYm, TB_ALIGN_NONE }, + { X86::VFMSUB213PSYr, X86::VFMSUB213PSYm, TB_ALIGN_NONE }, + { X86::VFMSUB213PDYr, X86::VFMSUB213PDYm, TB_ALIGN_NONE }, + + { X86::VFNMSUB231SSr, X86::VFNMSUB231SSm, TB_ALIGN_NONE }, + { X86::VFNMSUB231SSr_Int, X86::VFNMSUB231SSm_Int, TB_ALIGN_NONE }, + { X86::VFNMSUB231SDr, X86::VFNMSUB231SDm, TB_ALIGN_NONE }, + { X86::VFNMSUB231SDr_Int, X86::VFNMSUB231SDm_Int, TB_ALIGN_NONE }, + { X86::VFNMSUB132SSr, X86::VFNMSUB132SSm, TB_ALIGN_NONE }, + { X86::VFNMSUB132SSr_Int, X86::VFNMSUB132SSm_Int, TB_ALIGN_NONE }, + { X86::VFNMSUB132SDr, X86::VFNMSUB132SDm, TB_ALIGN_NONE }, + { X86::VFNMSUB132SDr_Int, X86::VFNMSUB132SDm_Int, TB_ALIGN_NONE }, + { X86::VFNMSUB213SSr, X86::VFNMSUB213SSm, TB_ALIGN_NONE }, + { X86::VFNMSUB213SSr_Int, X86::VFNMSUB213SSm_Int, TB_ALIGN_NONE }, + { X86::VFNMSUB213SDr, X86::VFNMSUB213SDm, TB_ALIGN_NONE }, + { X86::VFNMSUB213SDr_Int, X86::VFNMSUB213SDm_Int, TB_ALIGN_NONE }, + + { X86::VFNMSUB231PSr, X86::VFNMSUB231PSm, TB_ALIGN_NONE }, + { X86::VFNMSUB231PDr, X86::VFNMSUB231PDm, TB_ALIGN_NONE }, + { X86::VFNMSUB132PSr, X86::VFNMSUB132PSm, TB_ALIGN_NONE }, + { X86::VFNMSUB132PDr, X86::VFNMSUB132PDm, TB_ALIGN_NONE }, + { X86::VFNMSUB213PSr, X86::VFNMSUB213PSm, TB_ALIGN_NONE }, + { X86::VFNMSUB213PDr, X86::VFNMSUB213PDm, TB_ALIGN_NONE }, + { X86::VFNMSUB231PSYr, X86::VFNMSUB231PSYm, TB_ALIGN_NONE }, + { X86::VFNMSUB231PDYr, X86::VFNMSUB231PDYm, TB_ALIGN_NONE }, + { X86::VFNMSUB132PSYr, X86::VFNMSUB132PSYm, TB_ALIGN_NONE }, + { X86::VFNMSUB132PDYr, X86::VFNMSUB132PDYm, TB_ALIGN_NONE }, + { X86::VFNMSUB213PSYr, X86::VFNMSUB213PSYm, TB_ALIGN_NONE }, + { X86::VFNMSUB213PDYr, X86::VFNMSUB213PDYm, TB_ALIGN_NONE }, + + { X86::VFMADDSUB231PSr, X86::VFMADDSUB231PSm, TB_ALIGN_NONE }, + { X86::VFMADDSUB231PDr, X86::VFMADDSUB231PDm, TB_ALIGN_NONE }, + { X86::VFMADDSUB132PSr, X86::VFMADDSUB132PSm, TB_ALIGN_NONE }, + { X86::VFMADDSUB132PDr, X86::VFMADDSUB132PDm, TB_ALIGN_NONE }, + { X86::VFMADDSUB213PSr, X86::VFMADDSUB213PSm, TB_ALIGN_NONE }, + { X86::VFMADDSUB213PDr, X86::VFMADDSUB213PDm, TB_ALIGN_NONE }, + { X86::VFMADDSUB231PSYr, X86::VFMADDSUB231PSYm, TB_ALIGN_NONE }, + { X86::VFMADDSUB231PDYr, X86::VFMADDSUB231PDYm, TB_ALIGN_NONE }, + { X86::VFMADDSUB132PSYr, X86::VFMADDSUB132PSYm, TB_ALIGN_NONE }, + { X86::VFMADDSUB132PDYr, X86::VFMADDSUB132PDYm, TB_ALIGN_NONE }, + { X86::VFMADDSUB213PSYr, X86::VFMADDSUB213PSYm, TB_ALIGN_NONE }, + { X86::VFMADDSUB213PDYr, X86::VFMADDSUB213PDYm, TB_ALIGN_NONE }, + + { X86::VFMSUBADD231PSr, X86::VFMSUBADD231PSm, TB_ALIGN_NONE }, + { X86::VFMSUBADD231PDr, X86::VFMSUBADD231PDm, TB_ALIGN_NONE }, + { X86::VFMSUBADD132PSr, X86::VFMSUBADD132PSm, TB_ALIGN_NONE }, + { X86::VFMSUBADD132PDr, X86::VFMSUBADD132PDm, TB_ALIGN_NONE }, + { X86::VFMSUBADD213PSr, X86::VFMSUBADD213PSm, TB_ALIGN_NONE }, + { X86::VFMSUBADD213PDr, X86::VFMSUBADD213PDm, TB_ALIGN_NONE }, + { X86::VFMSUBADD231PSYr, X86::VFMSUBADD231PSYm, TB_ALIGN_NONE }, + { X86::VFMSUBADD231PDYr, X86::VFMSUBADD231PDYm, TB_ALIGN_NONE }, + { X86::VFMSUBADD132PSYr, X86::VFMSUBADD132PSYm, TB_ALIGN_NONE }, + { X86::VFMSUBADD132PDYr, X86::VFMSUBADD132PDYm, TB_ALIGN_NONE }, + { X86::VFMSUBADD213PSYr, X86::VFMSUBADD213PSYm, TB_ALIGN_NONE }, + { X86::VFMSUBADD213PDYr, X86::VFMSUBADD213PDYm, TB_ALIGN_NONE }, // FMA4 foldable patterns { X86::VFMADDSS4rr, X86::VFMADDSS4rm, TB_ALIGN_NONE }, { X86::VFMADDSD4rr, X86::VFMADDSD4rm, TB_ALIGN_NONE }, { X86::VFMADDPS4rr, X86::VFMADDPS4rm, TB_ALIGN_NONE }, { X86::VFMADDPD4rr, X86::VFMADDPD4rm, TB_ALIGN_NONE }, - { X86::VFMADDPS4rrY, X86::VFMADDPS4rmY, TB_ALIGN_NONE }, - { X86::VFMADDPD4rrY, X86::VFMADDPD4rmY, TB_ALIGN_NONE }, + { X86::VFMADDPS4Yrr, X86::VFMADDPS4Yrm, TB_ALIGN_NONE }, + { X86::VFMADDPD4Yrr, X86::VFMADDPD4Yrm, TB_ALIGN_NONE }, { X86::VFNMADDSS4rr, X86::VFNMADDSS4rm, TB_ALIGN_NONE }, { X86::VFNMADDSD4rr, X86::VFNMADDSD4rm, TB_ALIGN_NONE }, { X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, TB_ALIGN_NONE }, { X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, TB_ALIGN_NONE }, - { X86::VFNMADDPS4rrY, X86::VFNMADDPS4rmY, TB_ALIGN_NONE }, - { X86::VFNMADDPD4rrY, X86::VFNMADDPD4rmY, TB_ALIGN_NONE }, + { X86::VFNMADDPS4Yrr, X86::VFNMADDPS4Yrm, TB_ALIGN_NONE }, + { X86::VFNMADDPD4Yrr, X86::VFNMADDPD4Yrm, TB_ALIGN_NONE }, { X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, TB_ALIGN_NONE }, { X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, TB_ALIGN_NONE }, { X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, TB_ALIGN_NONE }, { X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, TB_ALIGN_NONE }, - { X86::VFMSUBPS4rrY, X86::VFMSUBPS4rmY, TB_ALIGN_NONE }, - { X86::VFMSUBPD4rrY, X86::VFMSUBPD4rmY, TB_ALIGN_NONE }, + { X86::VFMSUBPS4Yrr, X86::VFMSUBPS4Yrm, TB_ALIGN_NONE }, + { X86::VFMSUBPD4Yrr, X86::VFMSUBPD4Yrm, TB_ALIGN_NONE }, { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4rm, TB_ALIGN_NONE }, { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4rm, TB_ALIGN_NONE }, { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, TB_ALIGN_NONE }, { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, TB_ALIGN_NONE }, - { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4rmY, TB_ALIGN_NONE }, - { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4rmY, TB_ALIGN_NONE }, + { X86::VFNMSUBPS4Yrr, X86::VFNMSUBPS4Yrm, TB_ALIGN_NONE }, + { X86::VFNMSUBPD4Yrr, X86::VFNMSUBPD4Yrm, TB_ALIGN_NONE }, { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, TB_ALIGN_NONE }, { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, TB_ALIGN_NONE }, - { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4rmY, TB_ALIGN_NONE }, - { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4rmY, TB_ALIGN_NONE }, + { X86::VFMADDSUBPS4Yrr, X86::VFMADDSUBPS4Yrm, TB_ALIGN_NONE }, + { X86::VFMADDSUBPD4Yrr, X86::VFMADDSUBPD4Yrm, TB_ALIGN_NONE }, { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4rm, TB_ALIGN_NONE }, { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_NONE }, - { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4rmY, TB_ALIGN_NONE }, - { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4rmY, TB_ALIGN_NONE }, + { X86::VFMSUBADDPS4Yrr, X86::VFMSUBADDPS4Yrm, TB_ALIGN_NONE }, + { X86::VFMSUBADDPD4Yrr, X86::VFMSUBADDPD4Yrm, TB_ALIGN_NONE }, // XOP foldable instructions { X86::VPCMOVrrr, X86::VPCMOVrrm, 0 }, @@ -3158,71 +3158,55 @@ static bool isFMA3(unsigned Opcode, bool &IsIntrinsic) { #define FMA3_CASE(Name, Modifier) \ case X86::Name##r##Modifier: case X86::Name##m##Modifier: -#define FMA3_SCALAR_PAIR(Name, Form, Modifier) \ - FMA3_CASE(Name##SD##Form, Modifier) \ - FMA3_CASE(Name##SS##Form, Modifier) +#define FMA3_SCALAR_PAIR(Name, Size, Modifier) \ + FMA3_CASE(Name##SD##Size, Modifier) \ + FMA3_CASE(Name##SS##Size, Modifier) -#define FMA3_PACKED_PAIR(Name, Form, Modifier) \ - FMA3_CASE(Name##PD##Form, Modifier) \ - FMA3_CASE(Name##PS##Form, Modifier) +#define FMA3_PACKED_PAIR(Name, Size) \ + FMA3_CASE(Name##PD##Size, ) \ + FMA3_CASE(Name##PS##Size, ) -#define FMA3_PACKED_SET(Form, Modifier) \ - FMA3_PACKED_PAIR(VFMADD, Form, Modifier) \ - FMA3_PACKED_PAIR(VFMSUB, Form, Modifier) \ - FMA3_PACKED_PAIR(VFNMADD, Form, Modifier) \ - FMA3_PACKED_PAIR(VFNMSUB, Form, Modifier) \ - FMA3_PACKED_PAIR(VFMADDSUB, Form, Modifier) \ - FMA3_PACKED_PAIR(VFMSUBADD, Form, Modifier) +#define FMA3_PACKED_SET(Form, Size) \ + FMA3_PACKED_PAIR(VFMADD##Form, Size) \ + FMA3_PACKED_PAIR(VFMSUB##Form, Size) \ + FMA3_PACKED_PAIR(VFNMADD##Form, Size) \ + FMA3_PACKED_PAIR(VFNMSUB##Form, Size) \ + FMA3_PACKED_PAIR(VFMADDSUB##Form, Size) \ + FMA3_PACKED_PAIR(VFMSUBADD##Form, Size) #define FMA3_CASES(Form) \ - FMA3_SCALAR_PAIR(VFMADD, Form, ) \ - FMA3_SCALAR_PAIR(VFMSUB, Form, ) \ - FMA3_SCALAR_PAIR(VFNMADD, Form, ) \ - FMA3_SCALAR_PAIR(VFNMSUB, Form, ) \ + FMA3_SCALAR_PAIR(VFMADD##Form, ,) \ + FMA3_SCALAR_PAIR(VFMSUB##Form, ,) \ + FMA3_SCALAR_PAIR(VFNMADD##Form, ,) \ + FMA3_SCALAR_PAIR(VFNMSUB##Form, ,) \ FMA3_PACKED_SET(Form, ) \ FMA3_PACKED_SET(Form, Y) \ -#define FMA3_SCALAR_PAIR_AVX512(Name, Modifier) \ - FMA3_CASE(Name##SD, Modifier) \ - FMA3_CASE(Name##SS, Modifier) - -#define FMA3_PACKED_PAIR_AVX512(Name, Size) \ - FMA3_CASE(Name##PD##Size, ) \ - FMA3_CASE(Name##PS##Size, ) - -#define FMA3_PACKED_SET_AVX512(Form, Size) \ - FMA3_PACKED_PAIR_AVX512(VFMADD##Form, Size) \ - FMA3_PACKED_PAIR_AVX512(VFMSUB##Form, Size) \ - FMA3_PACKED_PAIR_AVX512(VFNMADD##Form, Size) \ - FMA3_PACKED_PAIR_AVX512(VFNMSUB##Form, Size) \ - FMA3_PACKED_PAIR_AVX512(VFMADDSUB##Form, Size) \ - FMA3_PACKED_PAIR_AVX512(VFMSUBADD##Form, Size) - #define FMA3_CASES_AVX512(Form) \ - FMA3_SCALAR_PAIR_AVX512(VFMADD##Form, ) \ - FMA3_SCALAR_PAIR_AVX512(VFMSUB##Form, ) \ - FMA3_SCALAR_PAIR_AVX512(VFNMADD##Form, ) \ - FMA3_SCALAR_PAIR_AVX512(VFNMSUB##Form, ) \ - FMA3_PACKED_SET_AVX512(Form, Z128) \ - FMA3_PACKED_SET_AVX512(Form, Z256) \ - FMA3_PACKED_SET_AVX512(Form, Z) + FMA3_SCALAR_PAIR(VFMADD##Form, Z, ) \ + FMA3_SCALAR_PAIR(VFMSUB##Form, Z, ) \ + FMA3_SCALAR_PAIR(VFNMADD##Form, Z, ) \ + FMA3_SCALAR_PAIR(VFNMSUB##Form, Z, ) \ + FMA3_PACKED_SET(Form, Z128) \ + FMA3_PACKED_SET(Form, Z256) \ + FMA3_PACKED_SET(Form, Z) #define FMA3_CASES_SCALAR_INT(Form) \ - FMA3_SCALAR_PAIR(VFMADD, Form, _Int) \ - FMA3_SCALAR_PAIR(VFMSUB, Form, _Int) \ - FMA3_SCALAR_PAIR(VFNMADD, Form, _Int) \ - FMA3_SCALAR_PAIR(VFNMSUB, Form, _Int) + FMA3_SCALAR_PAIR(VFMADD##Form, , _Int) \ + FMA3_SCALAR_PAIR(VFMSUB##Form, , _Int) \ + FMA3_SCALAR_PAIR(VFNMADD##Form, , _Int) \ + FMA3_SCALAR_PAIR(VFNMSUB##Form, , _Int) #define FMA3_CASES_SCALAR_INT_AVX512(Form) \ - FMA3_SCALAR_PAIR_AVX512(VFMADD##Form, _Int) \ - FMA3_SCALAR_PAIR_AVX512(VFMSUB##Form, _Int) \ - FMA3_SCALAR_PAIR_AVX512(VFNMADD##Form, _Int) \ - FMA3_SCALAR_PAIR_AVX512(VFNMSUB##Form, _Int) + FMA3_SCALAR_PAIR(VFMADD##Form, Z, _Int) \ + FMA3_SCALAR_PAIR(VFMSUB##Form, Z, _Int) \ + FMA3_SCALAR_PAIR(VFNMADD##Form, Z, _Int) \ + FMA3_SCALAR_PAIR(VFNMSUB##Form, Z, _Int) switch (Opcode) { - FMA3_CASES(r132) - FMA3_CASES(r213) - FMA3_CASES(r231) + FMA3_CASES(132) + FMA3_CASES(213) + FMA3_CASES(231) // AVX-512 instructions FMA3_CASES_AVX512(132) @@ -3230,9 +3214,9 @@ case X86::Name##r##Modifier: case X86::Name##m##Modifier: FMA3_CASES_AVX512(231) return true; - FMA3_CASES_SCALAR_INT(r132) - FMA3_CASES_SCALAR_INT(r213) - FMA3_CASES_SCALAR_INT(r231) + FMA3_CASES_SCALAR_INT(132) + FMA3_CASES_SCALAR_INT(213) + FMA3_CASES_SCALAR_INT(231) // AVX-512 instructions FMA3_CASES_SCALAR_INT_AVX512(132) @@ -3250,9 +3234,6 @@ case X86::Name##r##Modifier: case X86::Name##m##Modifier: #undef FMA3_PACKED_PAIR #undef FMA3_PACKED_SET #undef FMA3_CASES -#undef FMA3_SCALAR_PAIR_AVX512 -#undef FMA3_PACKED_PAIR_AVX512 -#undef FMA3_PACKED_SET_AVX512 #undef FMA3_CASES_AVX512 #undef FMA3_CASES_SCALAR_INT #undef FMA3_CASES_SCALAR_INT_AVX512 @@ -3276,16 +3257,16 @@ static unsigned getFMA3OpcodeToCommuteOperands(unsigned Opc, { X86::Name##132##Suffix, X86::Name##213##Suffix, X86::Name##231##Suffix }, #define FMA3_SCALAR_PAIR(Name, Suffix) \ - FMA3_ENTRY(Name##SSr, Suffix) \ - FMA3_ENTRY(Name##SDr, Suffix) + FMA3_ENTRY(Name, SS##Suffix) \ + FMA3_ENTRY(Name, SD##Suffix) #define FMA3_PACKED_PAIR(Name, Suffix) \ - FMA3_ENTRY(Name##PSr, Suffix) \ - FMA3_ENTRY(Name##PDr, Suffix) + FMA3_ENTRY(Name, PS##Suffix) \ + FMA3_ENTRY(Name, PD##Suffix) #define FMA3_PACKED_SIZES(Name, Suffix) \ FMA3_PACKED_PAIR(Name, Suffix) \ - FMA3_PACKED_PAIR(Name, Suffix##Y) + FMA3_PACKED_PAIR(Name, Y##Suffix) #define FMA3_TABLE_ALL(Name) \ FMA3_SCALAR_PAIR(Name, r) \ @@ -3301,23 +3282,15 @@ static unsigned getFMA3OpcodeToCommuteOperands(unsigned Opc, FMA3_SCALAR_PAIR(Name, r_Int) \ FMA3_SCALAR_PAIR(Name, m_Int) -#define FMA3_SCALAR_PAIR_AVX512(Name, Suffix) \ - FMA3_ENTRY(Name, SS##Suffix) \ - FMA3_ENTRY(Name, SD##Suffix) - -#define FMA3_PACKED_PAIR_AVX512(Name, Suffix) \ - FMA3_ENTRY(Name, PS##Suffix) \ - FMA3_ENTRY(Name, PD##Suffix) - #define FMA3_PACKED_SIZES_AVX512(Name, Suffix) \ - FMA3_PACKED_PAIR_AVX512(Name, Z128##Suffix) \ - FMA3_PACKED_PAIR_AVX512(Name, Z256##Suffix) \ - FMA3_PACKED_PAIR_AVX512(Name, Z##Suffix) + FMA3_PACKED_PAIR(Name, Z128##Suffix) \ + FMA3_PACKED_PAIR(Name, Z256##Suffix) \ + FMA3_PACKED_PAIR(Name, Z##Suffix) #define FMA3_TABLE_ALL_AVX512(Name) \ - FMA3_SCALAR_PAIR_AVX512(Name, r) \ + FMA3_SCALAR_PAIR(Name, Zr) \ FMA3_PACKED_SIZES_AVX512(Name, r) \ - FMA3_SCALAR_PAIR_AVX512(Name, m) \ + FMA3_SCALAR_PAIR(Name, Zm) \ FMA3_PACKED_SIZES_AVX512(Name, m) #define FMA3_TABLE_PACKED_AVX512(Name) \ @@ -3325,8 +3298,8 @@ static unsigned getFMA3OpcodeToCommuteOperands(unsigned Opc, FMA3_PACKED_SIZES_AVX512(Name, m) #define FMA3_TABLE_SCALAR_INT_AVX512(Name) \ - FMA3_SCALAR_PAIR_AVX512(Name, r_Int) \ - FMA3_SCALAR_PAIR_AVX512(Name, m_Int) + FMA3_SCALAR_PAIR(Name, Zr_Int) \ + FMA3_SCALAR_PAIR(Name, Zm_Int) // Define the array that holds FMA opcodes in groups // of 3 opcodes(132, 213, 231) in each group. @@ -3370,7 +3343,6 @@ static unsigned getFMA3OpcodeToCommuteOperands(unsigned Opc, #undef FMA3_TABLE_PACKED #undef FMA3_TABLE_SCALAR_INT #undef FMA3_SCALAR_PAIR_AVX512 -#undef FMA3_PACKED_PAIR_AVX512 #undef FMA3_PACKED_SIZES_AVX512 #undef FMA3_TABLE_ALL_AVX512 #undef FMA3_TABLE_PACKED_AVX512 @@ -6295,12 +6267,12 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int: case X86::VDIVSSZrr_Int: case X86::MULSSrr_Int: case X86::VMULSSrr_Int: case X86::VMULSSZrr_Int: case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int: case X86::VSUBSSZrr_Int: - case X86::VFMADDSSr132r_Int: case X86::VFNMADDSSr132r_Int: - case X86::VFMADDSSr213r_Int: case X86::VFNMADDSSr213r_Int: - case X86::VFMADDSSr231r_Int: case X86::VFNMADDSSr231r_Int: - case X86::VFMSUBSSr132r_Int: case X86::VFNMSUBSSr132r_Int: - case X86::VFMSUBSSr213r_Int: case X86::VFNMSUBSSr213r_Int: - case X86::VFMSUBSSr231r_Int: case X86::VFNMSUBSSr231r_Int: + case X86::VFMADD132SSr_Int: case X86::VFNMADD132SSr_Int: + case X86::VFMADD213SSr_Int: case X86::VFNMADD213SSr_Int: + case X86::VFMADD231SSr_Int: case X86::VFNMADD231SSr_Int: + case X86::VFMSUB132SSr_Int: case X86::VFNMSUB132SSr_Int: + case X86::VFMSUB213SSr_Int: case X86::VFNMSUB213SSr_Int: + case X86::VFMSUB231SSr_Int: case X86::VFNMSUB231SSr_Int: return false; default: return true; @@ -6317,12 +6289,12 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int: case X86::VDIVSDZrr_Int: case X86::MULSDrr_Int: case X86::VMULSDrr_Int: case X86::VMULSDZrr_Int: case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int: case X86::VSUBSDZrr_Int: - case X86::VFMADDSDr132r_Int: case X86::VFNMADDSDr132r_Int: - case X86::VFMADDSDr213r_Int: case X86::VFNMADDSDr213r_Int: - case X86::VFMADDSDr231r_Int: case X86::VFNMADDSDr231r_Int: - case X86::VFMSUBSDr132r_Int: case X86::VFNMSUBSDr132r_Int: - case X86::VFMSUBSDr213r_Int: case X86::VFNMSUBSDr213r_Int: - case X86::VFMSUBSDr231r_Int: case X86::VFNMSUBSDr231r_Int: + case X86::VFMADD132SDr_Int: case X86::VFNMADD132SDr_Int: + case X86::VFMADD213SDr_Int: case X86::VFNMADD213SDr_Int: + case X86::VFMADD231SDr_Int: case X86::VFNMADD231SDr_Int: + case X86::VFMSUB132SDr_Int: case X86::VFNMSUB132SDr_Int: + case X86::VFMSUB213SDr_Int: case X86::VFNMSUB213SDr_Int: + case X86::VFMSUB231SDr_Int: case X86::VFNMSUB231SDr_Int: return false; default: return true; |