diff options
| author | Craig Topper <craig.topper@gmail.com> | 2012-08-31 15:40:30 +0000 | 
|---|---|---|
| committer | Craig Topper <craig.topper@gmail.com> | 2012-08-31 15:40:30 +0000 | 
| commit | c30fdbc46c1cdcbafa14562f448bdb8459dbebab (patch) | |
| tree | 203e7bb77ad4f116e588c2a6dc35340706f38824 /llvm/lib | |
| parent | 96f87069c4f02f727e635d77fb326eb2079b4195 (diff) | |
| download | bcm5719-llvm-c30fdbc46c1cdcbafa14562f448bdb8459dbebab.tar.gz bcm5719-llvm-c30fdbc46c1cdcbafa14562f448bdb8459dbebab.zip | |
Add support for converting llvm.fma to fma4 instructions.
llvm-svn: 162999
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrFMA.td | 100 | 
3 files changed, 76 insertions, 36 deletions
| diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp index 5039887e1a2..a54d35dda2a 100644 --- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -379,6 +379,8 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,    }    switch (type) { +  case TYPE_XMM32: +  case TYPE_XMM64:    case TYPE_XMM128:      mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4)));      return; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b2b30fe387c..f9184f693db 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1052,7 +1052,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)      setOperationAction(ISD::VSELECT,           MVT::v8i32, Legal);      setOperationAction(ISD::VSELECT,           MVT::v8f32, Legal); -    if (Subtarget->hasFMA()) { +    if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {        setOperationAction(ISD::FMA,             MVT::v8f32, Custom);        setOperationAction(ISD::FMA,             MVT::v4f64, Custom);        setOperationAction(ISD::FMA,             MVT::v4f32, Custom); @@ -15606,7 +15606,8 @@ static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG,      return SDValue();    EVT ScalarVT = VT.getScalarType(); -  if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget->hasFMA()) +  if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || +      (!Subtarget->hasFMA() && !Subtarget->hasFMA4()))      return SDValue();    SDValue A = N->getOperand(0); @@ -15628,9 +15629,10 @@ static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG,    unsigned Opcode;    if (!NegMul) -    Opcode = (!NegC)? X86ISD::FMADD : X86ISD::FMSUB; +    Opcode = (!NegC) ? X86ISD::FMADD : X86ISD::FMSUB;    else -    Opcode = (!NegC)? X86ISD::FNMADD : X86ISD::FNMSUB; +    Opcode = (!NegC) ? X86ISD::FNMADD : X86ISD::FNMSUB; +    return DAG.getNode(Opcode, dl, VT, A, B, C);  } diff --git a/llvm/lib/Target/X86/X86InstrFMA.td b/llvm/lib/Target/X86/X86InstrFMA.td index 445bbaa4c17..f99440d973f 100644 --- a/llvm/lib/Target/X86/X86InstrFMA.td +++ b/llvm/lib/Target/X86/X86InstrFMA.td @@ -193,34 +193,57 @@ defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss,  //===----------------------------------------------------------------------===// -multiclass fma4s<bits<8> opc, string OpcodeStr, Operand memop, -                 ComplexPattern mem_cpat, Intrinsic Int> { -  def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst), -           (ins VR128:$src1, VR128:$src2, VR128:$src3), +multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC, +                 X86MemOperand x86memop, ValueType OpVT, SDNode OpNode, +                 PatFrag mem_frag> { +  def rr : FMA4<opc, MRMSrcReg, (outs RC:$dst), +           (ins RC:$src1, RC:$src2, RC:$src3),             !strconcat(OpcodeStr,             "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), -           [(set VR128:$dst, -             (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4; -  def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst), -           (ins VR128:$src1, VR128:$src2, memop:$src3), +           [(set RC:$dst, +             (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, VEX_W, MemOp4; +  def rm : FMA4<opc, MRMSrcMem, (outs RC:$dst), +           (ins RC:$src1, RC:$src2, x86memop:$src3),             !strconcat(OpcodeStr,             "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), -           [(set VR128:$dst, -             (Int VR128:$src1, VR128:$src2, mem_cpat:$src3))]>, VEX_W, MemOp4; -  def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst), -           (ins VR128:$src1, memop:$src2, VR128:$src3), +           [(set RC:$dst, (OpNode RC:$src1, RC:$src2, +                           (mem_frag addr:$src3)))]>, VEX_W, MemOp4; +  def mr : FMA4<opc, MRMSrcMem, (outs RC:$dst), +           (ins RC:$src1, x86memop:$src2, RC:$src3),             !strconcat(OpcodeStr,             "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), -           [(set VR128:$dst, -             (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>; +           [(set RC:$dst, +             (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>;  // For disassembler  let isCodeGenOnly = 1 in -  def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst), -               (ins VR128:$src1, VR128:$src2, VR128:$src3), +  def rr_REV : FMA4<opc, MRMSrcReg, (outs RC:$dst), +               (ins RC:$src1, RC:$src2, RC:$src3),                 !strconcat(OpcodeStr,                 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>;  } +multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop, +                     ComplexPattern mem_cpat, Intrinsic Int> { +  def rr_Int : FMA4<opc, MRMSrcReg, (outs VR128:$dst), +               (ins VR128:$src1, VR128:$src2, VR128:$src3), +               !strconcat(OpcodeStr, +               "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), +               [(set VR128:$dst, +                 (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4; +  def rm_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst), +               (ins VR128:$src1, VR128:$src2, memop:$src3), +               !strconcat(OpcodeStr, +               "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), +               [(set VR128:$dst, (Int VR128:$src1, VR128:$src2, +                                  mem_cpat:$src3))]>, VEX_W, MemOp4; +  def mr_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst), +               (ins VR128:$src1, memop:$src2, VR128:$src3), +               !strconcat(OpcodeStr, +               "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), +               [(set VR128:$dst, +                 (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>; +} +  multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,                   ValueType OpVT128, ValueType OpVT256,                   PatFrag ld_frag128, PatFrag ld_frag256> { @@ -277,34 +300,47 @@ let isCodeGenOnly = 1 in {  let Predicates = [HasFMA4] in { -defm VFMADDSS4    : fma4s<0x6A, "vfmaddss", ssmem, sse_load_f32, -                          int_x86_fma_vfmadd_ss>; -defm VFMADDSD4    : fma4s<0x6B, "vfmaddsd", sdmem, sse_load_f64, -                          int_x86_fma_vfmadd_sd>; +defm VFMADDSS4  : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>, +                  fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32, +                            int_x86_fma_vfmadd_ss>; +defm VFMADDSD4  : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>, +                  fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64, +                            int_x86_fma_vfmadd_sd>; +defm VFMSUBSS4  : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>, +                  fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32, +                            int_x86_fma_vfmsub_ss>; +defm VFMSUBSD4  : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>, +                  fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64, +                            int_x86_fma_vfmsub_sd>; +defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32, +                        X86Fnmadd, loadf32>, +                  fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32, +                            int_x86_fma_vfnmadd_ss>; +defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64, +                        X86Fnmadd, loadf64>, +                  fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64, +                            int_x86_fma_vfnmadd_sd>; +defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32, +                        X86Fnmsub, loadf32>, +                  fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32, +                            int_x86_fma_vfnmsub_ss>; +defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64, +                        X86Fnmsub, loadf64>, +                  fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64, +                            int_x86_fma_vfnmsub_sd>; +  defm VFMADDPS4    : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,                            memopv4f32, memopv8f32>;  defm VFMADDPD4    : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,                            memopv2f64, memopv4f64>; -defm VFMSUBSS4    : fma4s<0x6E, "vfmsubss", ssmem, sse_load_f32, -                          int_x86_fma_vfmsub_ss>; -defm VFMSUBSD4    : fma4s<0x6F, "vfmsubsd", sdmem, sse_load_f64, -                          int_x86_fma_vfmsub_sd>;  defm VFMSUBPS4    : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32,                            memopv4f32, memopv8f32>;  defm VFMSUBPD4    : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64,                            memopv2f64, memopv4f64>; -defm VFNMADDSS4   : fma4s<0x7A, "vfnmaddss", ssmem, sse_load_f32, -                          int_x86_fma_vfnmadd_ss>; -defm VFNMADDSD4   : fma4s<0x7B, "vfnmaddsd", sdmem, sse_load_f64, -                          int_x86_fma_vfnmadd_sd>;  defm VFNMADDPS4   : fma4p<0x78, "vfnmaddps", X86Fnmadd, v4f32, v8f32,                            memopv4f32, memopv8f32>;  defm VFNMADDPD4   : fma4p<0x79, "vfnmaddpd", X86Fnmadd, v2f64, v4f64,                            memopv2f64, memopv4f64>; -defm VFNMSUBSS4   : fma4s<0x7E, "vfnmsubss", ssmem, sse_load_f32, -                          int_x86_fma_vfnmsub_ss>; -defm VFNMSUBSD4   : fma4s<0x7F, "vfnmsubsd", sdmem, sse_load_f64, -                          int_x86_fma_vfnmsub_sd>;  defm VFNMSUBPS4   : fma4p<0x7C, "vfnmsubps", X86Fnmsub, v4f32, v8f32,                            memopv4f32, memopv8f32>;  defm VFNMSUBPD4   : fma4p<0x7D, "vfnmsubpd", X86Fnmsub, v2f64, v4f64, | 

