diff options
-rw-r--r-- | llvm/lib/Target/X86/X86InstrXOP.td | 43 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/xop-schedule.ll | 12 |
2 files changed, 34 insertions, 21 deletions
diff --git a/llvm/lib/Target/X86/X86InstrXOP.td b/llvm/lib/Target/X86/X86InstrXOP.td index ae707aecbef..5055c7ee99d 100644 --- a/llvm/lib/Target/X86/X86InstrXOP.td +++ b/llvm/lib/Target/X86/X86InstrXOP.td @@ -157,7 +157,8 @@ let ExeDomain = SSEPackedInt in { } // Instruction where second source can be memory, but third must be register -multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int> { +multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int, + X86FoldableSchedWrite sched> { let isCommutable = 1 in def rr : IXOPi8Reg<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, VR128:$src3), @@ -165,29 +166,41 @@ multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int> { "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, XOP_4V, - Sched<[WriteVecIMul]>; + Sched<[sched]>; def rm : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (Int VR128:$src1, (bitconvert (loadv2i64 addr:$src2)), - VR128:$src3))]>, XOP_4V, Sched<[WriteVecIMulLd, ReadAfterLd]>; + VR128:$src3))]>, XOP_4V, Sched<[sched.Folded, ReadAfterLd]>; } let ExeDomain = SSEPackedInt in { - defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd", int_x86_xop_vpmadcswd>; - defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd", int_x86_xop_vpmadcsswd>; - defm VPMACSWW : xop4opm2<0x95, "vpmacsww", int_x86_xop_vpmacsww>; - defm VPMACSWD : xop4opm2<0x96, "vpmacswd", int_x86_xop_vpmacswd>; - defm VPMACSSWW : xop4opm2<0x85, "vpmacssww", int_x86_xop_vpmacssww>; - defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd", int_x86_xop_vpmacsswd>; - defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql", int_x86_xop_vpmacssdql>; - defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh", int_x86_xop_vpmacssdqh>; - defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd", int_x86_xop_vpmacssdd>; - defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql", int_x86_xop_vpmacsdql>; - defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh", int_x86_xop_vpmacsdqh>; - defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd", int_x86_xop_vpmacsdd>; + defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd", + int_x86_xop_vpmadcswd, WriteVecIMul>; + defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd", + int_x86_xop_vpmadcsswd, WriteVecIMul>; + defm VPMACSWW : xop4opm2<0x95, "vpmacsww", + int_x86_xop_vpmacsww, WriteVecIMul>; + defm VPMACSWD : xop4opm2<0x96, "vpmacswd", + int_x86_xop_vpmacswd, WriteVecIMul>; + defm VPMACSSWW : xop4opm2<0x85, "vpmacssww", + int_x86_xop_vpmacssww, WriteVecIMul>; + defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd", + int_x86_xop_vpmacsswd, WriteVecIMul>; + defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql", + int_x86_xop_vpmacssdql, WritePMULLD>; + defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh", + int_x86_xop_vpmacssdqh, WritePMULLD>; + defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd", + int_x86_xop_vpmacssdd, WritePMULLD>; + defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql", + int_x86_xop_vpmacsdql, WritePMULLD>; + defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh", + int_x86_xop_vpmacsdqh, WritePMULLD>; + defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd", + int_x86_xop_vpmacsdd, WritePMULLD>; } // IFMA patterns - for cases where we can safely ignore the overflow bits from diff --git a/llvm/test/CodeGen/X86/xop-schedule.ll b/llvm/test/CodeGen/X86/xop-schedule.ll index b0ed4d1babd..04abb90167b 100644 --- a/llvm/test/CodeGen/X86/xop-schedule.ll +++ b/llvm/test/CodeGen/X86/xop-schedule.ll @@ -604,7 +604,7 @@ define void @test_vpmacsdd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64 ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmacsdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; GENERIC-NEXT: vpmacsdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -624,7 +624,7 @@ define void @test_vpmacsdqh(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i6 ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmacsdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; GENERIC-NEXT: vpmacsdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -644,7 +644,7 @@ define void @test_vpmacsdql(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i6 ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmacsdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; GENERIC-NEXT: vpmacsdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -664,7 +664,7 @@ define void @test_vpmacssdd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i6 ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmacssdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; GENERIC-NEXT: vpmacssdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -684,7 +684,7 @@ define void @test_vpmacssdqh(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmacssdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; GENERIC-NEXT: vpmacssdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -704,7 +704,7 @@ define void @test_vpmacssdql(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmacssdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; GENERIC-NEXT: vpmacssdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; |