summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author Simon Pilgrim <llvm-dev@redking.me.uk> 2018-04-11 13:49:19 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk> 2018-04-11 13:49:19 +0000
commit 89c8a10f7c2e76d230fda7dd1ef0daa632555fd2 (patch)
tree f954f461330af1e45e7dfb51fc9d73a371011afc /llvm/lib
parent 7bcb5720fd38b882f514b428d36fdbbef417ab45 (diff)
download bcm5719-llvm-89c8a10f7c2e76d230fda7dd1ef0daa632555fd2.tar.gz
bcm5719-llvm-89c8a10f7c2e76d230fda7dd1ef0daa632555fd2.zip
[X86] Add variable shuffle schedule classes
Split variable index shuffles from immediate index shuffles

WriteFVarShuffle - variable 'in-lane' shuffles (VPERMILPS/VPERMIL2PS etc.)
WriteVarShuffle - variable 'in-lane' shuffles (PSHUFB/VPPERM etc.)
WriteFVarShuffle256 - variable 'cross-lane' shuffles (VPERMPS etc.)
WriteVarShuffle256 - variable 'cross-lane' shuffles (VPERMD etc.)

Differential Revision: https://reviews.llvm.org/D45404

llvm-svn: 329806
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/X86/X86InstrAVX512.td 6
-rw-r--r-- llvm/lib/Target/X86/X86InstrMMX.td 3
-rw-r--r-- llvm/lib/Target/X86/X86InstrSSE.td 12
-rw-r--r-- llvm/lib/Target/X86/X86InstrXOP.td 16
-rwxr-xr-x llvm/lib/Target/X86/X86SchedBroadwell.td 10
-rw-r--r-- llvm/lib/Target/X86/X86SchedHaswell.td 10
-rw-r--r-- llvm/lib/Target/X86/X86SchedSandyBridge.td 8
-rw-r--r-- llvm/lib/Target/X86/X86SchedSkylakeClient.td 10
-rwxr-xr-x llvm/lib/Target/X86/X86SchedSkylakeServer.td 55
-rw-r--r-- llvm/lib/Target/X86/X86Schedule.td 4
-rw-r--r-- llvm/lib/Target/X86/X86ScheduleBtVer2.td 32
-rw-r--r-- llvm/lib/Target/X86/X86ScheduleSLM.td 4
-rw-r--r-- llvm/lib/Target/X86/X86ScheduleZnver1.td 4
13 files changed, 55 insertions, 119 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 67528fbab4a..942aa06e740 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -1725,12 +1725,12 @@ defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
//===----------------------------------------------------------------------===//
// -- VPERMI2 - 3 source operands form --
-let Sched = WriteFShuffle256 in
+let Sched = WriteFVarShuffle256 in
def AVX512_PERM2_F : OpndItins<
IIC_SSE_SHUFP, IIC_SSE_SHUFP
>;
-let Sched = WriteShuffle256 in
+let Sched = WriteVarShuffle256 in
def AVX512_PERM2_I : OpndItins<
IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
>;
@@ -8969,7 +8969,7 @@ let Predicates = [HasDQI, NoBWI] in {
//
// FIXME: Is there a better scheduler itinerary for VPCOMPRESS/VPEXPAND?
-let Sched = WriteShuffle256 in {
+let Sched = WriteVarShuffle256 in {
def AVX512_COMPRESS : OpndItins<
IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;
diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td
index 06d30b3e62a..a95fdd4c800 100644
--- a/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/llvm/lib/Target/X86/X86InstrMMX.td
@@ -74,11 +74,12 @@ def MMX_UNPCK_L_ITINS : OpndItins<
def MMX_PCK_ITINS : OpndItins<
IIC_MMX_PCK_RR, IIC_MMX_PCK_RM
>;
+} // Sched
+let Sched = WriteVarShuffle in
def MMX_PSHUF_ITINS : OpndItins<
IIC_MMX_PSHUF, IIC_MMX_PSHUF
>;
-} // Sched
let Sched = WriteCvtF2I in {
def MMX_CVT_PD_ITINS : OpndItins<
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 4f0ad7f71b2..361f2a9ad34 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -5050,7 +5050,7 @@ def SSE_PHADDSUBW : OpndItins<
IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM
>;
}
-let Sched = WriteShuffle in
+let Sched = WriteVarShuffle in
def SSE_PSHUFB : OpndItins<
IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM
>;
@@ -7688,7 +7688,7 @@ defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
// VPERMIL - Permute Single and Double Floating-Point Values
//
-let Sched = WriteFShuffle in
+let Sched = WriteFVarShuffle in
def AVX_VPERMILV : OpndItins<
IIC_SSE_SHUFP, IIC_SSE_SHUFP
>;
@@ -7707,13 +7707,13 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
(ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, (i_vt RC:$src2))))]>, VEX_4V,
- Sched<[WriteFShuffle]>;
+ Sched<[WriteFVarShuffle]>;
def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop_i:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (f_vt (X86VPermilpv RC:$src1,
(i_vt (bitconvert (i_frag addr:$src2))))))]>, VEX_4V,
- Sched<[WriteFShuffleLd, ReadAfterLd]>;
+ Sched<[WriteFVarShuffleLd, ReadAfterLd]>;
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, u8imm:$src2),
@@ -8181,10 +8181,10 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
}
}
-defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32, WriteShuffle256,
+defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32, WriteVarShuffle256,
i256mem>;
let ExeDomain = SSEPackedSingle in
-defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFShuffle256,
+defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFVarShuffle256,
f256mem>;
multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
diff --git a/llvm/lib/Target/X86/X86InstrXOP.td b/llvm/lib/Target/X86/X86InstrXOP.td
index 0aaaeebefa2..ae707aecbef 100644
--- a/llvm/lib/Target/X86/X86InstrXOP.td
+++ b/llvm/lib/Target/X86/X86InstrXOP.td
@@ -279,7 +279,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
(vt128 VR128:$src3))))]>,
- XOP_4V, Sched<[WriteShuffle]>;
+ XOP_4V, Sched<[WriteVarShuffle]>;
def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i128mem:$src3),
!strconcat(OpcodeStr,
@@ -287,7 +287,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
(vt128 (bitconvert (loadv2i64 addr:$src3))))))]>,
- XOP_4V, VEX_W, Sched<[WriteShuffleLd, ReadAfterLd, ReadAfterLd]>;
+ XOP_4V, VEX_W, Sched<[WriteVarShuffleLd, ReadAfterLd, ReadAfterLd]>;
def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@@ -295,7 +295,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set VR128:$dst,
(v16i8 (OpNode (vt128 VR128:$src1), (vt128 (bitconvert (loadv2i64 addr:$src2))),
(vt128 VR128:$src3))))]>,
- XOP_4V, Sched<[WriteShuffleLd, ReadAfterLd,
+ XOP_4V, Sched<[WriteVarShuffleLd, ReadAfterLd,
// 128mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
@@ -307,7 +307,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, XOP_4V, VEX_W, Sched<[WriteShuffle]>, FoldGenData<NAME#rrr>;
+ []>, XOP_4V, VEX_W, Sched<[WriteVarShuffle]>, FoldGenData<NAME#rrr>;
}
let ExeDomain = SSEPackedInt in {
@@ -367,7 +367,7 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 imm:$src4))))]>,
- Sched<[WriteFShuffle]>;
+ Sched<[WriteFVarShuffle]>;
def rm : IXOP5<Opc, MRMSrcMemOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, intmemop:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
@@ -376,7 +376,7 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
(VT (X86vpermil2 RC:$src1, RC:$src2,
(bitconvert (IntLdFrag addr:$src3)),
(i8 imm:$src4))))]>, VEX_W,
- Sched<[WriteFShuffleLd, ReadAfterLd, ReadAfterLd]>;
+ Sched<[WriteFVarShuffleLd, ReadAfterLd, ReadAfterLd]>;
def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, fpmemop:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
@@ -384,7 +384,7 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2),
RC:$src3, (i8 imm:$src4))))]>,
- Sched<[WriteFShuffleLd, ReadAfterLd,
+ Sched<[WriteFVarShuffleLd, ReadAfterLd,
// fpmemop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
// RC:$src3
@@ -395,7 +395,7 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
(ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- []>, VEX_W, Sched<[WriteFShuffle]>, FoldGenData<NAME#rr>;
+ []>, VEX_W, Sched<[WriteFVarShuffle]>, FoldGenData<NAME#rr>;
}
let ExeDomain = SSEPackedDouble in {
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index 1d80920e566..79a25959262 100755
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -162,6 +162,7 @@ defm : BWWriteResPair<WriteFRcp, [BWPort0], 5>; // Floating point reciprocal e
defm : BWWriteResPair<WriteFRsqrt, [BWPort0], 5>; // Floating point reciprocal square root estimate.
defm : BWWriteResPair<WriteFMA, [BWPort01], 5>; // Fused Multiply Add.
defm : BWWriteResPair<WriteFShuffle, [BWPort5], 1>; // Floating point vector shuffles.
+defm : BWWriteResPair<WriteFVarShuffle, [BWPort5], 1>; // Floating point vector variable shuffles.
defm : BWWriteResPair<WriteFBlend, [BWPort015], 1>; // Floating point vector blends.
defm : BWWriteResPair<WriteFVarBlend, [BWPort5], 2, [2]>; // Fp vector variable blends.
@@ -178,6 +179,7 @@ defm : BWWriteResPair<WriteVecShift, [BWPort0], 1>; // Vector integer shifts.
defm : BWWriteResPair<WriteVecIMul, [BWPort0], 5>; // Vector integer multiply.
defm : BWWriteResPair<WritePMULLD, [BWPort0], 10, [2], 2, 5>; // PMULLD
defm : BWWriteResPair<WriteShuffle, [BWPort5], 1>; // Vector shuffles.
+defm : BWWriteResPair<WriteVarShuffle, [BWPort5], 1>; // Vector variable shuffles.
defm : BWWriteResPair<WriteBlend, [BWPort15], 1>; // Vector blends.
defm : BWWriteResPair<WriteVarBlend, [BWPort5], 2, [2]>; // Vector variable blends.
defm : BWWriteResPair<WriteMPSAD, [BWPort0, BWPort5], 6, [1, 2]>; // Vector MPSAD.
@@ -288,7 +290,9 @@ def : WriteRes<WriteSystem, [BWPort0156]> { let Latency = 100; } // def Writ
// AVX2.
defm : BWWriteResPair<WriteFShuffle256, [BWPort5], 3>; // Fp 256-bit width vector shuffles.
+defm : BWWriteResPair<WriteFVarShuffle256, [BWPort5], 3>; // Fp 256-bit width vector variable shuffles.
defm : BWWriteResPair<WriteShuffle256, [BWPort5], 3>; // 256-bit width vector shuffles.
+defm : BWWriteResPair<WriteVarShuffle256, [BWPort5], 3>; // 256-bit width vector variable shuffles.
defm : BWWriteResPair<WriteVarVecShift, [BWPort0, BWPort5], 2, [2, 1]>; // Variable vector shifts.
// Old microcoded instructions that nobody use.
@@ -366,7 +370,6 @@ def: InstRW<[BWWriteResGroup3], (instregex "MMX_MOVD64rr",
"MMX_MOVD64to64rr",
"MMX_MOVQ2DQrr",
"MMX_PALIGNRrri",
- "MMX_PSHUFBrr",
"MMX_PSHUFWri",
"MMX_PUNPCKHBWirr",
"MMX_PUNPCKHDQirr",
@@ -404,9 +407,7 @@ def: InstRW<[BWWriteResGroup3], (instregex "MMX_MOVD64rr",
"VPBROADCASTDrr",
"VPBROADCASTQrr",
"VPERMILPD(Y?)ri",
- "VPERMILPD(Y?)rr",
"VPERMILPS(Y?)ri",
- "VPERMILPS(Y?)rr",
"(V?)PMOVSXBDrr",
"(V?)PMOVSXBQrr",
"(V?)PMOVSXBWrr",
@@ -419,7 +420,6 @@ def: InstRW<[BWWriteResGroup3], (instregex "MMX_MOVD64rr",
"(V?)PMOVZXDQrr",
"(V?)PMOVZXWDrr",
"(V?)PMOVZXWQrr",
- "(V?)PSHUFB(Y?)rr",
"(V?)PSHUFD(Y?)ri",
"(V?)PSHUFHW(Y?)ri",
"(V?)PSHUFLW(Y?)ri",
@@ -891,9 +891,7 @@ def: InstRW<[BWWriteResGroup28], (instregex "VBROADCASTSDYrr",
"VPBROADCASTW(Y?)rr",
"VPERM2F128rr",
"VPERM2I128rr",
- "VPERMDYrr",
"VPERMPDYri",
- "VPERMPSYrr",
"VPERMQYri",
"VPMOVSXBDYrr",
"VPMOVSXBQYrr",
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index 29f120f4edb..8022ddad111 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -159,8 +159,10 @@ defm : HWWriteResPair<WriteCvtI2F, [HWPort1], 4>;
defm : HWWriteResPair<WriteCvtF2F, [HWPort1], 3>;
defm : HWWriteResPair<WriteFMA, [HWPort01], 5>;
defm : HWWriteResPair<WriteFShuffle, [HWPort5], 1>;
+defm : HWWriteResPair<WriteFVarShuffle, [HWPort5], 1>;
defm : HWWriteResPair<WriteFBlend, [HWPort015], 1>;
defm : HWWriteResPair<WriteFShuffle256, [HWPort5], 3>;
+defm : HWWriteResPair<WriteFVarShuffle256, [HWPort5], 3>;
defm : HWWriteResPair<WriteFVarBlend, [HWPort5], 2, [2]>;
// Vector integer operations.
@@ -174,8 +176,10 @@ defm : HWWriteResPair<WriteVecALU, [HWPort15], 1>;
defm : HWWriteResPair<WriteVecIMul, [HWPort0], 5>;
defm : HWWriteResPair<WritePMULLD, [HWPort0], 10, [2], 2, 6>;
defm : HWWriteResPair<WriteShuffle, [HWPort5], 1>;
+defm : HWWriteResPair<WriteVarShuffle, [HWPort5], 1>;
defm : HWWriteResPair<WriteBlend, [HWPort15], 1>;
defm : HWWriteResPair<WriteShuffle256, [HWPort5], 3>;
+defm : HWWriteResPair<WriteVarShuffle256, [HWPort5], 3>;
defm : HWWriteResPair<WriteVarBlend, [HWPort5], 2, [2]>;
defm : HWWriteResPair<WriteVarVecShift, [HWPort0, HWPort5], 2, [2, 1]>;
defm : HWWriteResPair<WriteMPSAD, [HWPort0, HWPort5], 6, [1, 2]>;
@@ -724,7 +728,6 @@ def: InstRW<[HWWriteResGroup4], (instregex "MMX_MOVD64rr",
"MMX_MOVD64to64rr",
"MMX_MOVQ2DQrr",
"MMX_PALIGNRrri",
- "MMX_PSHUFBrr",
"MMX_PSHUFWri",
"MMX_PUNPCKHBWirr",
"MMX_PUNPCKHDQirr",
@@ -762,9 +765,7 @@ def: InstRW<[HWWriteResGroup4], (instregex "MMX_MOVD64rr",
"VPBROADCASTDrr",
"VPBROADCASTQrr",
"VPERMILPD(Y?)ri",
- "VPERMILPD(Y?)rr",
"VPERMILPS(Y?)ri",
- "VPERMILPS(Y?)rr",
"(V?)PMOVSXBDrr",
"(V?)PMOVSXBQrr",
"(V?)PMOVSXBWrr",
@@ -777,7 +778,6 @@ def: InstRW<[HWWriteResGroup4], (instregex "MMX_MOVD64rr",
"(V?)PMOVZXDQrr",
"(V?)PMOVZXWDrr",
"(V?)PMOVZXWQrr",
- "(V?)PSHUFB(Y?)rr",
"(V?)PSHUFD(Y?)ri",
"(V?)PSHUFHW(Y?)ri",
"(V?)PSHUFLW(Y?)ri",
@@ -1780,9 +1780,7 @@ def: InstRW<[HWWriteResGroup51], (instregex "VBROADCASTSDYrr",
"VPBROADCASTWrr",
"VPERM2F128rr",
"VPERM2I128rr",
- "VPERMDYrr",
"VPERMPDYri",
- "VPERMPSYrr",
"VPERMQYri",
"VPMOVSXBDYrr",
"VPMOVSXBQYrr",
diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index 5a2121f0770..aefbfb64cf8 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -148,6 +148,7 @@ defm : SBWriteResPair<WriteCvtF2I, [SBPort1], 3>;
defm : SBWriteResPair<WriteCvtI2F, [SBPort1], 4>;
defm : SBWriteResPair<WriteCvtF2F, [SBPort1], 3>;
defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1>;
+defm : SBWriteResPair<WriteFVarShuffle, [SBPort5], 1>;
defm : SBWriteResPair<WriteFBlend, [SBPort05], 1>;
defm : SBWriteResPair<WriteFVarBlend, [SBPort0, SBPort5], 2>;
@@ -162,6 +163,7 @@ defm : SBWriteResPair<WriteVecALU, [SBPort1], 3>;
defm : SBWriteResPair<WriteVecIMul, [SBPort0], 5>;
defm : SBWriteResPair<WritePMULLD, [SBPort0], 5, [1], 1, 6>; // TODO this is probably wrong for 256/512-bit for the "generic" model
defm : SBWriteResPair<WriteShuffle, [SBPort5], 1>;
+defm : SBWriteResPair<WriteVarShuffle, [SBPort15], 1>;
defm : SBWriteResPair<WriteBlend, [SBPort15], 1>;
defm : SBWriteResPair<WriteVarBlend, [SBPort1, SBPort5], 2>;
defm : SBWriteResPair<WriteMPSAD, [SBPort0, SBPort15], 5, [1,2], 3>;
@@ -275,7 +277,9 @@ def : WriteRes<WriteNop, []>;
// AVX2/FMA is not supported on that architecture, but we should define the basic
// scheduling resources anyway.
defm : SBWriteResPair<WriteFShuffle256, [SBPort0], 1>;
+defm : SBWriteResPair<WriteFVarShuffle256, [SBPort0], 1>;
defm : SBWriteResPair<WriteShuffle256, [SBPort0], 1>;
+defm : SBWriteResPair<WriteVarShuffle256, [SBPort0], 1>;
defm : SBWriteResPair<WriteVarVecShift, [SBPort0], 1>;
defm : SBWriteResPair<WriteFMA, [SBPort01], 5>;
@@ -352,9 +356,7 @@ def: InstRW<[SBWriteResGroup2], (instregex "FDECSTP",
"(V?)ORPS(Y?)rr",
"VPERM2F128rr",
"VPERMILPD(Y?)ri",
- "VPERMILPD(Y?)rr",
"VPERMILPS(Y?)ri",
- "VPERMILPS(Y?)rr",
"(V?)SHUFPD(Y?)rri",
"(V?)SHUFPS(Y?)rri",
"(V?)UNPCKHPD(Y?)rr",
@@ -408,7 +410,6 @@ def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSBrr",
"MMX_PABSWrr",
"MMX_PADDQirr",
"MMX_PALIGNRrri",
- "MMX_PSHUFBrr",
"MMX_PSIGNBrr",
"MMX_PSIGNDrr",
"MMX_PSIGNWrr",
@@ -462,7 +463,6 @@ def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSBrr",
"(V?)PMOVZXDQrr",
"(V?)PMOVZXWDrr",
"(V?)PMOVZXWQrr",
- "(V?)PSHUFBrr",
"(V?)PSHUFDri",
"(V?)PSHUFHWri",
"(V?)PSHUFLWri",
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 59e05df9904..6511206992d 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -159,6 +159,7 @@ defm : SKLWriteResPair<WriteFRcp, [SKLPort0], 5>; // Floating point reciprocal
defm : SKLWriteResPair<WriteFRsqrt, [SKLPort0], 5>; // Floating point reciprocal square root estimate.
defm : SKLWriteResPair<WriteFMA, [SKLPort01], 4>; // Fused Multiply Add.
defm : SKLWriteResPair<WriteFShuffle, [SKLPort5], 1>; // Floating point vector shuffles.
+defm : SKLWriteResPair<WriteFVarShuffle, [SKLPort5], 1>; // Floating point vector shuffles.
defm : SKLWriteResPair<WriteFBlend, [SKLPort015], 1>; // Floating point vector blends.
defm : SKLWriteResPair<WriteFVarBlend, [SKLPort5], 2, [2]>; // Fp vector variable blends.
@@ -175,6 +176,7 @@ defm : SKLWriteResPair<WriteVecShift, [SKLPort0], 1>; // Vector integer shifts.
defm : SKLWriteResPair<WriteVecIMul, [SKLPort0], 5>; // Vector integer multiply.
defm : SKLWriteResPair<WritePMULLD, [SKLPort01], 10, [2], 2, 6>;
defm : SKLWriteResPair<WriteShuffle, [SKLPort5], 1>; // Vector shuffles.
+defm : SKLWriteResPair<WriteVarShuffle, [SKLPort5], 1>; // Vector shuffles.
defm : SKLWriteResPair<WriteBlend, [SKLPort15], 1>; // Vector blends.
defm : SKLWriteResPair<WriteVarBlend, [SKLPort5], 2, [2]>; // Vector variable blends.
defm : SKLWriteResPair<WriteMPSAD, [SKLPort0, SKLPort5], 6, [1, 2]>; // Vector MPSAD.
@@ -294,7 +296,9 @@ def : WriteRes<WriteSystem, [SKLPort0156]> { let Latency = 100; } // def Wri
// AVX2.
defm : SKLWriteResPair<WriteFShuffle256, [SKLPort5], 3>; // Fp 256-bit width vector shuffles.
+defm : SKLWriteResPair<WriteFVarShuffle256, [SKLPort5], 3>; // Fp 256-bit width vector variable shuffles.
defm : SKLWriteResPair<WriteShuffle256, [SKLPort5], 3>; // 256-bit width vector shuffles.
+defm : SKLWriteResPair<WriteVarShuffle256, [SKLPort5], 3>; // 256-bit width vector variable shuffles.
defm : SKLWriteResPair<WriteVarVecShift, [SKLPort0, SKLPort5], 2, [2, 1]>; // Variable vector shifts.
// Old microcoded instructions that nobody use.
@@ -367,7 +371,6 @@ def: InstRW<[SKLWriteResGroup3], (instregex "COMP_FST0r",
"MMX_MOVD64rr",
"MMX_MOVD64to64rr",
"MMX_PALIGNRrri",
- "MMX_PSHUFBrr",
"MMX_PSHUFWri",
"MMX_PUNPCKHBWirr",
"MMX_PUNPCKHDQirr",
@@ -397,9 +400,7 @@ def: InstRW<[SKLWriteResGroup3], (instregex "COMP_FST0r",
"VPBROADCASTDrr",
"VPBROADCASTQrr",
"VPERMILPD(Y?)ri",
- "VPERMILPD(Y?)rr",
"VPERMILPS(Y?)ri",
- "VPERMILPS(Y?)rr",
"(V?)PMOVSXBDrr",
"(V?)PMOVSXBQrr",
"(V?)PMOVSXBWrr",
@@ -412,7 +413,6 @@ def: InstRW<[SKLWriteResGroup3], (instregex "COMP_FST0r",
"(V?)PMOVZXDQrr",
"(V?)PMOVZXWDrr",
"(V?)PMOVZXWQrr",
- "(V?)PSHUFB(Y?)rr",
"(V?)PSHUFD(Y?)ri",
"(V?)PSHUFHW(Y?)ri",
"(V?)PSHUFLW(Y?)ri",
@@ -884,9 +884,7 @@ def: InstRW<[SKLWriteResGroup30], (instregex "ADD_FPrST0",
"(V?)PCMPGTQ(Y?)rr",
"VPERM2F128rr",
"VPERM2I128rr",
- "VPERMDYrr",
"VPERMPDYri",
- "VPERMPSYrr",
"VPERMQYri",
"VPMOVSXBDYrr",
"VPMOVSXBQYrr",
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index c7c7e2ab0b8..5a79f240273 100755
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -159,6 +159,7 @@ defm : SKXWriteResPair<WriteFRcp, [SKXPort0], 5>; // Floating point reciprocal
defm : SKXWriteResPair<WriteFRsqrt, [SKXPort0], 5>; // Floating point reciprocal square root estimate.
defm : SKXWriteResPair<WriteFMA, [SKXPort015], 4>; // Fused Multiply Add.
defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1>; // Floating point vector shuffles.
+defm : SKXWriteResPair<WriteFVarShuffle, [SKXPort5], 1>; // Floating point vector variable shuffles.
defm : SKXWriteResPair<WriteFBlend, [SKXPort015], 1>; // Floating point vector blends.
defm : SKXWriteResPair<WriteFVarBlend, [SKXPort5], 2, [2]>; // Fp vector variable blends.
@@ -175,6 +176,7 @@ defm : SKXWriteResPair<WriteVecShift, [SKXPort0], 1>; // Vector integer shifts.
defm : SKXWriteResPair<WriteVecIMul, [SKXPort0], 5>; // Vector integer multiply.
defm : SKXWriteResPair<WritePMULLD, [SKXPort015], 10, [2], 2, 6>; // Vector integer multiply.
defm : SKXWriteResPair<WriteShuffle, [SKXPort5], 1>; // Vector shuffles.
+defm : SKXWriteResPair<WriteVarShuffle, [SKXPort5], 1>; // Vector variable shuffles.
defm : SKXWriteResPair<WriteBlend, [SKXPort15], 1>; // Vector blends.
defm : SKXWriteResPair<WriteVarBlend, [SKXPort5], 2, [2]>; // Vector variable blends.
defm : SKXWriteResPair<WriteMPSAD, [SKXPort0, SKXPort5], 6, [1, 2]>; // Vector MPSAD.
@@ -294,7 +296,9 @@ def : WriteRes<WriteSystem, [SKXPort0156]> { let Latency = 100; } // def Wri
// AVX2.
defm : SKXWriteResPair<WriteFShuffle256, [SKXPort5], 3>; // Fp 256-bit width vector shuffles.
+defm : SKXWriteResPair<WriteFVarShuffle256, [SKXPort5], 3>; // Fp 256-bit width vector variable shuffles.
defm : SKXWriteResPair<WriteShuffle256, [SKXPort5], 3>; // 256-bit width vector shuffles.
+defm : SKXWriteResPair<WriteVarShuffle256, [SKXPort5], 3>; // 256-bit width vector variable shuffles.
defm : SKXWriteResPair<WriteVarVecShift, [SKXPort0, SKXPort5], 2, [2, 1]>; // Variable vector shifts.
// Old microcoded instructions that nobody use.
@@ -412,7 +416,6 @@ def: InstRW<[SKXWriteResGroup3], (instregex "COMP_FST0r",
"MMX_MOVD64rr",
"MMX_MOVD64to64rr",
"MMX_PALIGNRrri",
- "MMX_PSHUFBrr",
"MMX_PSHUFWri",
"MMX_PUNPCKHBWirr",
"MMX_PUNPCKHDQirr",
@@ -447,7 +450,6 @@ def: InstRW<[SKXWriteResGroup3], (instregex "COMP_FST0r",
"PMOVZXDQrr",
"PMOVZXWDrr",
"PMOVZXWQrr",
- "PSHUFBrr",
"PSHUFDri",
"PSHUFHWri",
"PSHUFLWri",
@@ -530,25 +532,15 @@ def: InstRW<[SKXWriteResGroup3], (instregex "COMP_FST0r",
"VPBROADCASTDrr",
"VPBROADCASTQrr",
"VPERMILPDYri",
- "VPERMILPDYrr",
"VPERMILPDZ128ri",
- "VPERMILPDZ128rr",
"VPERMILPDZ256ri",
- "VPERMILPDZ256rr",
"VPERMILPDZri",
- "VPERMILPDZrr",
"VPERMILPDri",
- "VPERMILPDrr",
"VPERMILPSYri",
- "VPERMILPSYrr",
"VPERMILPSZ128ri",
- "VPERMILPSZ128rr",
"VPERMILPSZ256ri",
- "VPERMILPSZ256rr",
"VPERMILPSZri",
- "VPERMILPSZrr",
"VPERMILPSri",
- "VPERMILPSrr",
"VPMOVSXBDrr",
"VPMOVSXBQrr",
"VPMOVSXBWrr",
@@ -561,11 +553,6 @@ def: InstRW<[SKXWriteResGroup3], (instregex "COMP_FST0r",
"VPMOVZXDQrr",
"VPMOVZXWDrr",
"VPMOVZXWQrr",
- "VPSHUFBYrr",
- "VPSHUFBZ128rr",
- "VPSHUFBZ256rr",
- "VPSHUFBZrr",
- "VPSHUFBrr",
"VPSHUFDYri",
"VPSHUFDZ128ri",
"VPSHUFDZ256ri",
@@ -1859,46 +1846,12 @@ def: InstRW<[SKXWriteResGroup32], (instregex "ADD_FPrST0",
"VPCMPWZrri",
"VPERM2F128rr",
"VPERM2I128rr",
- "VPERMDYrr",
- "VPERMDZ256rr",
- "VPERMDZrr",
- "VPERMI2D128rr",
- "VPERMI2D256rr",
- "VPERMI2Drr",
- "VPERMI2PD128rr",
- "VPERMI2PD256rr",
- "VPERMI2PDrr",
- "VPERMI2PS128rr",
- "VPERMI2PS256rr",
- "VPERMI2PSrr",
- "VPERMI2Q128rr",
- "VPERMI2Q256rr",
- "VPERMI2Qrr",
"VPERMPDYri",
"VPERMPDZ256ri",
- "VPERMPDZ256rr",
"VPERMPDZri",
- "VPERMPDZrr",
- "VPERMPSYrr",
- "VPERMPSZ256rr",
- "VPERMPSZrr",
"VPERMQYri",
"VPERMQZ256ri",
- "VPERMQZ256rr",
"VPERMQZri",
- "VPERMQZrr",
- "VPERMT2D128rr",
- "VPERMT2D256rr",
- "VPERMT2Drr",
- "VPERMT2PD128rr",
- "VPERMT2PD256rr",
- "VPERMT2PDrr",
- "VPERMT2PS128rr",
- "VPERMT2PS256rr",
- "VPERMT2PSrr",
- "VPERMT2Q128rr",
- "VPERMT2Q256rr",
- "VPERMT2Qrr",
"VPMAXSQZ128rr",
"VPMAXSQZ256rr",
"VPMAXSQZrr",
diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td
index 7f381a891ad..0dc5f7288d5 100644
--- a/llvm/lib/Target/X86/X86Schedule.td
+++ b/llvm/lib/Target/X86/X86Schedule.td
@@ -87,6 +87,7 @@ defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate.
defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate.
defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles.
+defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles.
defm WriteFBlend : X86SchedWritePair; // Floating point vector blends.
defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends.
@@ -106,6 +107,7 @@ defm WriteVecShift : X86SchedWritePair; // Vector integer shifts.
defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply.
defm WritePMULLD : X86SchedWritePair; // PMULLD
defm WriteShuffle : X86SchedWritePair; // Vector shuffles.
+defm WriteVarShuffle : X86SchedWritePair; // Vector variable shuffles.
defm WriteBlend : X86SchedWritePair; // Vector blends.
defm WriteVarBlend : X86SchedWritePair; // Vector variable blends.
defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD.
@@ -150,7 +152,9 @@ def WriteSystem : SchedWrite;
// AVX2.
defm WriteFShuffle256 : X86SchedWritePair; // Fp 256-bit width vector shuffles.
+defm WriteFVarShuffle256 : X86SchedWritePair; // Fp 256-bit width variable shuffles.
defm WriteShuffle256 : X86SchedWritePair; // 256-bit width vector shuffles.
+defm WriteVarShuffle256 : X86SchedWritePair; // 256-bit width vector variable shuffles.
defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts.
// Old microcoded instructions that nobody use.
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index f9dfefbc915..2ab593af326 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -301,9 +301,11 @@ defm : JWriteResFpuPair<WriteFRsqrt, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFDiv, [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResFpuPair<WriteFSqrt, [JFPU1, JFPM], 21, [1, 21]>;
defm : JWriteResFpuPair<WriteFShuffle, [JFPU01, JFPX], 1>;
+defm : JWriteResFpuPair<WriteFVarShuffle, [JFPU01, JFPX], 2, [1, 4], 3>;
defm : JWriteResFpuPair<WriteFBlend, [JFPU01, JFPX], 1>;
defm : JWriteResFpuPair<WriteFVarBlend, [JFPU01, JFPX], 2, [1, 4], 3>;
defm : JWriteResFpuPair<WriteFShuffle256, [JFPU01, JFPX], 1>;
+defm : JWriteResFpuPair<WriteFVarShuffle256, [JFPU01, JFPX], 1>; // NOTE: Doesn't exist on Jaguar.
////////////////////////////////////////////////////////////////////////////////
// Conversions.
@@ -367,10 +369,12 @@ defm : JWriteResFpuPair<WriteVecIMul, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WritePMULLD, [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>;
defm : JWriteResFpuPair<WriteMPSAD, [JFPU0, JVIMUL], 3, [1, 2]>;
defm : JWriteResFpuPair<WriteShuffle, [JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WriteVarShuffle, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : JWriteResFpuPair<WriteBlend, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVarBlend, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : JWriteResFpuPair<WriteVecLogic, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteShuffle256, [JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WriteVarShuffle256, [JFPU01, JVALU], 1>; // NOTE: Doesn't exist on Jaguar.
defm : JWriteResFpuPair<WriteVarVecShift, [JFPU01, JVALU], 1>; // NOTE: Doesn't exist on Jaguar.
////////////////////////////////////////////////////////////////////////////////
@@ -750,34 +754,6 @@ def JWriteVCVTPDYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC, JFPX]> {
}
def : InstRW<[JWriteVCVTPDYLd, ReadAfterLd], (instrs VCVTPD2DQYrm, VCVTTPD2DQYrm, VCVTPD2PSYrm)>;
-def JWritePSHUFB: SchedWriteRes<[JFPU01, JVALU]> {
- let Latency = 2;
- let ResourceCycles = [1, 4];
- let NumMicroOps = 3;
-}
-def : InstRW<[JWritePSHUFB], (instrs MMX_PSHUFBrr, PSHUFBrr, VPSHUFBrr)>;
-
-def JWritePSHUFBLd: SchedWriteRes<[JLAGU, JFPU01, JVALU]> {
- let Latency = 7;
- let ResourceCycles = [1, 1, 4];
- let NumMicroOps = 3;
-}
-def : InstRW<[JWritePSHUFBLd, ReadAfterLd], (instrs MMX_PSHUFBrm, PSHUFBrm, VPSHUFBrm)>;
-
-def JWriteVPERM: SchedWriteRes<[JFPU01, JFPX]> {
- let Latency = 2;
- let ResourceCycles = [1, 4];
- let NumMicroOps = 3;
-}
-def : InstRW<[JWriteVPERM], (instrs VPERMILPDrr, VPERMILPSrr)>;
-
-def JWriteVPERMLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
- let Latency = 7;
- let ResourceCycles = [1, 1, 4];
- let NumMicroOps = 3;
-}
-def : InstRW<[JWriteVPERMLd, ReadAfterLd], (instrs VPERMILPDrm, VPERMILPSrm)>;
-
def JWriteVPERMY: SchedWriteRes<[JFPU01, JFPX]> {
let Latency = 3;
let ResourceCycles = [2, 6];
diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td
index a712a188aa0..8147c94a2e7 100644
--- a/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -134,6 +134,7 @@ defm : SLMWriteResPair<WriteCvtF2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2F, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtF2F, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>;
// Vector integer operations.
@@ -149,6 +150,7 @@ defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>;
//defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 11, [11], 7>;
defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteShuffle, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteVarShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteBlend, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteMPSAD, [SLM_FPC_RSV0], 7>;
@@ -255,7 +257,9 @@ def : WriteRes<WriteIMulH, [SLM_FPC_RSV0]>;
defm : SLMWriteResPair<WriteVarBlend, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFShuffle256, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteFVarShuffle256, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteShuffle256, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteVarShuffle256, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarVecShift, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFMA, [SLM_FPC_RSV0], 1>;
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 3018e0f20ba..a88c680a7e1 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -201,6 +201,7 @@ defm : ZnWriteResFpuPair<WriteCvtF2F, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtF2I, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteFDiv, [ZnFPU3], 15>;
defm : ZnWriteResFpuPair<WriteFShuffle, [ZnFPU12], 1>;
+defm : ZnWriteResFpuPair<WriteFVarShuffle, [ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteFMul, [ZnFPU0], 5>;
defm : ZnWriteResFpuPair<WriteFMA, [ZnFPU03], 5>;
defm : ZnWriteResFpuPair<WriteFRcp, [ZnFPU01], 5>;
@@ -219,8 +220,10 @@ defm : ZnWriteResFpuPair<WriteVecALU, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecIMul, [ZnFPU0], 4>;
defm : ZnWriteResFpuPair<WritePMULLD, [ZnFPU0], 4>; // FIXME
defm : ZnWriteResFpuPair<WriteShuffle, [ZnFPU], 1>;
+defm : ZnWriteResFpuPair<WriteVarShuffle, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteBlend, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU], 2>;
+defm : ZnWriteResFpuPair<WriteVarShuffle256, [ZnFPU], 2>;
// Vector Shift Operations
defm : ZnWriteResFpuPair<WriteVarVecShift, [ZnFPU12], 1>;
@@ -241,6 +244,7 @@ def : WriteRes<WriteNop, []>;
// Following instructions with latency=100 are microcoded.
// We set long latency so as to block the entire pipeline.
defm : ZnWriteResFpuPair<WriteFShuffle256, [ZnFPU], 100>;
+defm : ZnWriteResFpuPair<WriteFVarShuffle256, [ZnFPU], 100>;
//Microcoded Instructions
let Latency = 100 in {
OpenPOWER on IntegriCloud