diff options
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 4 | ||||
| -rwxr-xr-x | llvm/lib/Target/X86/X86SchedBroadwell.td | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedHaswell.td | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSandyBridge.td | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSkylakeClient.td | 3 | ||||
| -rwxr-xr-x | llvm/lib/Target/X86/X86SchedSkylakeServer.td | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Schedule.td | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleAtom.td | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleBtVer2.td | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleSLM.td | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleZnver1.td | 1 | ||||
| -rw-r--r-- | llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s | 6 | ||||
| -rw-r--r-- | llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse41.s | 6 | 
13 files changed, 20 insertions, 18 deletions
| diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 1ef255dec7d..485a27b61ae 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -5751,10 +5751,10 @@ multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,  let Predicates = [HasAVX] in  defm VPHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "vphminposuw",                                           X86phminpos, loadv2i64, -                                         WriteVecIMul>, VEX, VEX_WIG; +                                         WritePHMINPOS>, VEX, VEX_WIG;  defm PHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "phminposuw",                                           X86phminpos, memopv2i64, -                                         WriteVecIMul>; +                                         WritePHMINPOS>;  /// SS48I_binop_rm - Simple SSE41 binary operator.  multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 662ba189871..15b07c3dd67 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -194,7 +194,8 @@ defm : BWWriteResPair<WriteVarShuffle, [BWPort5],  1>; // Vector variable shuffl  defm : BWWriteResPair<WriteBlend,  [BWPort5],  1>; // Vector blends.  defm : BWWriteResPair<WriteVarBlend,  [BWPort5], 2, [2], 2, 5>; // Vector variable blends.  defm : BWWriteResPair<WriteMPSAD,  [BWPort0, BWPort5], 7, [1, 2], 3, 5>; // Vector MPSAD. -defm : BWWriteResPair<WritePSADBW,  [BWPort0],   5>; // Vector PSADBW. +defm : BWWriteResPair<WritePSADBW,   [BWPort0],   5>; // Vector PSADBW. +defm : BWWriteResPair<WritePHMINPOS, [BWPort0],   5>; // Vector PHMINPOS.  // Vector insert/extract operations.  def : WriteRes<WriteVecInsert, [BWPort5]> { diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 034f1d1b24b..a9ffba0650a 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -194,6 +194,7 @@ defm : HWWriteResPair<WriteVarBlend,  [HWPort5], 2, [2], 2, 6>;  defm : HWWriteResPair<WriteVarVecShift,  [HWPort0, HWPort5], 2, [2, 1]>;  defm : HWWriteResPair<WriteMPSAD,  [HWPort0, HWPort5], 7, [1, 2], 3, 6>;  defm : HWWriteResPair<WritePSADBW, [HWPort0], 5>; +defm : HWWriteResPair<WritePHMINPOS, [HWPort0],  5, [1], 1, 6>;  // Vector insert/extract operations.  def : WriteRes<WriteVecInsert, [HWPort5]> { @@ -1890,7 +1891,6 @@ def HWWriteResGroup91_2 : SchedWriteRes<[HWPort0,HWPort23]> {    let ResourceCycles = [1,1];  }  def: InstRW<[HWWriteResGroup91_2], (instregex "(V?)PCMPGTQrm", -                                              "(V?)PHMINPOSUWrm",                                                "(V?)PMADDUBSWrm",                                                "(V?)PMADDWDrm",                                                "(V?)PMULDQrm", diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index b59d84fc0f1..1f3efa2df61 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -173,6 +173,7 @@ defm : SBWriteResPair<WriteBlend,   [SBPort15], 1, [1], 1, 6>;  defm : SBWriteResPair<WriteVarBlend, [SBPort15], 2, [2], 2, 6>;  defm : SBWriteResPair<WriteMPSAD, [SBPort0, SBPort15], 7, [1,2], 3, 6>;  defm : SBWriteResPair<WritePSADBW,  [SBPort0], 5>; +defm : SBWriteResPair<WritePHMINPOS,  [SBPort0], 5, [1], 1, 6>;  // Vector insert/extract operations.  def : WriteRes<WriteVecInsert, [SBPort5,SBPort15]> { @@ -1537,8 +1538,7 @@ def SBWriteResGroup104 : SchedWriteRes<[SBPort0,SBPort23]> {    let NumMicroOps = 2;    let ResourceCycles = [1,1];  } -def: InstRW<[SBWriteResGroup104], (instregex "(V?)PCMPGTQrm", -                                             "(V?)PHMINPOSUWrm")>; +def: InstRW<[SBWriteResGroup104], (instregex "(V?)PCMPGTQrm")>;  def SBWriteResGroup106 : SchedWriteRes<[SBPort1,SBPort23]> {    let Latency = 11; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 831f614461e..e05f58c6800 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -192,6 +192,7 @@ defm : SKLWriteResPair<WriteBlend,  [SKLPort5], 1, [1], 1, 6>; // Vector blends.  defm : SKLWriteResPair<WriteVarBlend,  [SKLPort015], 2, [2], 2, 6>; // Vector variable blends.  defm : SKLWriteResPair<WriteMPSAD,  [SKLPort5], 4, [2], 2, 6>; // Vector MPSAD.  defm : SKLWriteResPair<WritePSADBW, [SKLPort5], 3>; // Vector PSADBW. +defm : SKLWriteResPair<WritePHMINPOS, [SKLPort01], 4, [1], 1, 6>; // Vector PHMINPOS.  // Vector insert/extract operations.  def : WriteRes<WriteVecInsert, [SKLPort5]> { @@ -927,7 +928,6 @@ def: InstRW<[SKLWriteResGroup48], (instregex "(V?)ADDPD(Y?)rr",                                               "(V?)MULPS(Y?)rr",                                               "(V?)MULSDrr",                                               "(V?)MULSSrr", -                                             "(V?)PHMINPOSUWrr",                                               "(V?)PMADDUBSW(Y?)rr",                                               "(V?)PMADDWD(Y?)rr",                                               "(V?)PMULDQ(Y?)rr", @@ -1883,7 +1883,6 @@ def: InstRW<[SKLWriteResGroup134], (instregex "(V?)ADDPDrm",                                                "(V?)CVTTPS2DQrm",                                                "(V?)MULPDrm",                                                "(V?)MULPSrm", -                                              "(V?)PHMINPOSUWrm",                                                "(V?)PMADDUBSWrm",                                                "(V?)PMADDWDrm",                                                "(V?)PMULDQrm", diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 820b0ca9c10..b13033a3182 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -192,6 +192,7 @@ defm : SKXWriteResPair<WriteBlend, [SKXPort5], 1, [1], 1, 6>; // Vector blends.  defm : SKXWriteResPair<WriteVarBlend, [SKXPort015], 2, [2], 2, 6>; // Vector variable blends.  defm : SKXWriteResPair<WriteMPSAD,  [SKXPort5], 4, [2], 2, 6>; // Vector MPSAD.  defm : SKXWriteResPair<WritePSADBW, [SKXPort5], 3, [1,1], 1, 6>; // Vector PSADBW. +defm : SKXWriteResPair<WritePHMINPOS, [SKXPort015], 4, [1], 1, 6>; // Vector PHMINPOS.  // Vector insert/extract operations.  def : WriteRes<WriteVecInsert, [SKXPort5]> { @@ -1615,7 +1616,6 @@ def: InstRW<[SKXWriteResGroup50], (instregex "ADDPDrr",                                               "MULPSrr",                                               "MULSDrr",                                               "MULSSrr", -                                             "PHMINPOSUWrr",                                               "PMADDUBSWrr",                                               "PMADDWDrr",                                               "PMULDQrr", @@ -1726,7 +1726,6 @@ def: InstRW<[SKXWriteResGroup50], (instregex "ADDPDrr",                                               "VMULSDrr",                                               "VMULSSZrr",                                               "VMULSSrr", -                                             "VPHMINPOSUWrr",                                               "VPLZCNTDZ128rr",                                               "VPLZCNTDZ256rr",                                               "VPLZCNTDZrr", @@ -3893,7 +3892,6 @@ def: InstRW<[SKXWriteResGroup149], (instregex "ADDPDrm",                                                "CVTTPS2DQrm",                                                "MULPDrm",                                                "MULPSrm", -                                              "PHMINPOSUWrm",                                                "PMADDUBSWrm",                                                "PMADDWDrm",                                                "PMULDQrm", @@ -3958,7 +3956,6 @@ def: InstRW<[SKXWriteResGroup149], (instregex "ADDPDrm",                                                "VMULPSrm",                                                "VMULSDZrm",                                                "VMULSSZrm", -                                              "VPHMINPOSUWrm",                                                "VPLZCNTDZ128rm(b?)",                                                "VPLZCNTQZ128rm(b?)",                                                "VPMADDUBSWZ128rm(b?)", diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 4c869ac50a8..379bf8099f6 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -116,6 +116,7 @@ defm WriteBlend  : X86SchedWritePair; // Vector blends.  defm WriteVarBlend  : X86SchedWritePair; // Vector variable blends.  defm WritePSADBW : X86SchedWritePair; // Vector PSADBW.  defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD. +defm WritePHMINPOS : X86SchedWritePair; // Vector PHMINPOS.  // Vector insert/extract operations.  defm WriteVecInsert : X86SchedWritePair; // Insert gpr to vector element. diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index baf7463cfe5..fcb32ef357f 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -242,6 +242,7 @@ defm : AtomWriteResPair<WriteVecLogic,     [AtomPort01],  [AtomPort0], 1, 1>;  defm : AtomWriteResPair<WriteVecShift,     [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;  defm : AtomWriteResPair<WriteVecIMul,       [AtomPort0],  [AtomPort0], 5, 5, [5], [5]>;  defm : AtomWriteResPair<WritePMULLD,       [AtomPort01],  [AtomPort0], 1, 1>; +defm : AtomWriteResPair<WritePHMINPOS,      [AtomPort0],  [AtomPort0], 5, 5, [5], [5]>;  defm : AtomWriteResPair<WriteMPSAD,        [AtomPort01],  [AtomPort0], 1, 1>;  defm : AtomWriteResPair<WritePSADBW,        [AtomPort0],  [AtomPort0], 5, 5, [5], [5]>;  defm : AtomWriteResPair<WriteShuffle,       [AtomPort0],  [AtomPort0], 1, 1>; diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 8c4d6dad3b8..44f8b8ba08f 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -376,6 +376,7 @@ defm : JWriteResFpuPair<WriteVecIMul,     [JFPU0, JVIMUL], 2>;  defm : JWriteResFpuPair<WritePMULLD,      [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>;  defm : JWriteResFpuPair<WriteMPSAD,       [JFPU0, JVIMUL], 3, [1, 2]>;  defm : JWriteResFpuPair<WritePSADBW,      [JFPU01, JVALU], 2>; +defm : JWriteResFpuPair<WritePHMINPOS,    [JFPU0,  JVALU], 2>;  defm : JWriteResFpuPair<WriteShuffle,     [JFPU01, JVALU], 1>;  defm : JWriteResFpuPair<WriteVarShuffle,  [JFPU01, JVALU], 2, [1, 4], 3>;  defm : JWriteResFpuPair<WriteBlend,       [JFPU01, JVALU], 1>; diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 55ee84fc9f0..7b0c0855a66 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -164,6 +164,7 @@ defm : SLMWriteResPair<WriteVarShuffle,  [SLM_FPC_RSV0],  1>;  defm : SLMWriteResPair<WriteBlend,  [SLM_FPC_RSV0],  1>;  defm : SLMWriteResPair<WriteMPSAD,  [SLM_FPC_RSV0],  7>;  defm : SLMWriteResPair<WritePSADBW, [SLM_FPC_RSV0],  4>; +defm : SLMWriteResPair<WritePHMINPOS,  [SLM_FPC_RSV0],   4>;  // Vector insert/extract operations.  defm : SLMWriteResPair<WriteVecInsert, [SLM_FPC_RSV0],  1>; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 2de60dec502..f65aa758f91 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -230,6 +230,7 @@ defm : ZnWriteResFpuPair<WriteBlend,      [ZnFPU01], 1>;  defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU],   2>;  defm : ZnWriteResFpuPair<WriteVarShuffle256, [ZnFPU],   2>;  defm : ZnWriteResFpuPair<WritePSADBW,     [ZnFPU0],  3>; +defm : ZnWriteResFpuPair<WritePHMINPOS,   [ZnFPU0],  4>;  // Vector Shift Operations  defm : ZnWriteResFpuPair<WriteVarVecShift, [ZnFPU12], 1>; diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s index 44ec862182d..6dbc249c117 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s @@ -1720,7 +1720,7 @@ vzeroupper  # CHECK:      Resource pressure per iteration:  # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13] -# CHECK-NEXT: 48.00  2.00    -     355.50 907.50 402.00 398.00 381.00  -     43.00  114.00 116.50 116.50 40.00 +# CHECK-NEXT: 48.00  2.00    -     355.50 907.50 402.00 398.00 381.00  -     43.00  114.00 117.50 117.50 38.00  # CHECK:      Resource pressure by instruction:  # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]   	Instructions: @@ -2147,8 +2147,8 @@ vzeroupper  # CHECK-NEXT:  -      -      -      -      -     0.50   0.50   1.00    -      -      -     0.50   0.50    -     	vphaddsw	(%rax), %xmm1, %xmm2  # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -     0.50   0.50    -     	vphaddw	%xmm0, %xmm1, %xmm2  # CHECK-NEXT:  -      -      -      -      -     0.50   0.50   1.00    -      -      -     0.50   0.50    -     	vphaddw	(%rax), %xmm1, %xmm2 -# CHECK-NEXT:  -      -      -      -      -     1.00    -      -      -      -      -      -      -     1.00   	vphminposuw	%xmm0, %xmm2 -# CHECK-NEXT:  -      -      -      -      -     1.00    -     1.00    -      -      -      -      -     1.00   	vphminposuw	(%rax), %xmm2 +# CHECK-NEXT:  -      -      -      -      -     1.00    -      -      -      -      -     0.50   0.50    -     	vphminposuw	%xmm0, %xmm2 +# CHECK-NEXT:  -      -      -      -      -     1.00    -     1.00    -      -      -     0.50   0.50    -     	vphminposuw	(%rax), %xmm2  # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -     0.50   0.50    -     	vphsubd	%xmm0, %xmm1, %xmm2  # CHECK-NEXT:  -      -      -      -      -     0.50   0.50   1.00    -      -      -     0.50   0.50    -     	vphsubd	(%rax), %xmm1, %xmm2  # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -     0.50   0.50    -     	vphsubsw	%xmm0, %xmm1, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse41.s index 661141d9a75..f2e1f0d337b 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse41.s @@ -270,7 +270,7 @@ roundss     $1, (%rax), %xmm2  # CHECK:      Resource pressure per iteration:  # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13] -# CHECK-NEXT: 6.00    -      -     37.00  23.00  57.50  42.50  44.00   -     5.00   5.00   31.50  31.50  12.00 +# CHECK-NEXT: 6.00    -      -     37.00  23.00  57.50  42.50  44.00   -     5.00   5.00   32.50  32.50  10.00  # CHECK:      Resource pressure by instruction:  # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]   	Instructions: @@ -308,8 +308,8 @@ roundss     $1, (%rax), %xmm2  # CHECK-NEXT: 1.00    -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     	pextrq	$1, %xmm0, %rcx  # CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -     1.00   1.00    -      -      -     	pextrq	$1, %xmm0, (%rax)  # CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -     1.00   1.00    -      -      -     	pextrw	$1, %xmm0, (%rax) -# CHECK-NEXT:  -      -      -      -      -     1.00    -      -      -      -      -      -      -     1.00   	phminposuw	%xmm0, %xmm2 -# CHECK-NEXT:  -      -      -      -      -     1.00    -     1.00    -      -      -      -      -     1.00   	phminposuw	(%rax), %xmm2 +# CHECK-NEXT:  -      -      -      -      -     1.00    -      -      -      -      -     0.50   0.50    -     	phminposuw	%xmm0, %xmm2 +# CHECK-NEXT:  -      -      -      -      -     1.00    -     1.00    -      -      -     0.50   0.50    -     	phminposuw	(%rax), %xmm2  # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -     0.50   0.50    -     	pinsrb	$1, %eax, %xmm1  # CHECK-NEXT:  -      -      -      -      -     0.50   0.50   1.00    -      -      -     0.50   0.50    -     	pinsrb	$1, (%rax), %xmm1  # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -     0.50   0.50    -     	pinsrd	$1, %eax, %xmm1 | 

