summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/X86/X86ScheduleSLM.td | 4
-rw-r--r-- llvm/test/CodeGen/X86/slow-pmulld.ll | 42
-rw-r--r-- llvm/test/CodeGen/X86/sse41-schedule.ll | 4
3 files changed, 27 insertions, 23 deletions
diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td
index 73eb257ad5c..a712a188aa0 100644
--- a/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -145,7 +145,9 @@ defm : SLMWriteResPair<WriteVecShift, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecLogic, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALU, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>;
-defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 11, [11], 7>;
+// FIXME: The below is closer to correct, but caused some perf regressions.
+//defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 11, [11], 7>;
+defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteBlend, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteMPSAD, [SLM_FPC_RSV0], 7>;
diff --git a/llvm/test/CodeGen/X86/slow-pmulld.ll b/llvm/test/CodeGen/X86/slow-pmulld.ll
index 59b36e93274..007531fca7d 100644
--- a/llvm/test/CodeGen/X86/slow-pmulld.ll
+++ b/llvm/test/CodeGen/X86/slow-pmulld.ll
@@ -1215,32 +1215,34 @@ define <8 x i32> @test_mul_v8i32_v8i16_minsize(<8 x i16> %A) minsize {
define <16 x i32> @test_mul_v16i32_v16i16_minsize(<16 x i16> %A) minsize {
; SLM32-LABEL: test_mul_v16i32_v16i16_minsize:
; SLM32: # %bb.0:
-; SLM32-NEXT: movdqa {{.*#+}} xmm5 = [18778,18778,18778,18778]
-; SLM32-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
-; SLM32-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
-; SLM32-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; SLM32-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
+; SLM32-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
+; SLM32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SLM32-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SLM32-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero
-; SLM32-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
-; SLM32-NEXT: pmulld %xmm5, %xmm0
-; SLM32-NEXT: pmulld %xmm5, %xmm2
-; SLM32-NEXT: pmulld %xmm5, %xmm1
-; SLM32-NEXT: pmulld %xmm5, %xmm3
+; SLM32-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
+; SLM32-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; SLM32-NEXT: movdqa {{.*#+}} xmm1 = [18778,18778,18778,18778]
+; SLM32-NEXT: pmulld %xmm1, %xmm4
+; SLM32-NEXT: pmulld %xmm1, %xmm0
+; SLM32-NEXT: pmulld %xmm1, %xmm2
+; SLM32-NEXT: pmulld %xmm1, %xmm3
+; SLM32-NEXT: movdqa %xmm4, %xmm1
; SLM32-NEXT: retl
;
; SLM64-LABEL: test_mul_v16i32_v16i16_minsize:
; SLM64: # %bb.0:
-; SLM64-NEXT: movdqa {{.*#+}} xmm5 = [18778,18778,18778,18778]
-; SLM64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
-; SLM64-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
-; SLM64-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; SLM64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
+; SLM64-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
+; SLM64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SLM64-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SLM64-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero
-; SLM64-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
-; SLM64-NEXT: pmulld %xmm5, %xmm0
-; SLM64-NEXT: pmulld %xmm5, %xmm2
-; SLM64-NEXT: pmulld %xmm5, %xmm1
-; SLM64-NEXT: pmulld %xmm5, %xmm3
+; SLM64-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
+; SLM64-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; SLM64-NEXT: movdqa {{.*#+}} xmm1 = [18778,18778,18778,18778]
+; SLM64-NEXT: pmulld %xmm1, %xmm4
+; SLM64-NEXT: pmulld %xmm1, %xmm0
+; SLM64-NEXT: pmulld %xmm1, %xmm2
+; SLM64-NEXT: pmulld %xmm1, %xmm3
+; SLM64-NEXT: movdqa %xmm4, %xmm1
; SLM64-NEXT: retq
;
; SLOW32-LABEL: test_mul_v16i32_v16i16_minsize:
diff --git a/llvm/test/CodeGen/X86/sse41-schedule.ll b/llvm/test/CodeGen/X86/sse41-schedule.ll
index 39052ba15fc..96da898e396 100644
--- a/llvm/test/CodeGen/X86/sse41-schedule.ll
+++ b/llvm/test/CodeGen/X86/sse41-schedule.ll
@@ -4817,8 +4817,8 @@ define <4 x i32> @test_pmulld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
;
; SLM-LABEL: test_pmulld:
; SLM: # %bb.0:
-; SLM-NEXT: pmulld %xmm1, %xmm0 # sched: [11:11.00]
-; SLM-NEXT: pmulld (%rdi), %xmm0 # sched: [14:11.00]
+; SLM-NEXT: pmulld %xmm1, %xmm0 # sched: [4:1.00]
+; SLM-NEXT: pmulld (%rdi), %xmm0 # sched: [7:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_pmulld:
OpenPOWER on IntegriCloud