summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/X86/X86InstrSSE.td 16
-rw-r--r-- llvm/lib/Target/X86/X86Schedule.td 1
-rw-r--r-- llvm/lib/Target/X86/X86ScheduleAtom.td 1
-rw-r--r-- llvm/test/CodeGen/X86/sse2-schedule.ll 24
4 files changed, 16 insertions, 26 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index c7305aec9a8..b3c5ae54181 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -4249,8 +4249,8 @@ multiclass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
- (OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))]>,
- Sched<[WriteShuffle]>;
+ (OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))],
+ IIC_SSE_PACK>, Sched<[WriteShuffle]>;
def rm : PDI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
@@ -4259,8 +4259,8 @@ multiclass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(OutVT (OpNode (ArgVT VR128:$src1),
- (bitconvert (ld_frag addr:$src2)))))]>,
- Sched<[WriteShuffleLd, ReadAfterLd]>;
+ (bitconvert (ld_frag addr:$src2)))))],
+ IIC_SSE_PACK>, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
multiclass sse2_pack_y<bits<8> opc, string OpcodeStr, ValueType OutVT,
@@ -4292,8 +4292,8 @@ multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
- (OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))]>,
- Sched<[WriteShuffle]>;
+ (OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))],
+ IIC_SSE_PACK>, Sched<[WriteShuffle]>;
def rm : SS48I<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
@@ -4302,8 +4302,8 @@ multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(OutVT (OpNode (ArgVT VR128:$src1),
- (bitconvert (ld_frag addr:$src2)))))]>,
- Sched<[WriteShuffleLd, ReadAfterLd]>;
+ (bitconvert (ld_frag addr:$src2)))))],
+ IIC_SSE_PACK>, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
multiclass sse4_pack_y<bits<8> opc, string OpcodeStr, ValueType OutVT,
diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td
index d831a797435..64662e8d18e 100644
--- a/llvm/lib/Target/X86/X86Schedule.td
+++ b/llvm/lib/Target/X86/X86Schedule.td
@@ -299,6 +299,7 @@ def IIC_SSE_SHUFP : InstrItinClass;
def IIC_SSE_PSHUF_RI : InstrItinClass;
def IIC_SSE_PSHUF_MI : InstrItinClass;
+def IIC_SSE_PACK : InstrItinClass;
def IIC_SSE_UNPCK : InstrItinClass;
def IIC_SSE_MOVMSK : InstrItinClass;
diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td
index a5b440182aa..200a3216f6f 100644
--- a/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -212,6 +212,7 @@ def AtomItineraries : ProcessorItineraries<
InstrItinData<IIC_SSE_PSHUF_RI, [InstrStage<1, [Port0]>] >,
InstrItinData<IIC_SSE_PSHUF_MI, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_PACK, [InstrStage<1, [Port0]>] >,
InstrItinData<IIC_SSE_UNPCK, [InstrStage<1, [Port0]>] >,
InstrItinData<IIC_SSE_SQRTPS_RR, [InstrStage<70, [Port0, Port1]>] >,
diff --git a/llvm/test/CodeGen/X86/sse2-schedule.ll b/llvm/test/CodeGen/X86/sse2-schedule.ll
index fbc21390d2d..1fa1b6e4f10 100644
--- a/llvm/test/CodeGen/X86/sse2-schedule.ll
+++ b/llvm/test/CodeGen/X86/sse2-schedule.ll
@@ -2816,12 +2816,8 @@ define <8 x i16> @test_packssdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
;
; ATOM-LABEL: test_packssdw:
; ATOM: # BB#0:
-; ATOM-NEXT: packssdw %xmm1, %xmm0
-; ATOM-NEXT: packssdw (%rdi), %xmm0
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
+; ATOM-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00]
+; ATOM-NEXT: packssdw (%rdi), %xmm0 # sched: [1:1.00]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
@@ -2874,12 +2870,8 @@ define <16 x i8> @test_packsswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
;
; ATOM-LABEL: test_packsswb:
; ATOM: # BB#0:
-; ATOM-NEXT: packsswb %xmm1, %xmm0
-; ATOM-NEXT: packsswb (%rdi), %xmm0
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
+; ATOM-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00]
+; ATOM-NEXT: packsswb (%rdi), %xmm0 # sched: [1:1.00]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
@@ -2932,12 +2924,8 @@ define <16 x i8> @test_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
;
; ATOM-LABEL: test_packuswb:
; ATOM: # BB#0:
-; ATOM-NEXT: packuswb %xmm1, %xmm0
-; ATOM-NEXT: packuswb (%rdi), %xmm0
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
+; ATOM-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00]
+; ATOM-NEXT: packuswb (%rdi), %xmm0 # sched: [1:1.00]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
OpenPOWER on IntegriCloud