diff options
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 16 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Schedule.td | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleAtom.td | 1 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse2-schedule.ll | 24 |
4 files changed, 16 insertions, 26 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index c7305aec9a8..b3c5ae54181 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -4249,8 +4249,8 @@ multiclass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, - (OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))]>, - Sched<[WriteShuffle]>; + (OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))], + IIC_SSE_PACK>, Sched<[WriteShuffle]>; def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !if(Is2Addr, @@ -4259,8 +4259,8 @@ multiclass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (OutVT (OpNode (ArgVT VR128:$src1), - (bitconvert (ld_frag addr:$src2)))))]>, - Sched<[WriteShuffleLd, ReadAfterLd]>; + (bitconvert (ld_frag addr:$src2)))))], + IIC_SSE_PACK>, Sched<[WriteShuffleLd, ReadAfterLd]>; } multiclass sse2_pack_y<bits<8> opc, string OpcodeStr, ValueType OutVT, @@ -4292,8 +4292,8 @@ multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, - (OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))]>, - Sched<[WriteShuffle]>; + (OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))], + IIC_SSE_PACK>, Sched<[WriteShuffle]>; def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !if(Is2Addr, @@ -4302,8 +4302,8 @@ multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (OutVT (OpNode (ArgVT VR128:$src1), - (bitconvert (ld_frag addr:$src2)))))]>, - Sched<[WriteShuffleLd, ReadAfterLd]>; + (bitconvert (ld_frag addr:$src2)))))], + IIC_SSE_PACK>, Sched<[WriteShuffleLd, ReadAfterLd]>; } multiclass sse4_pack_y<bits<8> opc, string OpcodeStr, ValueType OutVT, diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index d831a797435..64662e8d18e 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -299,6 +299,7 @@ def IIC_SSE_SHUFP : InstrItinClass; def IIC_SSE_PSHUF_RI : InstrItinClass; def IIC_SSE_PSHUF_MI : InstrItinClass; +def IIC_SSE_PACK : InstrItinClass; def IIC_SSE_UNPCK : InstrItinClass; def IIC_SSE_MOVMSK : InstrItinClass; diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index a5b440182aa..200a3216f6f 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -212,6 +212,7 @@ def AtomItineraries : ProcessorItineraries< InstrItinData<IIC_SSE_PSHUF_RI, [InstrStage<1, [Port0]>] >, InstrItinData<IIC_SSE_PSHUF_MI, [InstrStage<1, [Port0]>] >, + InstrItinData<IIC_SSE_PACK, [InstrStage<1, [Port0]>] >, InstrItinData<IIC_SSE_UNPCK, [InstrStage<1, [Port0]>] >, InstrItinData<IIC_SSE_SQRTPS_RR, [InstrStage<70, [Port0, Port1]>] >, diff --git a/llvm/test/CodeGen/X86/sse2-schedule.ll b/llvm/test/CodeGen/X86/sse2-schedule.ll index fbc21390d2d..1fa1b6e4f10 100644 --- a/llvm/test/CodeGen/X86/sse2-schedule.ll +++ b/llvm/test/CodeGen/X86/sse2-schedule.ll @@ -2816,12 +2816,8 @@ define <8 x i16> @test_packssdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; ATOM-LABEL: test_packssdw: ; ATOM: # BB#0: -; ATOM-NEXT: packssdw %xmm1, %xmm0 -; ATOM-NEXT: packssdw (%rdi), %xmm0 -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] +; ATOM-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] +; ATOM-NEXT: packssdw (%rdi), %xmm0 # sched: [1:1.00] ; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: nop # sched: [1:0.50] @@ -2874,12 +2870,8 @@ define <16 x i8> @test_packsswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; ATOM-LABEL: test_packsswb: ; ATOM: # BB#0: -; ATOM-NEXT: packsswb %xmm1, %xmm0 -; ATOM-NEXT: packsswb (%rdi), %xmm0 -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] +; ATOM-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] +; ATOM-NEXT: packsswb (%rdi), %xmm0 # sched: [1:1.00] ; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: nop # sched: [1:0.50] @@ -2932,12 +2924,8 @@ define <16 x i8> @test_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; ATOM-LABEL: test_packuswb: ; ATOM: # BB#0: -; ATOM-NEXT: packuswb %xmm1, %xmm0 -; ATOM-NEXT: packuswb (%rdi), %xmm0 -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] +; ATOM-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00] +; ATOM-NEXT: packuswb (%rdi), %xmm0 # sched: [1:1.00] ; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: nop # sched: [1:0.50] |

