summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/X86/X86ScheduleAtom.td 1318
-rw-r--r-- llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll 4
-rw-r--r-- llvm/test/CodeGen/X86/lsr-static-addr.ll 12
-rw-r--r-- llvm/test/CodeGen/X86/mmx-schedule.ll 12
-rw-r--r-- llvm/test/CodeGen/X86/schedule-x86_32.ll 2
-rw-r--r-- llvm/test/CodeGen/X86/schedule-x86_64.ll 2
-rw-r--r-- llvm/test/CodeGen/X86/select.ll 78
-rw-r--r-- llvm/test/CodeGen/X86/sse-schedule.ll 2
-rw-r--r-- llvm/test/CodeGen/X86/sse2-schedule.ll 31
-rw-r--r-- llvm/test/CodeGen/X86/sse3-schedule.ll 21
-rw-r--r-- llvm/test/CodeGen/X86/ssse3-schedule.ll 27
-rw-r--r-- llvm/test/CodeGen/X86/x87-schedule.ll 140
12 files changed, 957 insertions, 692 deletions
diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td
index 98a9d86c65a..74940b3a5f2 100644
--- a/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -15,549 +15,801 @@
//
// Scheduling information derived from the "Intel 64 and IA32 Architectures
// Optimization Reference Manual", Chapter 13, Section 4.
-// Functional Units
-// Port 0
-def Port0 : FuncUnit; // ALU: ALU0, shift/rotate, load/store
- // SIMD/FP: SIMD ALU, Shuffle,SIMD/FP multiply, divide
-def Port1 : FuncUnit; // ALU: ALU1, bit processing, jump, and LEA
- // SIMD/FP: SIMD ALU, FP Adder
-
-def AtomItineraries : ProcessorItineraries<
- [ Port0, Port1 ],
- [], [
- // P0 only
- // InstrItinData<class, [InstrStage<N, [P0]>] >,
- // P0 or P1
- // InstrItinData<class, [InstrStage<N, [P0, P1]>] >,
- // P0 and P1
- // InstrItinData<class, [InstrStage<N, [P0], 0>, InstrStage<N, [P1]>] >,
- //
- // Default is 1 cycle, port0 or port1
- InstrItinData<IIC_ALU_MEM, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_ALU_NONMEM, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_LEA, [InstrStage<1, [Port1]>] >,
- InstrItinData<IIC_LEA_16, [InstrStage<2, [Port0, Port1]>] >,
- // mul
- InstrItinData<IIC_MUL8_MEM, [InstrStage<7, [Port0, Port1]>] >,
- InstrItinData<IIC_MUL8_REG, [InstrStage<7, [Port0, Port1]>] >,
- InstrItinData<IIC_MUL16_MEM, [InstrStage<8, [Port0, Port1]>] >,
- InstrItinData<IIC_MUL16_REG, [InstrStage<7, [Port0, Port1]>] >,
- InstrItinData<IIC_MUL32_MEM, [InstrStage<7, [Port0, Port1]>] >,
- InstrItinData<IIC_MUL32_REG, [InstrStage<6, [Port0, Port1]>] >,
- InstrItinData<IIC_MUL64_MEM, [InstrStage<12, [Port0, Port1]>] >,
- InstrItinData<IIC_MUL64_REG, [InstrStage<12, [Port0, Port1]>] >,
- // imul by al, ax, eax, rax
- InstrItinData<IIC_IMUL8_MEM, [InstrStage<7, [Port0, Port1]>] >,
- InstrItinData<IIC_IMUL8_REG, [InstrStage<7, [Port0, Port1]>] >,
- InstrItinData<IIC_IMUL16_MEM, [InstrStage<8, [Port0, Port1]>] >,
- InstrItinData<IIC_IMUL16_REG, [InstrStage<7, [Port0, Port1]>] >,
- InstrItinData<IIC_IMUL32_MEM, [InstrStage<7, [Port0, Port1]>] >,
- InstrItinData<IIC_IMUL32_REG, [InstrStage<6, [Port0, Port1]>] >,
- InstrItinData<IIC_IMUL64_MEM, [InstrStage<12, [Port0, Port1]>] >,
- InstrItinData<IIC_IMUL64_REG, [InstrStage<12, [Port0, Port1]>] >,
- // imul reg by reg|mem
- InstrItinData<IIC_IMUL16_RM, [InstrStage<7, [Port0, Port1]>] >,
- InstrItinData<IIC_IMUL16_RR, [InstrStage<6, [Port0, Port1]>] >,
- InstrItinData<IIC_IMUL32_RM, [InstrStage<5, [Port0]>] >,
- InstrItinData<IIC_IMUL32_RR, [InstrStage<5, [Port0]>] >,
- InstrItinData<IIC_IMUL64_RM, [InstrStage<12, [Port0, Port1]>] >,
- InstrItinData<IIC_IMUL64_RR, [InstrStage<12, [Port0, Port1]>] >,
- // imul reg = reg/mem * imm
- InstrItinData<IIC_IMUL16_RRI, [InstrStage<6, [Port0, Port1]>] >,
- InstrItinData<IIC_IMUL32_RRI, [InstrStage<5, [Port0]>] >,
- InstrItinData<IIC_IMUL64_RRI, [InstrStage<14, [Port0, Port1]>] >,
- InstrItinData<IIC_IMUL16_RMI, [InstrStage<7, [Port0, Port1]>] >,
- InstrItinData<IIC_IMUL32_RMI, [InstrStage<5, [Port0]>] >,
- InstrItinData<IIC_IMUL64_RMI, [InstrStage<14, [Port0, Port1]>] >,
- // idiv
- InstrItinData<IIC_IDIV8_MEM, [InstrStage<62, [Port0, Port1]>] >,
- InstrItinData<IIC_IDIV8_REG, [InstrStage<62, [Port0, Port1]>] >,
- InstrItinData<IIC_IDIV16_MEM, [InstrStage<62, [Port0, Port1]>] >,
- InstrItinData<IIC_IDIV16_REG, [InstrStage<62, [Port0, Port1]>] >,
- InstrItinData<IIC_IDIV32_MEM, [InstrStage<62, [Port0, Port1]>] >,
- InstrItinData<IIC_IDIV32_REG, [InstrStage<62, [Port0, Port1]>] >,
- InstrItinData<IIC_IDIV64_MEM, [InstrStage<130, [Port0, Port1]>] >,
- InstrItinData<IIC_IDIV64_REG, [InstrStage<130, [Port0, Port1]>] >,
- // div
- InstrItinData<IIC_DIV8_REG, [InstrStage<50, [Port0, Port1]>] >,
- InstrItinData<IIC_DIV8_MEM, [InstrStage<68, [Port0, Port1]>] >,
- InstrItinData<IIC_DIV16_MEM, [InstrStage<50, [Port0, Port1]>] >,
- InstrItinData<IIC_DIV16_REG, [InstrStage<50, [Port0, Port1]>] >,
- InstrItinData<IIC_DIV32_MEM, [InstrStage<50, [Port0, Port1]>] >,
- InstrItinData<IIC_DIV32_REG, [InstrStage<50, [Port0, Port1]>] >,
- InstrItinData<IIC_DIV64_MEM, [InstrStage<130, [Port0, Port1]>] >,
- InstrItinData<IIC_DIV64_REG, [InstrStage<130, [Port0, Port1]>] >,
- // neg/not/inc/dec
- InstrItinData<IIC_UNARY_REG, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_UNARY_MEM, [InstrStage<1, [Port0]>] >,
- // add/sub/and/or/xor/cmp/test
- InstrItinData<IIC_BIN_NONMEM, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_BIN_MEM, [InstrStage<1, [Port0]>] >,
- // adc/sbc
- InstrItinData<IIC_BIN_CARRY_NONMEM, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_BIN_CARRY_MEM, [InstrStage<1, [Port0]>] >,
- // shift/rotate
- InstrItinData<IIC_SR, [InstrStage<1, [Port0]>] >,
- // shift double
- InstrItinData<IIC_SHD16_REG_IM, [InstrStage<6, [Port0, Port1]>] >,
- InstrItinData<IIC_SHD16_REG_CL, [InstrStage<6, [Port0, Port1]>] >,
- InstrItinData<IIC_SHD16_MEM_IM, [InstrStage<6, [Port0, Port1]>] >,
- InstrItinData<IIC_SHD16_MEM_CL, [InstrStage<6, [Port0, Port1]>] >,
- InstrItinData<IIC_SHD32_REG_IM, [InstrStage<2, [Port0, Port1]>] >,
- InstrItinData<IIC_SHD32_REG_CL, [InstrStage<2, [Port0, Port1]>] >,
- InstrItinData<IIC_SHD32_MEM_IM, [InstrStage<4, [Port0, Port1]>] >,
- InstrItinData<IIC_SHD32_MEM_CL, [InstrStage<4, [Port0, Port1]>] >,
- InstrItinData<IIC_SHD64_REG_IM, [InstrStage<9, [Port0, Port1]>] >,
- InstrItinData<IIC_SHD64_REG_CL, [InstrStage<8, [Port0, Port1]>] >,
- InstrItinData<IIC_SHD64_MEM_IM, [InstrStage<9, [Port0, Port1]>] >,
- InstrItinData<IIC_SHD64_MEM_CL, [InstrStage<9, [Port0, Port1]>] >,
- // cmov
- InstrItinData<IIC_CMOV16_RM, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_CMOV16_RR, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_CMOV32_RM, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_CMOV32_RR, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_CMOV64_RM, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_CMOV64_RR, [InstrStage<1, [Port0, Port1]>] >,
- // set
- InstrItinData<IIC_SET_M, [InstrStage<2, [Port0, Port1]>] >,
- InstrItinData<IIC_SET_R, [InstrStage<1, [Port0, Port1]>] >,
- // jcc
- InstrItinData<IIC_Jcc, [InstrStage<1, [Port1]>] >,
- // jcxz/jecxz/jrcxz
- InstrItinData<IIC_JCXZ, [InstrStage<4, [Port0, Port1]>] >,
- // jmp rel
- InstrItinData<IIC_JMP_REL, [InstrStage<1, [Port1]>] >,
- // jmp indirect
- InstrItinData<IIC_JMP_REG, [InstrStage<1, [Port1]>] >,
- InstrItinData<IIC_JMP_MEM, [InstrStage<2, [Port0, Port1]>] >,
- // jmp far
- InstrItinData<IIC_JMP_FAR_MEM, [InstrStage<32, [Port0, Port1]>] >,
- InstrItinData<IIC_JMP_FAR_PTR, [InstrStage<31, [Port0, Port1]>] >,
- // loop/loope/loopne
- InstrItinData<IIC_LOOP, [InstrStage<18, [Port0, Port1]>] >,
- InstrItinData<IIC_LOOPE, [InstrStage<8, [Port0, Port1]>] >,
- InstrItinData<IIC_LOOPNE, [InstrStage<17, [Port0, Port1]>] >,
- // call - all but reg/imm
- InstrItinData<IIC_CALL_RI, [InstrStage<1, [Port0], 0>,
- InstrStage<1, [Port1]>] >,
- InstrItinData<IIC_CALL_MEM, [InstrStage<15, [Port0, Port1]>] >,
- InstrItinData<IIC_CALL_FAR_MEM, [InstrStage<40, [Port0, Port1]>] >,
- InstrItinData<IIC_CALL_FAR_PTR, [InstrStage<39, [Port0, Port1]>] >,
- //ret
- InstrItinData<IIC_RET, [InstrStage<79, [Port0, Port1]>] >,
- InstrItinData<IIC_RET_IMM, [InstrStage<1, [Port0], 0>, InstrStage<1, [Port1]>] >,
- //sign extension movs
- InstrItinData<IIC_MOVSX,[InstrStage<1, [Port0] >] >,
- InstrItinData<IIC_MOVSX_R16_R8, [InstrStage<2, [Port0, Port1]>] >,
- InstrItinData<IIC_MOVSX_R16_M8, [InstrStage<3, [Port0, Port1]>] >,
- //zero extension movs
- InstrItinData<IIC_MOVZX,[InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_MOVZX_R16_R8, [InstrStage<2, [Port0, Port1]>] >,
- InstrItinData<IIC_MOVZX_R16_M8, [InstrStage<3, [Port0, Port1]>] >,
-
- InstrItinData<IIC_REP_MOVS, [InstrStage<75, [Port0, Port1]>] >,
- InstrItinData<IIC_REP_STOS, [InstrStage<74, [Port0, Port1]>] >,
-
- // SSE binary operations
- // arithmetic fp scalar
- InstrItinData<IIC_SSE_ALU_F32S_RR, [InstrStage<5, [Port1]>] >,
- InstrItinData<IIC_SSE_ALU_F32S_RM, [InstrStage<5, [Port0], 0>,
- InstrStage<5, [Port1]>] >,
- InstrItinData<IIC_SSE_ALU_F64S_RR, [InstrStage<5, [Port1]>] >,
- InstrItinData<IIC_SSE_ALU_F64S_RM, [InstrStage<5, [Port0], 0>,
- InstrStage<5, [Port1]>] >,
- InstrItinData<IIC_SSE_MUL_F32S_RR, [InstrStage<4, [Port0]>] >,
- InstrItinData<IIC_SSE_MUL_F32S_RM, [InstrStage<4, [Port0]>] >,
- InstrItinData<IIC_SSE_MUL_F64S_RR, [InstrStage<5, [Port0]>] >,
- InstrItinData<IIC_SSE_MUL_F64S_RM, [InstrStage<5, [Port0]>] >,
- InstrItinData<IIC_SSE_DIV_F32S_RR, [InstrStage<34, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_DIV_F32S_RM, [InstrStage<34, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_DIV_F64S_RR, [InstrStage<62, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_DIV_F64S_RM, [InstrStage<62, [Port0, Port1]>] >,
-
- InstrItinData<IIC_SSE_COMIS_RR, [InstrStage<9, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_COMIS_RM, [InstrStage<10, [Port0, Port1]>] >,
-
- InstrItinData<IIC_SSE_HADDSUB_RR, [InstrStage<8, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_HADDSUB_RM, [InstrStage<9, [Port0, Port1]>] >,
-
- // arithmetic fp parallel
- InstrItinData<IIC_SSE_ALU_F32P_RR, [InstrStage<5, [Port1]>] >,
- InstrItinData<IIC_SSE_ALU_F32P_RM, [InstrStage<5, [Port0], 0>,
- InstrStage<5, [Port1]>] >,
- InstrItinData<IIC_SSE_ALU_F64P_RR, [InstrStage<6, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_ALU_F64P_RM, [InstrStage<7, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_MUL_F32P_RR, [InstrStage<5, [Port0]>] >,
- InstrItinData<IIC_SSE_MUL_F32P_RM, [InstrStage<5, [Port0]>] >,
- InstrItinData<IIC_SSE_MUL_F64P_RR, [InstrStage<9, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_MUL_F64P_RM, [InstrStage<10, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_DIV_F32P_RR, [InstrStage<70, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_DIV_F32P_RM, [InstrStage<70, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_DIV_F64P_RR, [InstrStage<125, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_DIV_F64P_RM, [InstrStage<125, [Port0, Port1]>] >,
-
- // bitwise parallel
- InstrItinData<IIC_SSE_BIT_P_RR, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_BIT_P_RM, [InstrStage<1, [Port0]>] >,
-
- // arithmetic int parallel
- InstrItinData<IIC_SSE_INTALU_P_RR, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_INTALU_P_RM, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_SSE_INTALUQ_P_RR, [InstrStage<2, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_INTALUQ_P_RM, [InstrStage<3, [Port0, Port1]>] >,
-
- // multiply int parallel
- InstrItinData<IIC_SSE_INTMUL_P_RR, [InstrStage<5, [Port0]>] >,
- InstrItinData<IIC_SSE_INTMUL_P_RM, [InstrStage<5, [Port0]>] >,
-
- // shift parallel
- InstrItinData<IIC_SSE_INTSH_P_RR, [InstrStage<2, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_INTSH_P_RM, [InstrStage<3, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_INTSH_P_RI, [InstrStage<1, [Port0, Port1]>] >,
-
- InstrItinData<IIC_SSE_INTSHDQ_P_RI, [InstrStage<1, [Port0, Port1]>] >,
-
- InstrItinData<IIC_SSE_SHUFP, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_SSE_PSHUF_RI, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_SSE_PSHUF_MI, [InstrStage<1, [Port0]>] >,
-
- InstrItinData<IIC_SSE_PACK, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_SSE_UNPCK, [InstrStage<1, [Port0]>] >,
-
- InstrItinData<IIC_SSE_SQRTPS_RR, [InstrStage<70, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_SQRTPS_RM, [InstrStage<70, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_SQRTSS_RR, [InstrStage<34, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_SQRTSS_RM, [InstrStage<34, [Port0, Port1]>] >,
-
- InstrItinData<IIC_SSE_SQRTPD_RR, [InstrStage<125, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_SQRTPD_RM, [InstrStage<125, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_SQRTSD_RR, [InstrStage<62, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_SQRTSD_RM, [InstrStage<62, [Port0, Port1]>] >,
-
- InstrItinData<IIC_SSE_RSQRTPS_RR, [InstrStage<9, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_RSQRTPS_RM, [InstrStage<10, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_RSQRTSS_RR, [InstrStage<4, [Port0]>] >,
- InstrItinData<IIC_SSE_RSQRTSS_RM, [InstrStage<4, [Port0]>] >,
-
- InstrItinData<IIC_SSE_RCPP_RR, [InstrStage<9, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_RCPP_RM, [InstrStage<10, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_RCPS_RR, [InstrStage<4, [Port0]>] >,
- InstrItinData<IIC_SSE_RCPS_RM, [InstrStage<4, [Port0]>] >,
-
- InstrItinData<IIC_SSE_MOVMSK, [InstrStage<3, [Port0]>] >,
- InstrItinData<IIC_SSE_MASKMOV, [InstrStage<2, [Port0, Port1]>] >,
-
- InstrItinData<IIC_SSE_PEXTRW, [InstrStage<4, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_PINSRW, [InstrStage<1, [Port0]>] >,
-
- InstrItinData<IIC_SSE_PABS_RR, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_PABS_RM, [InstrStage<1, [Port0]>] >,
-
- InstrItinData<IIC_SSE_MOV_S_RR, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_MOV_S_RM, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_SSE_MOV_S_MR, [InstrStage<1, [Port0]>] >,
-
- InstrItinData<IIC_SSE_MOVA_P_RR, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_MOVA_P_RM, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_SSE_MOVA_P_MR, [InstrStage<1, [Port0]>] >,
-
- InstrItinData<IIC_SSE_MOVU_P_RR, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_MOVU_P_RM, [InstrStage<3, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_MOVU_P_MR, [InstrStage<2, [Port0, Port1]>] >,
-
- InstrItinData<IIC_SSE_MOV_LH, [InstrStage<1, [Port0]>] >,
-
- InstrItinData<IIC_SSE_LDDQU, [InstrStage<3, [Port0, Port1]>] >,
-
- InstrItinData<IIC_SSE_MOVDQ, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_SSE_MOVD_ToGP, [InstrStage<3, [Port0]>] >,
- InstrItinData<IIC_SSE_MOVQ_RR, [InstrStage<1, [Port0, Port1]>] >,
-
- InstrItinData<IIC_SSE_MOVNT, [InstrStage<1, [Port0]>] >,
-
- InstrItinData<IIC_SSE_PREFETCH, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_SSE_PAUSE, [InstrStage<17, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_LFENCE, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_MFENCE, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_SSE_SFENCE, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_SSE_LDMXCSR, [InstrStage<5, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_STMXCSR, [InstrStage<15, [Port0, Port1]>] >,
-
- InstrItinData<IIC_SSE_PHADDSUBD_RR, [InstrStage<3, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_PHADDSUBD_RM, [InstrStage<4, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_PHADDSUBSW_RR, [InstrStage<7, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_PHADDSUBSW_RM, [InstrStage<8, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_PHADDSUBW_RR, [InstrStage<7, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_PHADDSUBW_RM, [InstrStage<8, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_PSHUFB_RR, [InstrStage<4, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_PSHUFB_RM, [InstrStage<5, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_PSIGN_RR, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_PSIGN_RM, [InstrStage<1, [Port0]>] >,
-
- InstrItinData<IIC_SSE_PMADD, [InstrStage<5, [Port0]>] >,
- InstrItinData<IIC_SSE_PMULHRSW, [InstrStage<5, [Port0]>] >,
- InstrItinData<IIC_SSE_PALIGNRR, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_SSE_PALIGNRM, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_SSE_MWAIT, [InstrStage<46, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_MONITOR, [InstrStage<45, [Port0, Port1]>] >,
-
- // conversions
- // to/from PD ...
- InstrItinData<IIC_SSE_CVT_PD_RR, [InstrStage<7, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_CVT_PD_RM, [InstrStage<8, [Port0, Port1]>] >,
- // to/from PS except to/from PD and PS2PI
- InstrItinData<IIC_SSE_CVT_PS_RR, [InstrStage<6, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_CVT_PS_RM, [InstrStage<7, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_CVT_Scalar_RR, [InstrStage<6, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_CVT_Scalar_RM, [InstrStage<7, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_CVT_SS2SI32_RR, [InstrStage<8, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_CVT_SS2SI32_RM, [InstrStage<9, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_CVT_SS2SI64_RR, [InstrStage<9, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_CVT_SS2SI64_RM, [InstrStage<10, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_CVT_SD2SI_RR, [InstrStage<8, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_CVT_SD2SI_RM, [InstrStage<9, [Port0, Port1]>] >,
-
- // MMX MOVs
- InstrItinData<IIC_MMX_MOV_MM_RM, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_MMX_MOV_REG_MM, [InstrStage<3, [Port0]>] >,
- InstrItinData<IIC_MMX_MOVQ_RM, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_MMX_MOVQ_RR, [InstrStage<1, [Port0, Port1]>] >,
- // other MMX
- InstrItinData<IIC_MMX_ALU_RM, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_MMX_ALU_RR, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_MMX_ALUQ_RM, [InstrStage<3, [Port0, Port1]>] >,
- InstrItinData<IIC_MMX_ALUQ_RR, [InstrStage<2, [Port0, Port1]>] >,
- InstrItinData<IIC_MMX_PHADDSUBW_RM, [InstrStage<6, [Port0, Port1]>] >,
- InstrItinData<IIC_MMX_PHADDSUBW_RR, [InstrStage<5, [Port0, Port1]>] >,
- InstrItinData<IIC_MMX_PHADDSUBD_RM, [InstrStage<4, [Port0, Port1]>] >,
- InstrItinData<IIC_MMX_PHADDSUBD_RR, [InstrStage<3, [Port0, Port1]>] >,
- InstrItinData<IIC_MMX_PMUL, [InstrStage<4, [Port0]>] >,
- InstrItinData<IIC_MMX_MISC_FUNC_MEM, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_MMX_MISC_FUNC_REG, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_MMX_PSADBW, [InstrStage<4, [Port0, Port1]>] >,
- InstrItinData<IIC_MMX_SHIFT_RI, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_MMX_SHIFT_RM, [InstrStage<3, [Port0, Port1]>] >,
- InstrItinData<IIC_MMX_SHIFT_RR, [InstrStage<2, [Port0, Port1]>] >,
- InstrItinData<IIC_MMX_UNPCK_H_RM, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_MMX_UNPCK_H_RR, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_MMX_UNPCK_L, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_MMX_PCK_RM, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_MMX_PCK_RR, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_MMX_PSHUF, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_MMX_PEXTR, [InstrStage<4, [Port0, Port1]>] >,
- InstrItinData<IIC_MMX_PINSRW, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_MMX_MASKMOV, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_MMX_MOVMSK, [InstrStage<3, [Port0]>] >,
- // conversions
- // from/to PD
- InstrItinData<IIC_MMX_CVT_PD_RR, [InstrStage<7, [Port0, Port1]>] >,
- InstrItinData<IIC_MMX_CVT_PD_RM, [InstrStage<8, [Port0, Port1]>] >,
- // from/to PI
- InstrItinData<IIC_MMX_CVT_PS_RR, [InstrStage<5, [Port1]>] >,
- InstrItinData<IIC_MMX_CVT_PS_RM, [InstrStage<5, [Port0], 0>,
- InstrStage<5, [Port1]>]>,
-
- InstrItinData<IIC_CMPX_LOCK, [InstrStage<14, [Port0, Port1]>] >,
- InstrItinData<IIC_CMPX_LOCK_8, [InstrStage<6, [Port0, Port1]>] >,
- InstrItinData<IIC_CMPX_LOCK_8B, [InstrStage<18, [Port0, Port1]>] >,
- InstrItinData<IIC_CMPX_LOCK_16B, [InstrStage<22, [Port0, Port1]>] >,
-
- InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<2, [Port0, Port1]>] >,
- InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<3, [Port0, Port1]>] >,
-
- InstrItinData<IIC_FILD, [InstrStage<5, [Port0], 0>, InstrStage<5, [Port1]>] >,
- InstrItinData<IIC_FLD, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_FLD80, [InstrStage<4, [Port0, Port1]>] >,
-
- InstrItinData<IIC_FST, [InstrStage<2, [Port0, Port1]>] >,
- InstrItinData<IIC_FST80, [InstrStage<5, [Port0, Port1]>] >,
- InstrItinData<IIC_FIST, [InstrStage<6, [Port0, Port1]>] >,
-
- InstrItinData<IIC_FCMOV, [InstrStage<9, [Port0, Port1]>] >,
- InstrItinData<IIC_FLDZ, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_FUCOM, [InstrStage<1, [Port1]>] >,
- InstrItinData<IIC_FUCOMI, [InstrStage<9, [Port0, Port1]>] >,
- InstrItinData<IIC_FCOMI, [InstrStage<9, [Port0, Port1]>] >,
- InstrItinData<IIC_FNSTSW, [InstrStage<10, [Port0, Port1]>] >,
- InstrItinData<IIC_FNSTCW, [InstrStage<8, [Port0, Port1]>] >,
- InstrItinData<IIC_FLDCW, [InstrStage<5, [Port0, Port1]>] >,
- InstrItinData<IIC_FNINIT, [InstrStage<63, [Port0, Port1]>] >,
- InstrItinData<IIC_FFREE, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_FNCLEX, [InstrStage<25, [Port0, Port1]>] >,
- InstrItinData<IIC_WAIT, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_FXAM, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_FNOP, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_FLDL, [InstrStage<10, [Port0, Port1]>] >,
- InstrItinData<IIC_F2XM1, [InstrStage<99, [Port0, Port1]>] >,
- InstrItinData<IIC_FYL2X, [InstrStage<146, [Port0, Port1]>] >,
- InstrItinData<IIC_FPTAN, [InstrStage<168, [Port0, Port1]>] >,
- InstrItinData<IIC_FPATAN, [InstrStage<183, [Port0, Port1]>] >,
- InstrItinData<IIC_FXTRACT, [InstrStage<25, [Port0, Port1]>] >,
- InstrItinData<IIC_FPREM1, [InstrStage<71, [Port0, Port1]>] >,
- InstrItinData<IIC_FPSTP, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_FPREM, [InstrStage<55, [Port0, Port1]>] >,
- InstrItinData<IIC_FYL2XP1, [InstrStage<147, [Port0, Port1]>] >,
- InstrItinData<IIC_FSINCOS, [InstrStage<174, [Port0, Port1]>] >,
- InstrItinData<IIC_FRNDINT, [InstrStage<46, [Port0, Port1]>] >,
- InstrItinData<IIC_FSCALE, [InstrStage<77, [Port0, Port1]>] >,
- InstrItinData<IIC_FCOMPP, [InstrStage<1, [Port1]>] >,
- InstrItinData<IIC_FXSAVE, [InstrStage<140, [Port0, Port1]>] >,
- InstrItinData<IIC_FXRSTOR, [InstrStage<141, [Port0, Port1]>] >,
- InstrItinData<IIC_FXCH, [InstrStage<1, [Port0], 0>, InstrStage<1, [Port1]>] >,
- InstrItinData<IIC_FSIGN, [InstrStage<1, [Port1]>] >,
- InstrItinData<IIC_FSQRT, [InstrStage<71, [Port0, Port1]>] >,
-
- // System instructions
- InstrItinData<IIC_CPUID, [InstrStage<121, [Port0, Port1]>] >,
- InstrItinData<IIC_INT, [InstrStage<127, [Port0, Port1]>] >,
- InstrItinData<IIC_INT3, [InstrStage<130, [Port0, Port1]>] >,
- InstrItinData<IIC_INVD, [InstrStage<1003, [Port0, Port1]>] >,
- InstrItinData<IIC_INVLPG, [InstrStage<71, [Port0, Port1]>] >,
- InstrItinData<IIC_IRET, [InstrStage<109, [Port0, Port1]>] >,
- InstrItinData<IIC_HLT, [InstrStage<121, [Port0, Port1]>] >,
- InstrItinData<IIC_LXS, [InstrStage<10, [Port0, Port1]>] >,
- InstrItinData<IIC_LTR, [InstrStage<83, [Port0, Port1]>] >,
- InstrItinData<IIC_RDTSC, [InstrStage<30, [Port0, Port1]>] >,
- InstrItinData<IIC_RDTSCP, [InstrStage<30, [Port0, Port1]>] >,
- InstrItinData<IIC_RSM, [InstrStage<741, [Port0, Port1]>] >,
- InstrItinData<IIC_SIDT, [InstrStage<4, [Port0, Port1]>] >,
- InstrItinData<IIC_SGDT, [InstrStage<4, [Port0, Port1]>] >,
- InstrItinData<IIC_SLDT, [InstrStage<3, [Port0, Port1]>] >,
- InstrItinData<IIC_STR, [InstrStage<3, [Port0, Port1]>] >,
- InstrItinData<IIC_SWAPGS, [InstrStage<22, [Port0, Port1]>] >,
- InstrItinData<IIC_SYSCALL, [InstrStage<96, [Port0, Port1]>] >,
- InstrItinData<IIC_SYS_ENTER_EXIT, [InstrStage<88, [Port0, Port1]>] >,
-
- InstrItinData<IIC_IN_RR, [InstrStage<94, [Port0, Port1]>] >,
- InstrItinData<IIC_IN_RI, [InstrStage<92, [Port0, Port1]>] >,
- InstrItinData<IIC_OUT_RR, [InstrStage<68, [Port0, Port1]>] >,
- InstrItinData<IIC_OUT_IR, [InstrStage<72, [Port0, Port1]>] >,
- InstrItinData<IIC_INS, [InstrStage<59, [Port0, Port1]>] >,
-
- InstrItinData<IIC_MOV_REG_DR, [InstrStage<88, [Port0, Port1]>] >,
- InstrItinData<IIC_MOV_DR_REG, [InstrStage<123, [Port0, Port1]>] >,
- // worst case for mov REG_CRx
- InstrItinData<IIC_MOV_REG_CR, [InstrStage<12, [Port0, Port1]>] >,
- InstrItinData<IIC_MOV_CR_REG, [InstrStage<136, [Port0, Port1]>] >,
-
- InstrItinData<IIC_MOV_REG_SR, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_MOV_MEM_SR, [InstrStage<2, [Port0, Port1]>] >,
- InstrItinData<IIC_MOV_SR_REG, [InstrStage<21, [Port0, Port1]>] >,
- InstrItinData<IIC_MOV_SR_MEM, [InstrStage<26, [Port0, Port1]>] >,
- // LAR
- InstrItinData<IIC_LAR_RM, [InstrStage<50, [Port0, Port1]>] >,
- InstrItinData<IIC_LAR_RR, [InstrStage<54, [Port0, Port1]>] >,
- // LSL
- InstrItinData<IIC_LSL_RM, [InstrStage<46, [Port0, Port1]>] >,
- InstrItinData<IIC_LSL_RR, [InstrStage<49, [Port0, Port1]>] >,
-
- InstrItinData<IIC_LGDT, [InstrStage<44, [Port0, Port1]>] >,
- InstrItinData<IIC_LIDT, [InstrStage<44, [Port0, Port1]>] >,
- InstrItinData<IIC_LLDT_REG, [InstrStage<60, [Port0, Port1]>] >,
- InstrItinData<IIC_LLDT_MEM, [InstrStage<64, [Port0, Port1]>] >,
- // push control register, segment registers
- InstrItinData<IIC_PUSH_CS, [InstrStage<2, [Port0, Port1]>] >,
- InstrItinData<IIC_PUSH_SR, [InstrStage<2, [Port0, Port1]>] >,
- // pop control register, segment registers
- InstrItinData<IIC_POP_SR, [InstrStage<29, [Port0, Port1]>] >,
- InstrItinData<IIC_POP_SR_SS, [InstrStage<48, [Port0, Port1]>] >,
- // VERR, VERW
- InstrItinData<IIC_VERR, [InstrStage<41, [Port0, Port1]>] >,
- InstrItinData<IIC_VERW_REG, [InstrStage<51, [Port0, Port1]>] >,
- InstrItinData<IIC_VERW_MEM, [InstrStage<50, [Port0, Port1]>] >,
- // WRMSR, RDMSR
- InstrItinData<IIC_WRMSR, [InstrStage<202, [Port0, Port1]>] >,
- InstrItinData<IIC_RDMSR, [InstrStage<78, [Port0, Port1]>] >,
- InstrItinData<IIC_RDPMC, [InstrStage<46, [Port0, Port1]>] >,
- // SMSW, LMSW
- InstrItinData<IIC_SMSW, [InstrStage<9, [Port0, Port1]>] >,
- InstrItinData<IIC_LMSW_REG, [InstrStage<69, [Port0, Port1]>] >,
- InstrItinData<IIC_LMSW_MEM, [InstrStage<67, [Port0, Port1]>] >,
-
- InstrItinData<IIC_ENTER, [InstrStage<32, [Port0, Port1]>] >,
- InstrItinData<IIC_LEAVE, [InstrStage<2, [Port0, Port1]>] >,
-
- InstrItinData<IIC_POP_MEM, [InstrStage<3, [Port0, Port1]>] >,
- InstrItinData<IIC_POP_REG16, [InstrStage<2, [Port0, Port1]>] >,
- InstrItinData<IIC_POP_REG, [InstrStage<1, [Port0], 0>,
- InstrStage<1, [Port1]>] >,
- InstrItinData<IIC_POP_F, [InstrStage<32, [Port0, Port1]>] >,
- InstrItinData<IIC_POP_FD, [InstrStage<26, [Port0, Port1]>] >,
- InstrItinData<IIC_POP_A, [InstrStage<9, [Port0, Port1]>] >,
-
- InstrItinData<IIC_PUSH_IMM, [InstrStage<1, [Port0], 0>,
- InstrStage<1, [Port1]>] >,
- InstrItinData<IIC_PUSH_MEM, [InstrStage<2, [Port0, Port1]>] >,
- InstrItinData<IIC_PUSH_REG, [InstrStage<1, [Port0], 0>,
- InstrStage<1, [Port1]>] >,
- InstrItinData<IIC_PUSH_F, [InstrStage<9, [Port0, Port1]>] >,
- InstrItinData<IIC_PUSH_A, [InstrStage<8, [Port0, Port1]>] >,
-
- InstrItinData<IIC_BSWAP, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_BIT_SCAN_MEM, [InstrStage<16, [Port0, Port1]>] >,
- InstrItinData<IIC_BIT_SCAN_REG, [InstrStage<16, [Port0, Port1]>] >,
- InstrItinData<IIC_MOVS, [InstrStage<3, [Port0, Port1]>] >,
- InstrItinData<IIC_STOS, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_SCAS, [InstrStage<2, [Port0, Port1]>] >,
- InstrItinData<IIC_CMPS, [InstrStage<3, [Port0, Port1]>] >,
- InstrItinData<IIC_MOV, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_MOV_MEM, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_AHF, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_BT_MI, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_BT_MR, [InstrStage<9, [Port0, Port1]>] >,
- InstrItinData<IIC_BT_RI, [InstrStage<1, [Port1]>] >,
- InstrItinData<IIC_BT_RR, [InstrStage<1, [Port1]>] >,
- InstrItinData<IIC_BTX_MI, [InstrStage<2, [Port0, Port1]>] >,
- InstrItinData<IIC_BTX_MR, [InstrStage<11, [Port0, Port1]>] >,
- InstrItinData<IIC_BTX_RI, [InstrStage<1, [Port1]>] >,
- InstrItinData<IIC_BTX_RR, [InstrStage<1, [Port1]>] >,
- InstrItinData<IIC_XCHG_REG, [InstrStage<2, [Port0, Port1]>] >,
- InstrItinData<IIC_XCHG_MEM, [InstrStage<3, [Port0, Port1]>] >,
- InstrItinData<IIC_XADD_REG, [InstrStage<2, [Port0, Port1]>] >,
- InstrItinData<IIC_XADD_MEM, [InstrStage<3, [Port0, Port1]>] >,
- InstrItinData<IIC_CMPXCHG_MEM, [InstrStage<14, [Port0, Port1]>] >,
- InstrItinData<IIC_CMPXCHG_REG, [InstrStage<15, [Port0, Port1]>] >,
- InstrItinData<IIC_CMPXCHG_MEM8, [InstrStage<6, [Port0, Port1]>] >,
- InstrItinData<IIC_CMPXCHG_REG8, [InstrStage<9, [Port0, Port1]>] >,
- InstrItinData<IIC_CMPXCHG_8B, [InstrStage<18, [Port0, Port1]>] >,
- InstrItinData<IIC_CMPXCHG_16B, [InstrStage<22, [Port0, Port1]>] >,
- InstrItinData<IIC_LODS, [InstrStage<2, [Port0, Port1]>] >,
- InstrItinData<IIC_OUTS, [InstrStage<74, [Port0, Port1]>] >,
- InstrItinData<IIC_CLC_CMC_STC, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_CLD, [InstrStage<3, [Port0, Port1]>] >,
- InstrItinData<IIC_CLI, [InstrStage<14, [Port0, Port1]>] >,
- InstrItinData<IIC_CLTS, [InstrStage<33, [Port0, Port1]>] >,
- InstrItinData<IIC_STI, [InstrStage<17, [Port0, Port1]>] >,
- InstrItinData<IIC_STD, [InstrStage<21, [Port0, Port1]>] >,
- InstrItinData<IIC_XLAT, [InstrStage<6, [Port0, Port1]>] >,
- InstrItinData<IIC_AAA, [InstrStage<13, [Port0, Port1]>] >,
- InstrItinData<IIC_AAD, [InstrStage<7, [Port0, Port1]>] >,
- InstrItinData<IIC_AAM, [InstrStage<21, [Port0, Port1]>] >,
- InstrItinData<IIC_AAS, [InstrStage<13, [Port0, Port1]>] >,
- InstrItinData<IIC_DAA, [InstrStage<18, [Port0, Port1]>] >,
- InstrItinData<IIC_DAS, [InstrStage<20, [Port0, Port1]>] >,
- InstrItinData<IIC_BOUND, [InstrStage<11, [Port0, Port1]>] >,
- InstrItinData<IIC_ARPL_REG, [InstrStage<24, [Port0, Port1]>] >,
- InstrItinData<IIC_ARPL_MEM, [InstrStage<23, [Port0, Port1]>] >,
- InstrItinData<IIC_MOVBE, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_CBW, [InstrStage<4, [Port0, Port1]>] >,
- InstrItinData<IIC_MMX_EMMS, [InstrStage<5, [Port0, Port1]>] >,
-
- InstrItinData<IIC_NOP, [InstrStage<1, [Port0, Port1]>] >
- ]>;
// Atom machine model.
def AtomModel : SchedMachineModel {
let IssueWidth = 2; // Allows 2 instructions per scheduling group.
let MicroOpBufferSize = 0; // In-order execution, always hide latency.
- let LoadLatency = 3; // Expected cycles, may be overriden by OperandCycles.
- let HighLatency = 30;// Expected, may be overriden by OperandCycles.
+ let LoadLatency = 3; // Expected cycles, may be overridden.
+ let HighLatency = 30;// Expected, may be overridden.
// On the Atom, the throughput for taken branches is 2 cycles. For small
// simple loops, expand by a small factor to hide the backedge cost.
let LoopMicroOpBufferSize = 10;
let PostRAScheduler = 1;
let CompleteModel = 0;
+}
+
+let SchedModel = AtomModel in {
+
+// Functional Units
+def AtomPort0 : ProcResource<1>; // ALU: ALU0, shift/rotate, load/store
+ // SIMD/FP: SIMD ALU, Shuffle, SIMD/FP multiply, divide
+def AtomPort1 : ProcResource<1>; // ALU: ALU1, bit processing, jump, and LEA
+ // SIMD/FP: SIMD ALU, FP Adder
+
+def AtomPort01 : ProcResGroup<[AtomPort0, AtomPort1]>; // Port0 or Port1
+
+// Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
+// cycles after the memory operand.
+def : ReadAdvance<ReadAfterLd, 3>;
+
+// Many SchedWrites are defined in pairs with and without a folded load.
+// This multiclass defines the resource usage for both variants: the
+// register form (RRPorts, RRLat, RRRes) and the folded-load form (RMPorts,
+// RMLat, RMRes). Latencies default to 1 cycle and resource cycles to [1];
+// nothing is added implicitly for the folded load.
+multiclass AtomWriteResPair<X86FoldableSchedWrite SchedRW,
+ list<ProcResourceKind> RRPorts,
+ list<ProcResourceKind> RMPorts,
+ int RRLat = 1, int RMLat = 1,
+ list<int> RRRes = [1],
+ list<int> RMRes = [1]> {
+ // Register variant: RRLat cycles of latency, RRRes cycles on RRPorts.
+ def : WriteRes<SchedRW, RRPorts> {
+ let Latency = RRLat;
+ let ResourceCycles = RRRes;
+ }
+
+ // Memory variant: RMLat cycles of latency, RMRes cycles on RMPorts. Any
+ // load cost must already be included in RMLat/RMRes by the caller.
+ def : WriteRes<SchedRW.Folded, RMPorts> {
+ let Latency = RMLat;
+ let ResourceCycles = RMRes;
+ }
+}
+
+// A folded store needs a cycle on Port0 for the store data.
+def : WriteRes<WriteRMW, [AtomPort0]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Arithmetic.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : AtomWriteResPair<WriteALU, [AtomPort01], [AtomPort0]>;
+defm : AtomWriteResPair<WriteIMul, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>;
+defm : AtomWriteResPair<WriteIDiv, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
+defm : AtomWriteResPair<WriteCRC32, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+
+defm : AtomWriteResPair<WriteCMOV, [AtomPort01], [AtomPort0]>;
+
+def : WriteRes<WriteSETCC, [AtomPort01]>;
+def : WriteRes<WriteSETCCStore, [AtomPort01]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+}
+
+def : WriteRes<WriteIMulH, [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+
+// This is for simple LEAs with one or two input operands.
+def : WriteRes<WriteLEA, [AtomPort1]>;
+
+def AtomWriteIMul16Ld : SchedWriteRes<[AtomPort01]> {
+ let Latency = 8;
+ let ResourceCycles = [8];
+}
+def : InstRW<[AtomWriteIMul16Ld], (instrs MUL16m, IMUL16m)>;
+
+def AtomWriteIMul32 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 6;
+ let ResourceCycles = [6];
+}
+def : InstRW<[AtomWriteIMul32], (instrs MUL32r, IMUL32r)>;
+
+def AtomWriteIMul64 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 12;
+ let ResourceCycles = [12];
+}
+def : InstRW<[AtomWriteIMul64], (instrs MUL64r, IMUL64r, IMUL64rr, IMUL64rm,
+ MUL64m, IMUL64m)>;
+
+def AtomWriteIMul64I : SchedWriteRes<[AtomPort01]> {
+ let Latency = 14;
+ let ResourceCycles = [14];
+}
+def : InstRW<[AtomWriteIMul64I], (instrs IMUL64rri8, IMUL64rri32,
+ IMUL64rmi8, IMUL64rmi32)>;
+
+def AtomWriteDiv : SchedWriteRes<[AtomPort01]> {
+ let Latency = 50;
+ let ResourceCycles = [50];
+}
+def : InstRW<[AtomWriteDiv], (instrs DIV8r,
+ DIV16r, DIV16m,
+ DIV32r, DIV32m)>;
+
+def AtomWriteDiv8Ld : SchedWriteRes<[AtomPort01]> {
+ let Latency = 68;
+ let ResourceCycles = [68];
+}
+def : InstRW<[AtomWriteDiv8Ld], (instrs DIV8m)>;
+
+def AtomWriteIDiv64 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 130;
+ let ResourceCycles = [130];
+}
+def : InstRW<[AtomWriteIDiv64], (instrs DIV64r, IDIV64r,
+ DIV64m, IDIV64m)>;
+
+// Bit counts.
+defm : AtomWriteResPair<WriteBitScan, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
+defm : AtomWriteResPair<WritePOPCNT, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteLZCNT, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteTZCNT, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+
+// BMI1 BEXTR, BMI2 BZHI
+defm : AtomWriteResPair<WriteBEXTR, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteBZHI, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+
+////////////////////////////////////////////////////////////////////////////////
+// Integer shifts and rotates.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : AtomWriteResPair<WriteShift, [AtomPort0], [AtomPort0]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Loads, stores, and moves, not folded with other operations.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteLoad, [AtomPort0]>;
+def : WriteRes<WriteStore, [AtomPort0]>;
+def : WriteRes<WriteMove, [AtomPort01]>;
+
+// Treat misc copies as a move.
+def : InstRW<[WriteMove], (instrs COPY)>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Idioms that clear a register, like xorps %xmm0, %xmm0.
+// These can often bypass execution ports completely.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteZero, []>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Branches don't produce values, so they have no latency, but they still
+// consume resources. Indirect branches can fold loads.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : AtomWriteResPair<WriteJump, [AtomPort1], [AtomPort1]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Special case scheduling classes.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteSystem, [AtomPort01]> { let Latency = 100; }
+def : WriteRes<WriteMicrocoded, [AtomPort01]> { let Latency = 100; }
+def : WriteRes<WriteFence, [AtomPort0]>;
+
+// Nops don't have dependencies, so there's no actual latency, but we set this
+// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
+def : WriteRes<WriteNop, [AtomPort01]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Floating point. This covers both scalar and vector operations.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteFLoad, [AtomPort0]>;
+def : WriteRes<WriteFStore, [AtomPort0]>;
+def : WriteRes<WriteFMove, [AtomPort01]>;
+
+defm : AtomWriteResPair<WriteFAdd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WriteFMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
+defm : AtomWriteResPair<WriteFRcp, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
+defm : AtomWriteResPair<WriteFRsqrt, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
+defm : AtomWriteResPair<WriteFDiv, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
+defm : AtomWriteResPair<WriteFSqrt, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
+defm : AtomWriteResPair<WriteFShuffle, [AtomPort0], [AtomPort0]>;
+defm : AtomWriteResPair<WriteFVarShuffle, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteFMA, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteFBlend, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteFVarBlend, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteFShuffle256, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteFVarShuffle256, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+
+////////////////////////////////////////////////////////////////////////////////
+// Conversions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : AtomWriteResPair<WriteCvtF2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>; // Float -> Integer.
+defm : AtomWriteResPair<WriteCvtI2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; // Integer -> Float.
+defm : AtomWriteResPair<WriteCvtF2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; // Float -> Float size conversion.
+
+////////////////////////////////////////////////////////////////////////////////
+// Vector integer operations.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteVecLoad, [AtomPort0]>;
+def : WriteRes<WriteVecStore, [AtomPort0]>;
+def : WriteRes<WriteVecMove, [AtomPort01]>;
+
+defm : AtomWriteResPair<WriteVecALU, [AtomPort01], [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WriteVecLogic, [AtomPort01], [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WriteVecShift, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
+defm : AtomWriteResPair<WriteVecIMul, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WritePMULLD, [AtomPort01], [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WriteMPSAD, [AtomPort01], [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WriteShuffle, [AtomPort0], [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WriteVarShuffle, [AtomPort01], [AtomPort01], 4, 5, [4], [5]>;
+defm : AtomWriteResPair<WriteBlend, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteVarBlend, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteShuffle256, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteVarShuffle256, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteVarVecShift, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+
+////////////////////////////////////////////////////////////////////////////////
+// SSE42 String instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : AtomWriteResPair<WritePCmpIStrI, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WritePCmpIStrM, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WritePCmpEStrI, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WritePCmpEStrM, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+
+////////////////////////////////////////////////////////////////////////////////
+// MOVMSK Instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteFMOVMSK, [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }
+def : WriteRes<WriteVecMOVMSK, [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }
+def : WriteRes<WriteMMXMOVMSK, [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }
+
+////////////////////////////////////////////////////////////////////////////////
+// AES Instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : AtomWriteResPair<WriteAESIMC, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteAESKeyGen, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteAESDecEnc, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+
+////////////////////////////////////////////////////////////////////////////////
+// Horizontal add/sub instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : AtomWriteResPair<WriteFHAdd, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
+defm : AtomWriteResPair<WritePHAdd, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Carry-less multiplication instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : AtomWriteResPair<WriteCLMul, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+
+////////////////////////////////////////////////////////////////////////////////
+// Special Cases.
+////////////////////////////////////////////////////////////////////////////////
+
+// Port0
+def AtomWrite0_1 : SchedWriteRes<[AtomPort0]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+}
+def : InstRW<[AtomWrite0_1], (instrs FXAM,
+ BSWAP32r, BSWAP64r,
+ DEC8m, DEC16m, DEC32m, DEC64m,
+ INC8m, INC16m, INC32m, INC64m,
+ MOVSX64rr32,
+ MMX_MOVD64rr, MMX_MOVD64mr,
+ MMX_MOVD64to64rr, MMX_MOVD64to64rm,
+ MMX_PSHUFBrr, MMX_PSHUFBrm,
+ MOVDI2PDIrr, MOVDI2PDIrm,
+ MOV64toPQIrr, MOV64toPQIrm,
+ MOV64toSDrr, MOV64toSDrm, MOVSDto64mr,
+ MOVDI2SSrr, MOVDI2SSrm,
+ MOVPDI2DImr, MOVPQIto64mr, MOVSS2DImr, MOVQI2PQIrm, MOVPQI2QImr)>;
+def : InstRW<[AtomWrite0_1], (instregex "(ADC|ADD|AND|NEG|NOT|OR|SBB|SUB|XOR)(8|16|32|64)m",
+ "(RCL|RCR|ROL|ROR|SAR|SHL|SHR)(8|16|32|64)m",
+ "MOV(S|Z)X(32|64)(rr|rm)(8|8_NOREX|16)",
+ "LD_F(P)?(16|32|64)?(m|rr)",
+ "MMX_MASKMOVQ(64)?",
+ "MMX_PAVG(B|W)irm",
+ "MMX_P(MAX|MIN)(UB|SW)irm",
+ "MMX_PSIGN(B|D|W)rm")>;
+
+def AtomWrite0_3 : SchedWriteRes<[AtomPort0]> {
+ let Latency = 3;
+ let ResourceCycles = [3];
+}
+def : InstRW<[AtomWrite0_3], (instrs MMX_MOVD64from64rr, MMX_MOVD64grr,
+ MOVPDI2DIrr, MOVPQIto64rr,
+ MOVSDto64rr, MOVSS2DIrr)>;
+
+def AtomWrite0_4 : SchedWriteRes<[AtomPort0]> {
+ let Latency = 4;
+ let ResourceCycles = [4];
+}
+def : InstRW<[AtomWrite0_4], (instrs MMX_PMADDUBSWrr, MMX_PMADDUBSWrm,
+ MMX_PMADDWDirr, MMX_PMADDWDirm,
+ MMX_PMULHRSWrr, MMX_PMULHRSWrm,
+ MMX_PMULHUWirr, MMX_PMULHUWirm,
+ MMX_PMULHWirr, MMX_PMULHWirm,
+ MMX_PMULLWirr, MMX_PMULLWirm,
+ MMX_PMULUDQirr, MMX_PMULUDQirm)>;
+
+def AtomWrite0_5 : SchedWriteRes<[AtomPort0]> {
+ let Latency = 5;
+ let ResourceCycles = [5];
+}
+def : InstRW<[AtomWrite0_5], (instregex "IMUL32(rm|rr)",
+ "MUL(PS|SD)(rr|rm)(_Int)?")>;
+
+// Port1
+def AtomWrite1_1 : SchedWriteRes<[AtomPort1]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+}
+def : InstRW<[AtomWrite1_1], (instrs FCOMPP)>;
+def : InstRW<[AtomWrite1_1], (instregex "ABS_F", "CHS_F",
+ "UCOM_F(P|PP)?r",
+ "BT(C|R|S)?(16|32|64)(rr|ri8)")>;
+
+def AtomWrite1_5 : SchedWriteRes<[AtomPort1]> {
+ let Latency = 5;
+ let ResourceCycles = [5];
+}
+def : InstRW<[AtomWrite1_5], (instrs MMX_CVTPI2PSirr, MMX_CVTPI2PSirm,
+ MMX_CVTPS2PIirr, MMX_CVTTPS2PIirr)>;
+
+// Port0 and Port1
+def AtomWrite0_1_1 : SchedWriteRes<[AtomPort0, AtomPort1]> {
+ let Latency = 1;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[AtomWrite0_1_1], (instrs POP32r, POP64r,
+ POP16rmr, POP32rmr, POP64rmr,
+ PUSH16r, PUSH32r, PUSH64r,
+ PUSHi16, PUSHi32,
+ PUSH16rmr, PUSH32rmr, PUSH64rmr,
+ PUSH16i8, PUSH32i8, PUSH64i8, PUSH64i32,
+ XCH_F)>;
+def : InstRW<[AtomWrite0_1_1], (instregex "RETI(L|Q|W)$",
+ "IRET(16|32|64)?")>;
+
+def AtomWrite0_1_5 : SchedWriteRes<[AtomPort0, AtomPort1]> {
+ let Latency = 5;
+ let ResourceCycles = [5, 5];
+}
+def : InstRW<[AtomWrite0_1_5], (instrs MMX_CVTPS2PIirm, MMX_CVTTPS2PIirm)>;
+def : InstRW<[AtomWrite0_1_5], (instregex "ILD_F(16|32|64)")>;
+
+// Port0 or Port1
+def AtomWrite01_1 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+}
+def : InstRW<[AtomWrite01_1], (instrs FDECSTP, FFREE, FFREEP, FINCSTP, LD_F0, WAIT,
+ LFENCE,
+ STOSB, STOSL, STOSQ, STOSW,
+ MOVSSrr, MOVSSrr_REV,
+ PSLLDQri, PSRLDQri)>;
+def : InstRW<[AtomWrite01_1], (instregex "(MMX_)?PS(LL|RA|RL)(D|Q|W)ri",
+ "MMX_PAVG(B|W)irr",
+ "MMX_P(MAX|MIN)(UB|SW)irr",
+ "MMX_PSIGN(B|D|W)rr",
+ "MMX_PACK(SSDW|SSWB|USWB)irr",
+ "MMX_PUNPCKH(BW|DQ|WD)irr")>;
+
+def AtomWrite01_2 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+}
+def : InstRW<[AtomWrite01_2], (instrs LEAVE, LEAVE64, POP16r,
+ PUSH16rmm, PUSH32rmm, PUSH64rmm,
+ LODSB, LODSL, LODSQ, LODSW,
+ SCASB, SCASL, SCASQ, SCASW,
+ SHLD32rrCL, SHRD32rrCL,
+ SHLD32rri8, SHRD32rri8)>;
+def : InstRW<[AtomWrite01_2], (instregex "BT(C|R|S)(16|32|64)mi8",
+ "PUSH(CS|DS|ES|FS|GS|SS)(16|32|64)",
+ "XADD(8|16|32|64)rr",
+ "XCHG(8|16|32|64)(ar|rr)",
+ "(ST|ISTT)_F(P)?(16|32|64)?(m|rr)",
+ "MMX_P(ADD|SUB)Qirr",
+ "MOV(S|Z)X16rr8",
+ "MOV(UPS|UPD|DQU)mr",
+ "MASKMOVDQU(64)?",
+ "P(ADD|SUB)Qrr")>;
+
+def AtomWrite01_3 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 3;
+ let ResourceCycles = [3];
+}
+def : InstRW<[AtomWrite01_3], (instrs CLD, LDDQUrm,
+ CMPSB, CMPSL, CMPSQ, CMPSW,
+ MOVSB, MOVSL, MOVSQ, MOVSW,
+ POP16rmm, POP32rmm, POP64rmm)>;
+def : InstRW<[AtomWrite01_3], (instregex "XADD(8|16|32|64)rm",
+ "XCHG(8|16|32|64)rm",
+ "(MMX_)?PH(ADD|SUB)Drr",
+ "MOV(S|Z)X16rm8",
+ "MMX_P(ADD|SUB)Qirm",
+ "MOV(UPS|UPD|DQU)rm",
+ "P(ADD|SUB)Qrm")>;
+
+def AtomWrite01_4 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 4;
+ let ResourceCycles = [4];
+}
+def : InstRW<[AtomWrite01_4], (instrs CBW, CWD, CWDE, CDQ, CDQE, CQO,
+ JCXZ, JECXZ, JRCXZ,
+ SHLD32mrCL, SHRD32mrCL,
+ SHLD32mri8, SHRD32mri8,
+ LD_F80m,
+ MMX_PSADBWirr, MMX_PSADBWirm)>;
+def : InstRW<[AtomWrite01_4], (instregex "(MMX_)?PH(ADD|SUB)Drm",
+ "(MMX_)?PEXTRWrr(_REV)?")>;
+
+def AtomWrite01_5 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 5;
+ let ResourceCycles = [5];
+}
+def : InstRW<[AtomWrite01_5], (instrs FLDCW16m, LDMXCSR,
+ MMX_EMMS)>;
+def : InstRW<[AtomWrite01_5], (instregex "ST_FP80m",
+ "MMX_PH(ADD|SUB)S?Wrr")>;
+
+def AtomWrite01_6 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 6;
+ let ResourceCycles = [6];
+}
+def : InstRW<[AtomWrite01_6], (instrs LD_F1, CMPXCHG8rm, INTO, XLAT,
+ SHLD16rrCL, SHRD16rrCL,
+ SHLD16rri8, SHRD16rri8,
+ SHLD16mrCL, SHRD16mrCL,
+ SHLD16mri8, SHRD16mri8,
+ ADDSUBPDrr, ADDSUBPDrm,
+ CVTPS2DQrr, CVTTPS2DQrr)>;
+def : InstRW<[AtomWrite01_6], (instregex "IMUL16rr",
+ "IST_F(P)?(16|32|64)?m",
+ "MMX_PH(ADD|SUB)S?Wrm",
+ "(ADD|SUB|MAX|MIN)PDrr",
+ "CMPPDrri")>;
+
+def AtomWrite01_7 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 7;
+ let ResourceCycles = [7];
+}
+def : InstRW<[AtomWrite01_7], (instrs AAD8i8,
+ CVTDQ2PDrr,
+ CVTPD2DQrr,
+ CVTPD2PSrr,
+ CVTPS2DQrm,
+ CVTPS2PDrr,
+ CVTTPD2DQrr,
+ CVTTPS2DQrm,
+ MMX_CVTPD2PIirr,
+ MMX_CVTPI2PDirr,
+ MMX_CVTTPD2PIirr)>;
+def : InstRW<[AtomWrite01_7], (instregex "(ADD|SUB|MAX|MIN)PDrm",
+ "CMPPDrmi")>;
+
+def AtomWrite01_8 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 8;
+ let ResourceCycles = [8];
+}
+def : InstRW<[AtomWrite01_8], (instrs LOOPE,
+ PUSHA16, PUSHA32,
+ SHLD64rrCL, SHRD64rrCL,
+ FNSTCW16m,
+ CVTDQ2PDrm,
+ CVTPD2DQrm,
+ CVTPD2PSrm,
+ CVTPS2PDrm,
+ CVTTPD2DQrm,
+ MMX_CVTPD2PIirm,
+ MMX_CVTPI2PDirm,
+ MMX_CVTTPD2PIirm)>;
+
+def AtomWrite01_9 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 9;
+ let ResourceCycles = [9];
+}
+def : InstRW<[AtomWrite01_9], (instrs BT16mr, BT32mr, BT64mr,
+ POPA16, POPA32,
+ PUSHF16, PUSHF32, PUSHF64,
+ SHLD64mrCL, SHRD64mrCL,
+ SHLD64mri8, SHRD64mri8,
+ SHLD64rri8, SHRD64rri8,
+ CMPXCHG8rr,
+ MULPDrr, RCPPSr, RSQRTPSr)>;
+def : InstRW<[AtomWrite01_9], (instregex "CMOV(B|BE|E|P|NB|NBE|NE|NP)_F",
+ "(U)?COM_FI", "TST_F",
+ "(U)?COMIS(D|S)rr",
+ "CVT(T)?SS2SI64rr(_Int)?")>;
+
+def AtomWrite01_10 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 10;
+ let ResourceCycles = [10];
+}
+def : InstRW<[AtomWrite01_10], (instrs FLDL2E, FLDL2T, FLDLG2, FLDLN2, FLDPI,
+ MULPDrm, RCPPSm, RSQRTPSm)>;
+def : InstRW<[AtomWrite01_10], (instregex "(U)?COMIS(D|S)rm",
+ "CVT(T)?SS2SI64rm(_Int)?")>;
+
+def AtomWrite01_11 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 11;
+ let ResourceCycles = [11];
+}
+def : InstRW<[AtomWrite01_11], (instrs BOUNDS16rm, BOUNDS32rm)>;
+def : InstRW<[AtomWrite01_11], (instregex "BT(C|R|S)(16|32|64)mr")>;
+
+def AtomWrite01_13 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 13;
+ let ResourceCycles = [13];
+}
+def : InstRW<[AtomWrite01_13], (instrs AAA, AAS)>;
+
+def AtomWrite01_14 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 14;
+ let ResourceCycles = [14];
+}
+def : InstRW<[AtomWrite01_14], (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>;
+
+def AtomWrite01_15 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 15;
+ let ResourceCycles = [15];
+}
+def : InstRW<[AtomWrite01_15], (instrs CMPXCHG16rr, CMPXCHG32rr, CMPXCHG64rr,
+ STMXCSR)>;
+
+def AtomWrite01_17 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 17;
+ let ResourceCycles = [17];
+}
+def : InstRW<[AtomWrite01_17], (instrs LOOPNE, PAUSE)>;
+
+def AtomWrite01_18 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 18;
+ let ResourceCycles = [18];
+}
+def : InstRW<[AtomWrite01_18], (instrs CMPXCHG8B, DAA, LOOP)>;
+
+def AtomWrite01_20 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 20;
+ let ResourceCycles = [20];
+}
+def : InstRW<[AtomWrite01_20], (instrs DAS)>;
+
+def AtomWrite01_21 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 21;
+ let ResourceCycles = [21];
+}
+def : InstRW<[AtomWrite01_21], (instrs AAM8i8, STD)>;
+
+def AtomWrite01_22 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 22;
+ let ResourceCycles = [22];
+}
+def : InstRW<[AtomWrite01_22], (instrs CMPXCHG16B)>;
+
+def AtomWrite01_23 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 23;
+ let ResourceCycles = [23];
+}
+def : InstRW<[AtomWrite01_23], (instrs ARPL16mr, ARPL16rr)>;
- let Itineraries = AtomItineraries;
+def AtomWrite01_25 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 25;
+ let ResourceCycles = [25];
}
+def : InstRW<[AtomWrite01_25], (instrs FNCLEX, FXTRACT)>;
+
+def AtomWrite01_26 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 26;
+ let ResourceCycles = [26];
+}
+def : InstRW<[AtomWrite01_26], (instrs POPF32, POPF64)>;
+
+def AtomWrite01_29 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 29;
+ let ResourceCycles = [29];
+}
+def : InstRW<[AtomWrite01_29], (instregex "POP(DS|ES|FS|GS)(16|32|64)")>;
+
+def AtomWrite01_30 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 30;
+ let ResourceCycles = [30];
+}
+def : InstRW<[AtomWrite01_30], (instrs RDTSC, RDTSCP)>;
+
+def AtomWrite01_32 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 32;
+ let ResourceCycles = [32];
+}
+def : InstRW<[AtomWrite01_32], (instrs ENTER, POPF16)>;
+
+def AtomWrite01_45 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 45;
+ let ResourceCycles = [45];
+}
+def : InstRW<[AtomWrite01_45], (instrs MONITORrrr)>;
+
+def AtomWrite01_46 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 46;
+ let ResourceCycles = [46];
+}
+def : InstRW<[AtomWrite01_46], (instrs FRNDINT, MWAITrr, RDPMC)>;
+
+def AtomWrite01_48 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 48;
+ let ResourceCycles = [48];
+}
+def : InstRW<[AtomWrite01_48], (instrs POPSS16, POPSS32)>;
+
+def AtomWrite01_55 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 55;
+ let ResourceCycles = [55];
+}
+def : InstRW<[AtomWrite01_55], (instrs FPREM)>;
+
+def AtomWrite01_59 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 59;
+ let ResourceCycles = [59];
+}
+def : InstRW<[AtomWrite01_59], (instrs INSB, INSL, INSW)>;
+
+def AtomWrite01_62 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 62;
+ let ResourceCycles = [62];
+}
+def : InstRW<[AtomWrite01_62], (instregex "DIVSD(r|m)(_Int)?",
+ "SQRTSD(r|m)(_Int)?")>;
+
+def AtomWrite01_63 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 63;
+ let ResourceCycles = [63];
+}
+def : InstRW<[AtomWrite01_63], (instrs FNINIT)>;
+
+def AtomWrite01_68 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 68;
+ let ResourceCycles = [68];
+}
+def : InstRW<[AtomWrite01_68], (instrs OUT8rr, OUT16rr, OUT32rr)>;
+
+def AtomWrite01_70 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 70;
+ let ResourceCycles = [70];
+}
+def : InstRW<[AtomWrite01_70], (instrs DIVPSrr, DIVPSrm, SQRTPSr, SQRTPSm)>;
+
+def AtomWrite01_71 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 71;
+ let ResourceCycles = [71];
+}
+def : InstRW<[AtomWrite01_71], (instrs FPREM1,
+ INVLPG, INVLPGA32, INVLPGA64)>;
+def : InstRW<[AtomWrite01_71], (instregex "SQRT_F")>;
+
+def AtomWrite01_72 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 72;
+ let ResourceCycles = [72];
+}
+def : InstRW<[AtomWrite01_72], (instrs OUT8ir, OUT16ir, OUT32ir)>;
+
+def AtomWrite01_74 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 74;
+ let ResourceCycles = [74];
+}
+def : InstRW<[AtomWrite01_74], (instrs OUTSB, OUTSL, OUTSW)>;
+
+def AtomWrite01_77 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 77;
+ let ResourceCycles = [77];
+}
+def : InstRW<[AtomWrite01_77], (instrs FSCALE)>;
+
+def AtomWrite01_78 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 78;
+ let ResourceCycles = [78];
+}
+def : InstRW<[AtomWrite01_78], (instrs RDMSR)>;
+
+def AtomWrite01_79 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 79;
+ let ResourceCycles = [79];
+}
+def : InstRW<[AtomWrite01_79], (instregex "RET(L|Q|W)?$",
+ "LRETI?(L|Q|W)")>;
+
+def AtomWrite01_92 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 92;
+ let ResourceCycles = [92];
+}
+def : InstRW<[AtomWrite01_92], (instrs IN8ri, IN16ri, IN32ri)>;
+
+def AtomWrite01_94 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 94;
+ let ResourceCycles = [94];
+}
+def : InstRW<[AtomWrite01_94], (instrs IN8rr, IN16rr, IN32rr)>;
+
+def AtomWrite01_99 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 99;
+ let ResourceCycles = [99];
+}
+def : InstRW<[AtomWrite01_99], (instrs F2XM1)>;
+
+def AtomWrite01_121 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 121;
+ let ResourceCycles = [121];
+}
+def : InstRW<[AtomWrite01_121], (instrs CPUID)>;
+
+def AtomWrite01_125 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 125;
+ let ResourceCycles = [125];
+}
+def : InstRW<[AtomWrite01_125], (instrs DIVPDrr, DIVPDrm, SQRTPDr, SQRTPDm)>;
+
+def AtomWrite01_127 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 127;
+ let ResourceCycles = [127];
+}
+def : InstRW<[AtomWrite01_127], (instrs INT)>;
+
+def AtomWrite01_130 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 130;
+ let ResourceCycles = [130];
+}
+def : InstRW<[AtomWrite01_130], (instrs INT3)>;
+
+def AtomWrite01_140 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 140;
+ let ResourceCycles = [140];
+}
+def : InstRW<[AtomWrite01_140], (instrs FXSAVE, FXSAVE64)>;
+
+def AtomWrite01_141 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 141;
+ let ResourceCycles = [141];
+}
+def : InstRW<[AtomWrite01_141], (instrs FXRSTOR, FXRSTOR64)>;
+
+def AtomWrite01_146 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 146;
+ let ResourceCycles = [146];
+}
+def : InstRW<[AtomWrite01_146], (instrs FYL2X)>;
+
+def AtomWrite01_147 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 147;
+ let ResourceCycles = [147];
+}
+def : InstRW<[AtomWrite01_147], (instrs FYL2XP1)>;
+
+def AtomWrite01_168 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 168;
+ let ResourceCycles = [168];
+}
+def : InstRW<[AtomWrite01_168], (instrs FPTAN)>;
+
+def AtomWrite01_174 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 174;
+ let ResourceCycles = [174];
+}
+def : InstRW<[AtomWrite01_174], (instrs FSINCOS)>;
+def : InstRW<[AtomWrite01_174], (instregex "(COS|SIN)_F")>;
+
+def AtomWrite01_183 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 183;
+ let ResourceCycles = [183];
+}
+def : InstRW<[AtomWrite01_183], (instrs FPATAN)>;
+
+def AtomWrite01_202 : SchedWriteRes<[AtomPort01]> {
+ let Latency = 202;
+ let ResourceCycles = [202];
+}
+def : InstRW<[AtomWrite01_202], (instrs WRMSR)>;
+
+} // SchedModel
diff --git a/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll b/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll
index 240026164e6..c594354f570 100644
--- a/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll
+++ b/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll
@@ -7,8 +7,10 @@
; CHECK-NEXT: jne
; ATOM-LABEL: t:
-; ATOM: movl (%r9,%r{{.+}},4), %e{{..}}
+; ATOM: movl (%r9,%r{{.+}},4), %r{{..}}
+; ATOM-NEXT: xorl
; ATOM-NEXT: testq
+; ATOM-NEXT: movl
; ATOM-NEXT: jne
@Te0 = external global [256 x i32] ; <[256 x i32]*> [#uses=5]
diff --git a/llvm/test/CodeGen/X86/lsr-static-addr.ll b/llvm/test/CodeGen/X86/lsr-static-addr.ll
index 1d4cb3c04e9..beea00139cf 100644
--- a/llvm/test/CodeGen/X86/lsr-static-addr.ll
+++ b/llvm/test/CodeGen/X86/lsr-static-addr.ll
@@ -1,5 +1,5 @@
; RUN: llc -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck %s
-; RUN: llc -mcpu=atom -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck -check-prefix=ATOM %s
+; RUN: llc -mcpu=atom -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck %s
; CHECK: xorl %eax, %eax
; CHECK: movsd .LCPI0_0(%rip), %xmm0
@@ -10,16 +10,6 @@
; CHECK-NEXT: movsd
; CHECK-NEXT: incq %rax
-
-; ATOM: movsd .LCPI0_0(%rip), %xmm0
-; ATOM: xorl %eax, %eax
-; ATOM: align
-; ATOM-NEXT: BB0_2:
-; ATOM-NEXT: movsd A(,%rax,8)
-; ATOM-NEXT: mulsd
-; ATOM-NEXT: movsd
-; ATOM-NEXT: incq %rax
-
@A = external global [0 x double]
define void @foo(i64 %n) nounwind {
diff --git a/llvm/test/CodeGen/X86/mmx-schedule.ll b/llvm/test/CodeGen/X86/mmx-schedule.ll
index 65271fda8df..53cd12ed937 100644
--- a/llvm/test/CodeGen/X86/mmx-schedule.ll
+++ b/llvm/test/CodeGen/X86/mmx-schedule.ll
@@ -647,11 +647,11 @@ define i32 @test_movd(x86_mmx %a0, i32 %a1, i32 *%a2) {
;
; ATOM-LABEL: test_movd:
; ATOM: # %bb.0:
-; ATOM-NEXT: movd (%rsi), %mm1 # sched: [1:1.00]
-; ATOM-NEXT: movd %edi, %mm2 # sched: [1:1.00]
-; ATOM-NEXT: paddd %mm2, %mm1 # sched: [1:0.50]
-; ATOM-NEXT: paddd %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: movd %mm1, %ecx # sched: [3:3.00]
+; ATOM-NEXT: movd %edi, %mm1 # sched: [1:1.00]
+; ATOM-NEXT: movd (%rsi), %mm2 # sched: [1:1.00]
+; ATOM-NEXT: paddd %mm1, %mm2 # sched: [1:0.50]
+; ATOM-NEXT: paddd %mm2, %mm0 # sched: [1:0.50]
+; ATOM-NEXT: movd %mm2, %ecx # sched: [3:3.00]
; ATOM-NEXT: movd %mm0, %eax # sched: [3:3.00]
; ATOM-NEXT: movl %ecx, (%rsi) # sched: [1:1.00]
; ATOM-NEXT: retq # sched: [79:39.50]
@@ -3509,8 +3509,8 @@ define i64 @test_pinsrw(x86_mmx %a0, i32 %a1, i16* %a2) optsize {
;
; ATOM-LABEL: test_pinsrw:
; ATOM: # %bb.0:
-; ATOM-NEXT: movswl (%rsi), %eax # sched: [1:1.00]
; ATOM-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:1.00]
+; ATOM-NEXT: movswl (%rsi), %eax # sched: [1:1.00]
; ATOM-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:1.00]
; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
; ATOM-NEXT: retq # sched: [79:39.50]
diff --git a/llvm/test/CodeGen/X86/schedule-x86_32.ll b/llvm/test/CodeGen/X86/schedule-x86_32.ll
index 6f5c403c852..bcd1c6b74fc 100644
--- a/llvm/test/CodeGen/X86/schedule-x86_32.ll
+++ b/llvm/test/CodeGen/X86/schedule-x86_32.ll
@@ -1220,7 +1220,7 @@ define void @test_into() optsize {
; ATOM-LABEL: test_into:
; ATOM: # %bb.0:
; ATOM-NEXT: #APP
-; ATOM-NEXT: into # sched: [0:?]
+; ATOM-NEXT: into # sched: [6:3.00]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
diff --git a/llvm/test/CodeGen/X86/schedule-x86_64.ll b/llvm/test/CodeGen/X86/schedule-x86_64.ll
index db49c55e199..859591f7045 100644
--- a/llvm/test/CodeGen/X86/schedule-x86_64.ll
+++ b/llvm/test/CodeGen/X86/schedule-x86_64.ll
@@ -15737,7 +15737,7 @@ define void @test_ud2() optsize {
; ATOM-LABEL: test_ud2:
; ATOM: # %bb.0:
; ATOM-NEXT: #APP
-; ATOM-NEXT: ud2 # sched: [0:?]
+; ATOM-NEXT: ud2 # sched: [100:0.50]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retq # sched: [79:39.50]
;
diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
index e7ee2657f61..7e881de0c80 100644
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -104,14 +104,23 @@ declare i1 @return_false()
;; Select between two floating point constants.
define float @test3(i32 %x) nounwind readnone {
-; CHECK-LABEL: test3:
-; CHECK: ## %bb.0: ## %entry
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: leaq {{.*}}(%rip), %rcx
-; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: retq
+; GENERIC-LABEL: test3:
+; GENERIC: ## %bb.0: ## %entry
+; GENERIC-NEXT: xorl %eax, %eax
+; GENERIC-NEXT: testl %edi, %edi
+; GENERIC-NEXT: sete %al
+; GENERIC-NEXT: leaq {{.*}}(%rip), %rcx
+; GENERIC-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test3:
+; ATOM: ## %bb.0: ## %entry
+; ATOM-NEXT: xorl %eax, %eax
+; ATOM-NEXT: leaq {{.*}}(%rip), %rcx
+; ATOM-NEXT: testl %edi, %edi
+; ATOM-NEXT: sete %al
+; ATOM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ATOM-NEXT: retq
;
; MCU-LABEL: test3:
; MCU: # %bb.0: # %entry
@@ -266,15 +275,25 @@ define void @test6(i32 %C, <4 x float>* %A, <4 x float>* %B) nounwind {
; Select with fp80's
define x86_fp80 @test7(i32 %tmp8) nounwind {
-; CHECK-LABEL: test7:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: setns %al
-; CHECK-NEXT: shlq $4, %rax
-; CHECK-NEXT: leaq {{.*}}(%rip), %rcx
-; CHECK-NEXT: fldt (%rax,%rcx)
-; CHECK-NEXT: retq
+; GENERIC-LABEL: test7:
+; GENERIC: ## %bb.0:
+; GENERIC-NEXT: xorl %eax, %eax
+; GENERIC-NEXT: testl %edi, %edi
+; GENERIC-NEXT: setns %al
+; GENERIC-NEXT: shlq $4, %rax
+; GENERIC-NEXT: leaq {{.*}}(%rip), %rcx
+; GENERIC-NEXT: fldt (%rax,%rcx)
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test7:
+; ATOM: ## %bb.0:
+; ATOM-NEXT: xorl %eax, %eax
+; ATOM-NEXT: leaq {{.*}}(%rip), %rcx
+; ATOM-NEXT: testl %edi, %edi
+; ATOM-NEXT: setns %al
+; ATOM-NEXT: shlq $4, %rax
+; ATOM-NEXT: fldt (%rax,%rcx)
+; ATOM-NEXT: retq
;
; MCU-LABEL: test7:
; MCU: # %bb.0:
@@ -330,31 +349,32 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2)
; ATOM-NEXT: testb $1, %dil
; ATOM-NEXT: jne LBB7_1
; ATOM-NEXT: ## %bb.2:
-; ATOM-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; ATOM-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; ATOM-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; ATOM-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
-; ATOM-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; ATOM-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ATOM-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; ATOM-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; ATOM-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; ATOM-NEXT: jmp LBB7_3
; ATOM-NEXT: LBB7_1:
-; ATOM-NEXT: movd %r9d, %xmm0
+; ATOM-NEXT: movd %r9d, %xmm1
; ATOM-NEXT: movd %r8d, %xmm2
-; ATOM-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; ATOM-NEXT: movd %ecx, %xmm3
; ATOM-NEXT: movd %edx, %xmm0
-; ATOM-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
+; ATOM-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; ATOM-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; ATOM-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; ATOM-NEXT: LBB7_3:
-; ATOM-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; ATOM-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; ATOM-NEXT: pcmpeqd %xmm2, %xmm2
-; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
+; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; ATOM-NEXT: paddd %xmm2, %xmm0
; ATOM-NEXT: paddd %xmm2, %xmm1
-; ATOM-NEXT: movdqa %xmm0, (%rsi)
; ATOM-NEXT: movq %xmm1, 16(%rsi)
+; ATOM-NEXT: movdqa %xmm0, (%rsi)
; ATOM-NEXT: retq
;
; MCU-LABEL: test8:
@@ -634,8 +654,8 @@ define noalias i8* @test12(i64 %count) nounwind ssp noredzone {
; ATOM: ## %bb.0: ## %entry
; ATOM-NEXT: movq %rdi, %rax
; ATOM-NEXT: movl $4, %ecx
-; ATOM-NEXT: mulq %rcx
; ATOM-NEXT: movq $-1, %rdi
+; ATOM-NEXT: mulq %rcx
; ATOM-NEXT: cmovnoq %rax, %rdi
; ATOM-NEXT: jmp __Znam ## TAILCALL
;
@@ -894,8 +914,8 @@ define void @clamp_i8(i32 %src, i8* %dst) {
; ATOM: ## %bb.0:
; ATOM-NEXT: cmpl $127, %edi
; ATOM-NEXT: movl $127, %eax
-; ATOM-NEXT: cmovlel %edi, %eax
; ATOM-NEXT: movb $-128, %cl
+; ATOM-NEXT: cmovlel %edi, %eax
; ATOM-NEXT: cmpl $-128, %eax
; ATOM-NEXT: jl LBB22_2
; ATOM-NEXT: ## %bb.1:
@@ -946,8 +966,8 @@ define void @clamp(i32 %src, i16* %dst) {
; ATOM: ## %bb.0:
; ATOM-NEXT: cmpl $32767, %edi ## imm = 0x7FFF
; ATOM-NEXT: movl $32767, %eax ## imm = 0x7FFF
-; ATOM-NEXT: cmovlel %edi, %eax
; ATOM-NEXT: movl $32768, %ecx ## imm = 0x8000
+; ATOM-NEXT: cmovlel %edi, %eax
; ATOM-NEXT: cmpl $-32768, %eax ## imm = 0x8000
; ATOM-NEXT: cmovgel %eax, %ecx
; ATOM-NEXT: movw %cx, (%rsi)
diff --git a/llvm/test/CodeGen/X86/sse-schedule.ll b/llvm/test/CodeGen/X86/sse-schedule.ll
index 7b6831afcda..dcd66435863 100644
--- a/llvm/test/CodeGen/X86/sse-schedule.ll
+++ b/llvm/test/CodeGen/X86/sse-schedule.ll
@@ -6133,8 +6133,6 @@ define <4 x float> @test_fnop() nounwind {
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_fnop:
diff --git a/llvm/test/CodeGen/X86/sse2-schedule.ll b/llvm/test/CodeGen/X86/sse2-schedule.ll
index 5a1f1fbc8b8..76c5360b570 100644
--- a/llvm/test/CodeGen/X86/sse2-schedule.ll
+++ b/llvm/test/CodeGen/X86/sse2-schedule.ll
@@ -4670,10 +4670,10 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) {
; ATOM: # %bb.0:
; ATOM-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00]
; ATOM-NEXT: movq %rdi, %xmm2 # sched: [1:1.00]
-; ATOM-NEXT: paddq %xmm0, %xmm2 # sched: [2:1.00]
; ATOM-NEXT: paddq %xmm0, %xmm1 # sched: [2:1.00]
-; ATOM-NEXT: movq %xmm2, (%rsi) # sched: [1:1.00]
+; ATOM-NEXT: paddq %xmm0, %xmm2 # sched: [2:1.00]
; ATOM-NEXT: movq %xmm1, %rax # sched: [3:3.00]
+; ATOM-NEXT: movq %xmm2, (%rsi) # sched: [1:1.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_movd_64:
@@ -10447,10 +10447,11 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) {
;
; ATOM-LABEL: test_pshufd:
; ATOM: # %bb.0:
-; ATOM-NEXT: pshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [1:1.00]
-; ATOM-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
-; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
+; ATOM-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [1:1.00]
+; ATOM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT: nop # sched: [1:0.50]
+; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_pshufd:
@@ -10575,10 +10576,11 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) {
;
; ATOM-LABEL: test_pshufhw:
; ATOM: # %bb.0:
-; ATOM-NEXT: pshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [1:1.00]
-; ATOM-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
-; ATOM-NEXT: paddw %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
+; ATOM-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [1:1.00]
+; ATOM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT: nop # sched: [1:0.50]
+; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_pshufhw:
@@ -10703,10 +10705,11 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) {
;
; ATOM-LABEL: test_pshuflw:
; ATOM: # %bb.0:
-; ATOM-NEXT: pshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [1:1.00]
-; ATOM-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
-; ATOM-NEXT: paddw %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
+; ATOM-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [1:1.00]
+; ATOM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT: nop # sched: [1:0.50]
+; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_pshuflw:
diff --git a/llvm/test/CodeGen/X86/sse3-schedule.ll b/llvm/test/CodeGen/X86/sse3-schedule.ll
index 50bdf203c74..8c2424ec405 100644
--- a/llvm/test/CodeGen/X86/sse3-schedule.ll
+++ b/llvm/test/CodeGen/X86/sse3-schedule.ll
@@ -899,10 +899,9 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) {
;
; ATOM-LABEL: test_movddup:
; ATOM: # %bb.0:
-; ATOM-NEXT: movddup {{.*#+}} xmm1 = mem[0,0] sched: [1:1.00]
-; ATOM-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
-; ATOM-NEXT: subpd %xmm0, %xmm1 # sched: [6:3.00]
-; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
+; ATOM-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [1:1.00]
+; ATOM-NEXT: subpd %xmm1, %xmm0 # sched: [6:3.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_movddup:
@@ -1027,10 +1026,9 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) {
;
; ATOM-LABEL: test_movshdup:
; ATOM: # %bb.0:
-; ATOM-NEXT: movshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [1:1.00]
-; ATOM-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
-; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
-; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
+; ATOM-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [1:1.00]
+; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_movshdup:
@@ -1155,10 +1153,9 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) {
;
; ATOM-LABEL: test_movsldup:
; ATOM: # %bb.0:
-; ATOM-NEXT: movsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [1:1.00]
-; ATOM-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
-; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
-; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
+; ATOM-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [1:1.00]
+; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_movsldup:
diff --git a/llvm/test/CodeGen/X86/ssse3-schedule.ll b/llvm/test/CodeGen/X86/ssse3-schedule.ll
index ffa7ef12f38..d6177434f49 100644
--- a/llvm/test/CodeGen/X86/ssse3-schedule.ll
+++ b/llvm/test/CodeGen/X86/ssse3-schedule.ll
@@ -29,10 +29,11 @@ define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) {
;
; ATOM-LABEL: test_pabsb:
; ATOM: # %bb.0:
-; ATOM-NEXT: pabsb (%rdi), %xmm1 # sched: [1:1.00]
-; ATOM-NEXT: pabsb %xmm0, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: por %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50]
+; ATOM-NEXT: pabsb (%rdi), %xmm0 # sched: [1:1.00]
+; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT: nop # sched: [1:0.50]
+; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_pabsb:
@@ -157,10 +158,11 @@ define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) {
;
; ATOM-LABEL: test_pabsd:
; ATOM: # %bb.0:
-; ATOM-NEXT: pabsd (%rdi), %xmm1 # sched: [1:1.00]
-; ATOM-NEXT: pabsd %xmm0, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: por %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50]
+; ATOM-NEXT: pabsd (%rdi), %xmm0 # sched: [1:1.00]
+; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT: nop # sched: [1:0.50]
+; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_pabsd:
@@ -285,10 +287,11 @@ define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) {
;
; ATOM-LABEL: test_pabsw:
; ATOM: # %bb.0:
-; ATOM-NEXT: pabsw (%rdi), %xmm1 # sched: [1:1.00]
-; ATOM-NEXT: pabsw %xmm0, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: por %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50]
+; ATOM-NEXT: pabsw (%rdi), %xmm0 # sched: [1:1.00]
+; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT: nop # sched: [1:0.50]
+; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_pabsw:
diff --git a/llvm/test/CodeGen/X86/x87-schedule.ll b/llvm/test/CodeGen/X86/x87-schedule.ll
index 41b62e833c0..5d01286cc60 100644
--- a/llvm/test/CodeGen/X86/x87-schedule.ll
+++ b/llvm/test/CodeGen/X86/x87-schedule.ll
@@ -177,10 +177,10 @@ define void @test_fadd(float *%a0, double *%a1) optsize {
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
; ATOM-NEXT: #APP
-; ATOM-NEXT: fadd %st(0), %st(1) # sched: [0:?]
-; ATOM-NEXT: fadd %st(2) # sched: [0:?]
-; ATOM-NEXT: fadds (%ecx) # sched: [0:?]
-; ATOM-NEXT: faddl (%eax) # sched: [0:?]
+; ATOM-NEXT: fadd %st(0), %st(1) # sched: [5:5.00]
+; ATOM-NEXT: fadd %st(2) # sched: [5:5.00]
+; ATOM-NEXT: fadds (%ecx) # sched: [5:5.00]
+; ATOM-NEXT: faddl (%eax) # sched: [5:5.00]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
@@ -301,10 +301,10 @@ define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize {
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
; ATOM-NEXT: #APP
-; ATOM-NEXT: faddp %st(1) # sched: [0:?]
-; ATOM-NEXT: faddp %st(2) # sched: [0:?]
-; ATOM-NEXT: fiadds (%ecx) # sched: [0:?]
-; ATOM-NEXT: fiaddl (%eax) # sched: [0:?]
+; ATOM-NEXT: faddp %st(1) # sched: [5:5.00]
+; ATOM-NEXT: faddp %st(2) # sched: [5:5.00]
+; ATOM-NEXT: fiadds (%ecx) # sched: [5:5.00]
+; ATOM-NEXT: fiaddl (%eax) # sched: [5:5.00]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
@@ -421,8 +421,8 @@ define void @test_fbld_fbstp(i8* %a0) optsize {
; ATOM: # %bb.0:
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
; ATOM-NEXT: #APP
-; ATOM-NEXT: fbld (%eax) # sched: [0:?]
-; ATOM-NEXT: fbstp (%eax) # sched: [0:?]
+; ATOM-NEXT: fbld (%eax) # sched: [100:0.50]
+; ATOM-NEXT: fbstp (%eax) # sched: [100:0.50]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
@@ -895,10 +895,10 @@ define void @test_fcom(float *%a0, double *%a1) optsize {
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
; ATOM-NEXT: #APP
-; ATOM-NEXT: fcom %st(1) # sched: [0:?]
-; ATOM-NEXT: fcom %st(3) # sched: [0:?]
-; ATOM-NEXT: fcoms (%ecx) # sched: [0:?]
-; ATOM-NEXT: fcoml (%eax) # sched: [0:?]
+; ATOM-NEXT: fcom %st(1) # sched: [5:5.00]
+; ATOM-NEXT: fcom %st(3) # sched: [5:5.00]
+; ATOM-NEXT: fcoms (%ecx) # sched: [5:5.00]
+; ATOM-NEXT: fcoml (%eax) # sched: [5:5.00]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
@@ -1020,10 +1020,10 @@ define void @test_fcomp_fcompp(float *%a0, double *%a1) optsize {
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
; ATOM-NEXT: #APP
-; ATOM-NEXT: fcomp %st(1) # sched: [0:?]
-; ATOM-NEXT: fcomp %st(3) # sched: [0:?]
-; ATOM-NEXT: fcomps (%ecx) # sched: [0:?]
-; ATOM-NEXT: fcompl (%eax) # sched: [0:?]
+; ATOM-NEXT: fcomp %st(1) # sched: [5:5.00]
+; ATOM-NEXT: fcomp %st(3) # sched: [5:5.00]
+; ATOM-NEXT: fcomps (%ecx) # sched: [5:5.00]
+; ATOM-NEXT: fcompl (%eax) # sched: [5:5.00]
; ATOM-NEXT: fcompp # sched: [1:1.00]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
@@ -1385,10 +1385,10 @@ define void @test_fdiv(float *%a0, double *%a1) optsize {
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
; ATOM-NEXT: #APP
-; ATOM-NEXT: fdiv %st(0), %st(1) # sched: [0:?]
-; ATOM-NEXT: fdiv %st(2) # sched: [0:?]
-; ATOM-NEXT: fdivs (%ecx) # sched: [0:?]
-; ATOM-NEXT: fdivl (%eax) # sched: [0:?]
+; ATOM-NEXT: fdiv %st(0), %st(1) # sched: [34:17.00]
+; ATOM-NEXT: fdiv %st(2) # sched: [34:17.00]
+; ATOM-NEXT: fdivs (%ecx) # sched: [34:17.00]
+; ATOM-NEXT: fdivl (%eax) # sched: [34:17.00]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
@@ -1509,10 +1509,10 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize {
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
; ATOM-NEXT: #APP
-; ATOM-NEXT: fdivp %st(1) # sched: [0:?]
-; ATOM-NEXT: fdivp %st(2) # sched: [0:?]
-; ATOM-NEXT: fidivs (%ecx) # sched: [0:?]
-; ATOM-NEXT: fidivl (%eax) # sched: [0:?]
+; ATOM-NEXT: fdivp %st(1) # sched: [34:17.00]
+; ATOM-NEXT: fdivp %st(2) # sched: [34:17.00]
+; ATOM-NEXT: fidivs (%ecx) # sched: [34:17.00]
+; ATOM-NEXT: fidivl (%eax) # sched: [34:17.00]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
@@ -1633,10 +1633,10 @@ define void @test_fdivr(float *%a0, double *%a1) optsize {
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
; ATOM-NEXT: #APP
-; ATOM-NEXT: fdivr %st(0), %st(1) # sched: [0:?]
-; ATOM-NEXT: fdivr %st(2) # sched: [0:?]
-; ATOM-NEXT: fdivrs (%ecx) # sched: [0:?]
-; ATOM-NEXT: fdivrl (%eax) # sched: [0:?]
+; ATOM-NEXT: fdivr %st(0), %st(1) # sched: [34:17.00]
+; ATOM-NEXT: fdivr %st(2) # sched: [34:17.00]
+; ATOM-NEXT: fdivrs (%ecx) # sched: [34:17.00]
+; ATOM-NEXT: fdivrl (%eax) # sched: [34:17.00]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
@@ -1757,10 +1757,10 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize {
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
; ATOM-NEXT: #APP
-; ATOM-NEXT: fdivrp %st(1) # sched: [0:?]
-; ATOM-NEXT: fdivrp %st(2) # sched: [0:?]
-; ATOM-NEXT: fidivrs (%ecx) # sched: [0:?]
-; ATOM-NEXT: fidivrl (%eax) # sched: [0:?]
+; ATOM-NEXT: fdivrp %st(1) # sched: [34:17.00]
+; ATOM-NEXT: fdivrp %st(2) # sched: [34:17.00]
+; ATOM-NEXT: fidivrs (%ecx) # sched: [34:17.00]
+; ATOM-NEXT: fidivrl (%eax) # sched: [34:17.00]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
@@ -1955,10 +1955,10 @@ define void @test_ficom(i16 *%a0, i32 *%a1) optsize {
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
; ATOM-NEXT: #APP
-; ATOM-NEXT: ficoms (%ecx) # sched: [0:?]
-; ATOM-NEXT: ficoml (%eax) # sched: [0:?]
-; ATOM-NEXT: ficomps (%ecx) # sched: [0:?]
-; ATOM-NEXT: ficompl (%eax) # sched: [0:?]
+; ATOM-NEXT: ficoms (%ecx) # sched: [5:5.00]
+; ATOM-NEXT: ficoml (%eax) # sched: [5:5.00]
+; ATOM-NEXT: ficomps (%ecx) # sched: [5:5.00]
+; ATOM-NEXT: ficompl (%eax) # sched: [5:5.00]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
@@ -2740,7 +2740,7 @@ define void @test_fldcw_fldenv(i8* %a0) optsize {
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
; ATOM-NEXT: #APP
; ATOM-NEXT: fldcw (%eax) # sched: [5:2.50]
-; ATOM-NEXT: fldenv (%eax) # sched: [0:?]
+; ATOM-NEXT: fldenv (%eax) # sched: [100:0.50]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
@@ -2961,10 +2961,10 @@ define void @test_fmul(float *%a0, double *%a1) optsize {
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
; ATOM-NEXT: #APP
-; ATOM-NEXT: fmul %st(0), %st(1) # sched: [0:?]
-; ATOM-NEXT: fmul %st(2) # sched: [0:?]
-; ATOM-NEXT: fmuls (%ecx) # sched: [0:?]
-; ATOM-NEXT: fmull (%eax) # sched: [0:?]
+; ATOM-NEXT: fmul %st(0), %st(1) # sched: [4:4.00]
+; ATOM-NEXT: fmul %st(2) # sched: [4:4.00]
+; ATOM-NEXT: fmuls (%ecx) # sched: [4:4.00]
+; ATOM-NEXT: fmull (%eax) # sched: [4:4.00]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
@@ -3085,10 +3085,10 @@ define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize {
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
; ATOM-NEXT: #APP
-; ATOM-NEXT: fmulp %st(1) # sched: [0:?]
-; ATOM-NEXT: fmulp %st(2) # sched: [0:?]
-; ATOM-NEXT: fimuls (%ecx) # sched: [0:?]
-; ATOM-NEXT: fimull (%eax) # sched: [0:?]
+; ATOM-NEXT: fmulp %st(1) # sched: [4:4.00]
+; ATOM-NEXT: fmulp %st(2) # sched: [4:4.00]
+; ATOM-NEXT: fimuls (%ecx) # sched: [4:4.00]
+; ATOM-NEXT: fimull (%eax) # sched: [4:4.00]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
@@ -3584,7 +3584,7 @@ define void @test_frstor(i8* %a0) optsize {
; ATOM: # %bb.0:
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
; ATOM-NEXT: #APP
-; ATOM-NEXT: frstor (%eax) # sched: [0:?]
+; ATOM-NEXT: frstor (%eax) # sched: [100:0.50]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
@@ -3670,7 +3670,7 @@ define void @test_fsave(i8* %a0) optsize {
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
; ATOM-NEXT: #APP
; ATOM-NEXT: wait # sched: [1:0.50]
-; ATOM-NEXT: fnsave (%eax) # sched: [0:?]
+; ATOM-NEXT: fnsave (%eax) # sched: [100:0.50]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
@@ -3762,7 +3762,7 @@ define void @test_fnsave(i8* %a0) optsize {
; ATOM: # %bb.0:
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
; ATOM-NEXT: #APP
-; ATOM-NEXT: fnsave (%eax) # sched: [0:?]
+; ATOM-NEXT: fnsave (%eax) # sched: [100:0.50]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
@@ -4314,9 +4314,9 @@ define void @test_fstcw_fstenv_fstsw(i8* %a0) optsize {
; ATOM-NEXT: wait # sched: [1:0.50]
; ATOM-NEXT: fnstcw (%eax) # sched: [8:4.00]
; ATOM-NEXT: wait # sched: [1:0.50]
-; ATOM-NEXT: fnstenv (%eax) # sched: [0:?]
+; ATOM-NEXT: fnstenv (%eax) # sched: [100:0.50]
; ATOM-NEXT: wait # sched: [1:0.50]
-; ATOM-NEXT: fnstsw (%eax) # sched: [0:?]
+; ATOM-NEXT: fnstsw (%eax) # sched: [100:0.50]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
@@ -4443,8 +4443,8 @@ define void @test_fnstcw_fnstenv_fnstsw(i8* %a0) optsize {
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
; ATOM-NEXT: #APP
; ATOM-NEXT: fnstcw (%eax) # sched: [8:4.00]
-; ATOM-NEXT: fnstenv (%eax) # sched: [0:?]
-; ATOM-NEXT: fnstsw (%eax) # sched: [0:?]
+; ATOM-NEXT: fnstenv (%eax) # sched: [100:0.50]
+; ATOM-NEXT: fnstsw (%eax) # sched: [100:0.50]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
@@ -4549,10 +4549,10 @@ define void @test_fsub(float *%a0, double *%a1) optsize {
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
; ATOM-NEXT: #APP
-; ATOM-NEXT: fsub %st(0), %st(1) # sched: [0:?]
-; ATOM-NEXT: fsub %st(2) # sched: [0:?]
-; ATOM-NEXT: fsubs (%ecx) # sched: [0:?]
-; ATOM-NEXT: fsubl (%eax) # sched: [0:?]
+; ATOM-NEXT: fsub %st(0), %st(1) # sched: [5:5.00]
+; ATOM-NEXT: fsub %st(2) # sched: [5:5.00]
+; ATOM-NEXT: fsubs (%ecx) # sched: [5:5.00]
+; ATOM-NEXT: fsubl (%eax) # sched: [5:5.00]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
@@ -4673,10 +4673,10 @@ define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize {
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
; ATOM-NEXT: #APP
-; ATOM-NEXT: fsubp %st(1) # sched: [0:?]
-; ATOM-NEXT: fsubp %st(2) # sched: [0:?]
-; ATOM-NEXT: fisubs (%ecx) # sched: [0:?]
-; ATOM-NEXT: fisubl (%eax) # sched: [0:?]
+; ATOM-NEXT: fsubp %st(1) # sched: [5:5.00]
+; ATOM-NEXT: fsubp %st(2) # sched: [5:5.00]
+; ATOM-NEXT: fisubs (%ecx) # sched: [5:5.00]
+; ATOM-NEXT: fisubl (%eax) # sched: [5:5.00]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
@@ -4797,10 +4797,10 @@ define void @test_fsubr(float *%a0, double *%a1) optsize {
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
; ATOM-NEXT: #APP
-; ATOM-NEXT: fsubr %st(0), %st(1) # sched: [0:?]
-; ATOM-NEXT: fsubr %st(2) # sched: [0:?]
-; ATOM-NEXT: fsubrs (%ecx) # sched: [0:?]
-; ATOM-NEXT: fsubrl (%eax) # sched: [0:?]
+; ATOM-NEXT: fsubr %st(0), %st(1) # sched: [5:5.00]
+; ATOM-NEXT: fsubr %st(2) # sched: [5:5.00]
+; ATOM-NEXT: fsubrs (%ecx) # sched: [5:5.00]
+; ATOM-NEXT: fsubrl (%eax) # sched: [5:5.00]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
@@ -4921,10 +4921,10 @@ define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize {
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
; ATOM-NEXT: #APP
-; ATOM-NEXT: fsubrp %st(1) # sched: [0:?]
-; ATOM-NEXT: fsubrp %st(2) # sched: [0:?]
-; ATOM-NEXT: fisubrs (%ecx) # sched: [0:?]
-; ATOM-NEXT: fisubrl (%eax) # sched: [0:?]
+; ATOM-NEXT: fsubrp %st(1) # sched: [5:5.00]
+; ATOM-NEXT: fsubrp %st(2) # sched: [5:5.00]
+; ATOM-NEXT: fisubrs (%ecx) # sched: [5:5.00]
+; ATOM-NEXT: fisubrl (%eax) # sched: [5:5.00]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
OpenPOWER on IntegriCloud