Diffstat (limited to 'arch')
53 files changed, 1278 insertions, 500 deletions
diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 7cdf358337cf..ce0c28495f9a 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -52,12 +52,22 @@ extern __wsum csum_partial(const void *buff, int len, __wsum sum); extern __wsum csum_partial_copy_generic(const void *src, void *dst, int len, __wsum sum, int *src_err, int *dst_err); + +#ifdef __powerpc64__ +#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER +extern __wsum csum_and_copy_from_user(const void __user *src, void *dst, + int len, __wsum sum, int *err_ptr); +#define HAVE_CSUM_COPY_USER +extern __wsum csum_and_copy_to_user(const void *src, void __user *dst, + int len, __wsum sum, int *err_ptr); +#else /* * the same as csum_partial, but copies from src to dst while it * checksums. */ #define csum_partial_copy_from_user(src, dst, len, sum, errp) \ csum_partial_copy_generic((__force const void *)(src), (dst), (len), (sum), (errp), NULL) +#endif #define csum_partial_copy_nocheck(src, dst, len, sum) \ csum_partial_copy_generic((src), (dst), (len), (sum), NULL, NULL) diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h index a11d4eac4f97..2296112e247b 100644 --- a/arch/powerpc/include/asm/compat.h +++ b/arch/powerpc/include/asm/compat.h @@ -143,7 +143,7 @@ static inline void __user *arch_compat_alloc_user_space(long len) * We cant access below the stack pointer in the 32bit ABI and * can access 288 bytes in the 64bit ABI */ - if (!(test_thread_flag(TIF_32BIT))) + if (!is_32bit_task()) usp -= 288; return (void __user *) (usp - len); @@ -213,7 +213,7 @@ struct compat_shmid64_ds { static inline int is_compat_task(void) { - return test_thread_flag(TIF_32BIT); + return is_32bit_task(); } #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 3a40a992e594..f3a1fdd9cf08 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -198,6 +198,7 @@ extern const char *powerpc_base_platform; #define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0040000000000000) #define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0080000000000000) #define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0100000000000000) +#define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0200000000000000) #ifndef __ASSEMBLY__ @@ -392,28 +393,31 @@ extern const char *powerpc_base_platform; CPU_FTR_MMCRA | CPU_FTR_CTRL) #define CPU_FTRS_POWER4 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ - CPU_FTR_MMCRA | CPU_FTR_CP_USE_DCBTZ) + CPU_FTR_MMCRA | CPU_FTR_CP_USE_DCBTZ | \ + CPU_FTR_STCX_CHECKS_ADDRESS) #define CPU_FTRS_PPC970 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA | \ - CPU_FTR_CP_USE_DCBTZ) + CPU_FTR_CP_USE_DCBTZ | CPU_FTR_STCX_CHECKS_ADDRESS) #define CPU_FTRS_POWER5 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ - CPU_FTR_PURR) + CPU_FTR_PURR | CPU_FTR_STCX_CHECKS_ADDRESS) #define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ - CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD) + CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \ + CPU_FTR_STCX_CHECKS_ADDRESS) #define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | 
CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ - CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT) + CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \ + CPU_FTR_STCX_CHECKS_ADDRESS) #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 8c9c6ad2004e..6d2416a85709 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -127,19 +127,7 @@ static inline int dma_supported(struct device *dev, u64 mask) return dma_ops->dma_supported(dev, mask); } -static inline int dma_set_mask(struct device *dev, u64 dma_mask) -{ - struct dma_map_ops *dma_ops = get_dma_ops(dev); - - if (unlikely(dma_ops == NULL)) - return -EIO; - if (dma_ops->set_dma_mask != NULL) - return dma_ops->set_dma_mask(dev, dma_mask); - if (!dev->dma_mask || !dma_supported(dev, dma_mask)) - return -EIO; - *dev->dma_mask = dma_mask; - return 0; -} +extern int dma_set_mask(struct device *dev, u64 dma_mask); static inline void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index c376eda15313..2b917c69ed15 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -250,7 +250,7 @@ do { \ * the 64bit ABI has never had these issues dont enable the workaround * even if we have an executable stack. */ -# define elf_read_implies_exec(ex, exec_stk) (test_thread_flag(TIF_32BIT) ? \ +# define elf_read_implies_exec(ex, exec_stk) (is_32bit_task() ? \ (exec_stk == EXSTACK_DEFAULT) : 0) #else # define SET_PERSONALITY(ex) \ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 57c400071995..7778d6f0c878 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -137,7 +137,8 @@ li r10,0; \ ld r11,exception_marker@toc(r2); \ std r10,RESULT(r1); /* clear regs->result */ \ - std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ + std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \ + ACCOUNT_STOLEN_TIME /* * Exception vectors. diff --git a/arch/powerpc/include/asm/kvm_fpu.h b/arch/powerpc/include/asm/kvm_fpu.h index c3d4f0518a67..92daae132492 100644 --- a/arch/powerpc/include/asm/kvm_fpu.h +++ b/arch/powerpc/include/asm/kvm_fpu.h @@ -82,7 +82,7 @@ FPD_THREE_IN(fmadd) FPD_THREE_IN(fnmsub) FPD_THREE_IN(fnmadd) -extern void kvm_cvt_fd(u32 *from, u64 *to, u64 *fpscr); -extern void kvm_cvt_df(u64 *from, u32 *to, u64 *fpscr); +extern void kvm_cvt_fd(u32 *from, u64 *to); +extern void kvm_cvt_df(u64 *from, u32 *to); #endif diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index 14b592dfb4e8..7f5e0fefebb0 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -153,6 +153,8 @@ struct lppaca { extern struct lppaca lppaca[]; +#define lppaca_of(cpu) (*paca[cpu].lppaca_ptr) + /* * SLB shadow buffer structure as defined in the PAPR. The save_area * contains adjacent ESID and VSID pairs for each shadowed SLB. The @@ -170,6 +172,33 @@ struct slb_shadow { extern struct slb_shadow slb_shadow[]; +/* + * Layout of entries in the hypervisor's dispatch trace log buffer. 
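For orientation, a minimal sketch (not part of the patch; the helper name is made up) of how a consumer might total the stolen-time components of the struct dtl_entry records defined just below:

/* Illustrative only: sum the stolen-time fields of n consecutive
 * entries, matching the tb_delta computation that scan_dispatch_log
 * performs later in this series. */
static u64 sum_dtl_stolen(const struct dtl_entry *dtl, int n)
{
	u64 stolen = 0;
	int i;

	for (i = 0; i < n; i++)
		stolen += dtl[i].enqueue_to_dispatch_time +
			  dtl[i].ready_to_enqueue_time;
	return stolen;
}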
+ */ +struct dtl_entry { + u8 dispatch_reason; + u8 preempt_reason; + u16 processor_id; + u32 enqueue_to_dispatch_time; + u32 ready_to_enqueue_time; + u32 waiting_to_ready_time; + u64 timebase; + u64 fault_addr; + u64 srr0; + u64 srr1; +}; + +#define DISPATCH_LOG_BYTES 4096 /* bytes per cpu */ +#define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry)) + +/* + * When CONFIG_VIRT_CPU_ACCOUNTING = y, the cpu accounting code controls + * reading from the dispatch trace log. If other code wants to consume + * DTL entries, it can set this pointer to a function that will get + * called once for each DTL entry that gets processed. + */ +extern void (*dtl_consumer)(struct dtl_entry *entry, u64 index); + #endif /* CONFIG_PPC_BOOK3S */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_LPPACA_H */ diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index adc8e6cdf339..d045b0145537 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -102,6 +102,9 @@ struct machdep_calls { void (*pci_dma_dev_setup)(struct pci_dev *dev); void (*pci_dma_bus_setup)(struct pci_bus *bus); + /* Platform set_dma_mask override */ + int (*dma_set_mask)(struct device *dev, u64 dma_mask); + int (*probe)(void); void (*setup_arch)(void); /* Optional, may be NULL */ void (*init_early)(void); diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 1ff6662f7faf..6af6c1613409 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -85,6 +85,8 @@ struct paca_struct { u8 kexec_state; /* set when kexec down has irqs off */ #ifdef CONFIG_PPC_STD_MMU_64 struct slb_shadow *slb_shadow_ptr; + struct dtl_entry *dispatch_log; + struct dtl_entry *dispatch_log_end; /* * Now, starting in cacheline 2, the exception save areas @@ -134,8 +136,14 @@ struct paca_struct { /* Stuff for accurate time accounting */ u64 user_time; /* accumulated usermode TB ticks */ u64 system_time; /* accumulated system TB ticks */ - u64 startpurr; /* PURR/TB value snapshot */ + u64 user_time_scaled; /* accumulated usermode SPURR ticks */ + u64 starttime; /* TB value snapshot */ + u64 starttime_user; /* TB value on exit to usermode */ u64 startspurr; /* SPURR value snapshot */ + u64 utime_sspurr; /* ->user_time when ->startspurr set */ + u64 stolen_time; /* TB ticks taken by hypervisor */ + u64 dtl_ridx; /* read index in dispatch log */ + struct dtl_entry *dtl_curr; /* pointer corresponding to dtl_ridx */ #ifdef CONFIG_KVM_BOOK3S_HANDLER /* We use this to store guest state in */ diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index 358ff14ea25e..932f88dcf6fa 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -163,7 +163,7 @@ do { \ #endif /* !CONFIG_HUGETLB_PAGE */ #define VM_DATA_DEFAULT_FLAGS \ - (test_thread_flag(TIF_32BIT) ? \ + (is_32bit_task() ? \ VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64) /* @@ -179,7 +179,7 @@ do { \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #define VM_STACK_DEFAULT_FLAGS \ - (test_thread_flag(TIF_32BIT) ? \ + (is_32bit_task() ? 
\ VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64) #include <asm-generic/getorder.h> diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 498fe09263d3..98210067c1cc 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -9,6 +9,7 @@ #include <asm/asm-compat.h> #include <asm/processor.h> #include <asm/ppc-opcode.h> +#include <asm/firmware.h> #ifndef __ASSEMBLY__ #error __FILE__ should only be used in assembler files @@ -26,17 +27,13 @@ #ifndef CONFIG_VIRT_CPU_ACCOUNTING #define ACCOUNT_CPU_USER_ENTRY(ra, rb) #define ACCOUNT_CPU_USER_EXIT(ra, rb) +#define ACCOUNT_STOLEN_TIME #else #define ACCOUNT_CPU_USER_ENTRY(ra, rb) \ beq 2f; /* if from kernel mode */ \ -BEGIN_FTR_SECTION; \ - mfspr ra,SPRN_PURR; /* get processor util. reg */ \ -END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ -BEGIN_FTR_SECTION; \ - MFTB(ra); /* or get TB if no PURR */ \ -END_FTR_SECTION_IFCLR(CPU_FTR_PURR); \ - ld rb,PACA_STARTPURR(r13); \ - std ra,PACA_STARTPURR(r13); \ + MFTB(ra); /* get timebase */ \ + ld rb,PACA_STARTTIME_USER(r13); \ + std ra,PACA_STARTTIME(r13); \ subf rb,rb,ra; /* subtract start value */ \ ld ra,PACA_USER_TIME(r13); \ add ra,ra,rb; /* add on to user time */ \ @@ -44,19 +41,34 @@ END_FTR_SECTION_IFCLR(CPU_FTR_PURR); \ 2: #define ACCOUNT_CPU_USER_EXIT(ra, rb) \ -BEGIN_FTR_SECTION; \ - mfspr ra,SPRN_PURR; /* get processor util. reg */ \ -END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ -BEGIN_FTR_SECTION; \ - MFTB(ra); /* or get TB if no PURR */ \ -END_FTR_SECTION_IFCLR(CPU_FTR_PURR); \ - ld rb,PACA_STARTPURR(r13); \ - std ra,PACA_STARTPURR(r13); \ + MFTB(ra); /* get timebase */ \ + ld rb,PACA_STARTTIME(r13); \ + std ra,PACA_STARTTIME_USER(r13); \ subf rb,rb,ra; /* subtract start value */ \ ld ra,PACA_SYSTEM_TIME(r13); \ - add ra,ra,rb; /* add on to user time */ \ - std ra,PACA_SYSTEM_TIME(r13); -#endif + add ra,ra,rb; /* add on to system time */ \ + std ra,PACA_SYSTEM_TIME(r13) + +#ifdef CONFIG_PPC_SPLPAR +#define ACCOUNT_STOLEN_TIME \ +BEGIN_FW_FTR_SECTION; \ + beq 33f; \ + /* from user - see if there are any DTL entries to process */ \ + ld r10,PACALPPACAPTR(r13); /* get ptr to VPA */ \ + ld r11,PACA_DTL_RIDX(r13); /* get log read index */ \ + ld r10,LPPACA_DTLIDX(r10); /* get log write index */ \ + cmpd cr1,r11,r10; \ + beq+ cr1,33f; \ + bl .accumulate_stolen_time; \ +33: \ +END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) + +#else /* CONFIG_PPC_SPLPAR */ +#define ACCOUNT_STOLEN_TIME + +#endif /* CONFIG_PPC_SPLPAR */ + +#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ /* * Macros for storing registers into and loading registers from diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 19c05b0f74be..4c14187ba02d 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -118,7 +118,7 @@ extern struct task_struct *last_task_used_spe; #define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4)) #define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(TASK_SIZE_USER64 / 4)) -#define TASK_UNMAPPED_BASE ((test_thread_flag(TIF_32BIT)) ? \ +#define TASK_UNMAPPED_BASE ((is_32bit_task()) ? \ TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64 ) #endif @@ -128,7 +128,7 @@ extern struct task_struct *last_task_used_spe; #define STACK_TOP_USER64 TASK_SIZE_USER64 #define STACK_TOP_USER32 TASK_SIZE_USER32 -#define STACK_TOP (test_thread_flag(TIF_32BIT) ? \ +#define STACK_TOP (is_32bit_task() ? 
\ STACK_TOP_USER32 : STACK_TOP_USER64) #define STACK_TOP_MAX STACK_TOP_USER64 diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 3d212669a130..aa0f1ebb4aaf 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -329,3 +329,22 @@ COMPAT_SYS(rt_tgsigqueueinfo) SYSCALL(fanotify_init) COMPAT_SYS(fanotify_mark) SYSCALL_SPU(prlimit64) +SYSCALL_SPU(socket) +SYSCALL_SPU(bind) +SYSCALL_SPU(connect) +SYSCALL_SPU(listen) +SYSCALL_SPU(accept) +SYSCALL_SPU(getsockname) +SYSCALL_SPU(getpeername) +SYSCALL_SPU(socketpair) +SYSCALL_SPU(send) +SYSCALL_SPU(sendto) +COMPAT_SYS_SPU(recv) +COMPAT_SYS_SPU(recvfrom) +SYSCALL_SPU(shutdown) +COMPAT_SYS_SPU(setsockopt) +COMPAT_SYS_SPU(getsockopt) +COMPAT_SYS_SPU(sendmsg) +COMPAT_SYS_SPU(recvmsg) +COMPAT_SYS_SPU(recvmmsg) +SYSCALL_SPU(accept4) diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h index 6c294acac848..0b3fe78be71b 100644 --- a/arch/powerpc/include/asm/system.h +++ b/arch/powerpc/include/asm/system.h @@ -154,8 +154,8 @@ extern void enable_kernel_spe(void); extern void giveup_spe(struct task_struct *); extern void load_up_spe(struct task_struct *); extern int fix_alignment(struct pt_regs *); -extern void cvt_fd(float *from, double *to, struct thread_struct *thread); -extern void cvt_df(double *from, float *to, struct thread_struct *thread); +extern void cvt_fd(float *from, double *to); +extern void cvt_df(double *from, float *to); #ifndef CONFIG_SMP extern void discard_lazy_cpu_state(void); diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index dc779dfcf258..fe6f7c2c9c68 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -34,7 +34,6 @@ extern void to_tm(int tim, struct rtc_time * tm); extern void GregorianDay(struct rtc_time *tm); extern void generic_calibrate_decr(void); -extern void snapshot_timebase(void); extern void set_dec_cpu6(unsigned int val); @@ -212,12 +211,8 @@ struct cpu_usage { DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array); #if defined(CONFIG_VIRT_CPU_ACCOUNTING) -extern void calculate_steal_time(void); -extern void snapshot_timebases(void); #define account_process_vtime(tsk) account_process_tick(tsk, 0) #else -#define calculate_steal_time() do { } while (0) -#define snapshot_timebases() do { } while (0) #define account_process_vtime(tsk) do { } while (0) #endif diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 597e6f9d094a..6151937657f6 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -348,10 +348,29 @@ #define __NR_fanotify_init 323 #define __NR_fanotify_mark 324 #define __NR_prlimit64 325 +#define __NR_socket 326 +#define __NR_bind 327 +#define __NR_connect 328 +#define __NR_listen 329 +#define __NR_accept 330 +#define __NR_getsockname 331 +#define __NR_getpeername 332 +#define __NR_socketpair 333 +#define __NR_send 334 +#define __NR_sendto 335 +#define __NR_recv 336 +#define __NR_recvfrom 337 +#define __NR_shutdown 338 +#define __NR_setsockopt 339 +#define __NR_getsockopt 340 +#define __NR_sendmsg 341 +#define __NR_recvmsg 342 +#define __NR_recvmmsg 343 +#define __NR_accept4 344 #ifdef __KERNEL__ -#define __NR_syscalls 326 +#define __NR_syscalls 345 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index b876e989220b..8184ee97e484 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ 
-889,7 +889,7 @@ int fix_alignment(struct pt_regs *regs) #ifdef CONFIG_PPC_FPU preempt_disable(); enable_kernel_fp(); - cvt_df(&data.dd, (float *)&data.v[4], ¤t->thread); + cvt_df(&data.dd, (float *)&data.v[4]); preempt_enable(); #else return 0; @@ -933,7 +933,7 @@ int fix_alignment(struct pt_regs *regs) #ifdef CONFIG_PPC_FPU preempt_disable(); enable_kernel_fp(); - cvt_fd((float *)&data.v[4], &data.dd, ¤t->thread); + cvt_fd((float *)&data.v[4], &data.dd); preempt_enable(); #else return 0; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 1c0607ddccc0..c63494090854 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -181,17 +181,19 @@ int main(void) offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid)); DEFINE(SLBSHADOW_STACKESID, offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid)); + DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area)); DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0)); DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1)); DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int)); DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int)); - DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area)); + DEFINE(LPPACA_DTLIDX, offsetof(struct lppaca, dtl_idx)); + DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx)); #endif /* CONFIG_PPC_STD_MMU_64 */ DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp)); DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); - DEFINE(PACA_STARTPURR, offsetof(struct paca_struct, startpurr)); - DEFINE(PACA_STARTSPURR, offsetof(struct paca_struct, startspurr)); + DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime)); + DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user)); DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 84d6367ec003..f368c075c90b 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -12,6 +12,7 @@ #include <linux/memblock.h> #include <asm/bug.h> #include <asm/abs_addr.h> +#include <asm/machdep.h> /* * Generic direct DMA implementation @@ -154,6 +155,23 @@ EXPORT_SYMBOL(dma_direct_ops); #define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) +int dma_set_mask(struct device *dev, u64 dma_mask) +{ + struct dma_map_ops *dma_ops = get_dma_ops(dev); + + if (ppc_md.dma_set_mask) + return ppc_md.dma_set_mask(dev, dma_mask); + if (unlikely(dma_ops == NULL)) + return -EIO; + if (dma_ops->set_dma_mask != NULL) + return dma_ops->set_dma_mask(dev, dma_mask); + if (!dev->dma_mask || !dma_supported(dev, dma_mask)) + return -EIO; + *dev->dma_mask = dma_mask; + return 0; +} +EXPORT_SYMBOL(dma_set_mask); + static int __init dma_init(void) { dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 42e9d908914a..d82878c4daa6 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -97,6 +97,24 @@ system_call_common: addi r9,r1,STACK_FRAME_OVERHEAD ld r11,exception_marker@toc(r2) std r11,-16(r9) /* "regshere" marker */ +#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR) +BEGIN_FW_FTR_SECTION + beq 33f + /* if from user, see if there 
are any DTL entries to process */ + ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */ + ld r11,PACA_DTL_RIDX(r13) /* get log read index */ + ld r10,LPPACA_DTLIDX(r10) /* get log write index */ + cmpd cr1,r11,r10 + beq+ cr1,33f + bl .accumulate_stolen_time + REST_GPR(0,r1) + REST_4GPRS(3,r1) + REST_2GPRS(7,r1) + addi r9,r1,STACK_FRAME_OVERHEAD +33: +END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) +#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */ + #ifdef CONFIG_TRACE_IRQFLAGS bl .trace_hardirqs_on REST_GPR(0,r1) @@ -202,7 +220,9 @@ syscall_exit: bge- syscall_error syscall_error_cont: ld r7,_NIP(r1) +BEGIN_FTR_SECTION stdcx. r0,0,r1 /* to clear the reservation */ +END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) andi. r6,r8,MSR_PR ld r4,_LINK(r1) /* @@ -419,6 +439,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) sync #endif /* CONFIG_SMP */ + /* + * If we optimise away the clear of the reservation in system + * calls because we know the CPU tracks the address of the + * reservation, then we need to clear it here to cover the + * case that the kernel context switch path has no larx + * instructions. + */ +BEGIN_FTR_SECTION + ldarx r6,0,r1 +END_FTR_SECTION_IFSET(CPU_FTR_STCX_CHECKS_ADDRESS) + addi r6,r4,-THREAD /* Convert THREAD to 'current' */ std r6,PACACURRENT(r13) /* Set new 'current' */ @@ -576,7 +607,16 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) andi. r0,r3,MSR_RI beq- unrecov_restore + /* + * Clear the reservation. If we know the CPU tracks the address of + * the reservation then we can potentially save some cycles and use + * a larx. On POWER6 and POWER7 this is significantly faster. + */ +BEGIN_FTR_SECTION stdcx. r0,0,r1 /* to clear the reservation */ +FTR_SECTION_ELSE + ldarx r4,0,r1 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) /* * Clear RI before restoring r13. If we are returning to diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index fc8f5b14019c..e86c040ae585 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -163,24 +163,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) /* * These are used in the alignment trap handler when emulating * single-precision loads and stores. - * We restore and save the fpscr so the task gets the same result - * and exceptions as if the cpu had performed the load or store. 
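A hedged aside on the reservation hunks in entry_64.S above (illustrative pseudo-assembly, not from the patch): on CPUs with CPU_FTR_STCX_CHECKS_ADDRESS the syscall exit path skips the stdcx., so the context-switch path has to kill any reservation the outgoing thread left behind.

# Illustrative only:
#   outgoing thread:  larx   r9,0,r3    # reservation on [r3] is live
#   ...syscall/interrupt, context switch; no larx or stcx. executed...
#   incoming thread:  stcx.  r10,0,r3   # must NOT succeed against the
#                                       # outgoing thread's reservation
#
# The ldarx r6,0,r1 added to the switch path moves the reservation to
# the kernel stack, so the incoming thread's stcx. fails as required.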
*/ _GLOBAL(cvt_fd) - lfd 0,THREAD_FPSCR(r5) /* load up fpscr value */ - MTFSF_L(0) lfs 0,0(r3) stfd 0,0(r4) - mffs 0 - stfd 0,THREAD_FPSCR(r5) /* save new fpscr value */ blr _GLOBAL(cvt_df) - lfd 0,THREAD_FPSCR(r5) /* load up fpscr value */ - MTFSF_L(0) lfd 0,0(r3) stfs 0,0(r4) - mffs 0 - stfd 0,THREAD_FPSCR(r5) /* save new fpscr value */ blr diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 50362b6ef6e9..8d9e3b9cda64 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -56,7 +56,7 @@ static unsigned long get_purr(void) for_each_possible_cpu(cpu) { if (firmware_has_feature(FW_FEATURE_ISERIES)) - sum_purr += lppaca[cpu].emulated_time_base; + sum_purr += lppaca_of(cpu).emulated_time_base; else { struct cpu_usage *cu; @@ -263,7 +263,7 @@ static void parse_ppp_data(struct seq_file *m) ppp_data.active_system_procs); /* pool related entries are apropriate for shared configs */ - if (lppaca[0].shared_proc) { + if (lppaca_of(0).shared_proc) { unsigned long pool_idle_time, pool_procs; seq_printf(m, "pool=%d\n", ppp_data.pool_num); @@ -460,8 +460,8 @@ static void pseries_cmo_data(struct seq_file *m) return; for_each_possible_cpu(cpu) { - cmo_faults += lppaca[cpu].cmo_faults; - cmo_fault_time += lppaca[cpu].cmo_fault_time; + cmo_faults += lppaca_of(cpu).cmo_faults; + cmo_fault_time += lppaca_of(cpu).cmo_fault_time; } seq_printf(m, "cmo_faults=%lu\n", cmo_faults); @@ -479,8 +479,8 @@ static void splpar_dispatch_data(struct seq_file *m) unsigned long dispatch_dispersions = 0; for_each_possible_cpu(cpu) { - dispatches += lppaca[cpu].yield_count; - dispatch_dispersions += lppaca[cpu].dispersion_count; + dispatches += lppaca_of(cpu).yield_count; + dispatch_dispersions += lppaca_of(cpu).dispersion_count; } seq_printf(m, "dispatches=%lu\n", dispatches); @@ -545,7 +545,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) seq_printf(m, "partition_potential_processors=%d\n", partition_potential_processors); - seq_printf(m, "shared_processor_mode=%d\n", lppaca[0].shared_proc); + seq_printf(m, "shared_processor_mode=%d\n", lppaca_of(0).shared_proc); seq_printf(m, "slb_size=%d\n", mmu_slb_size); diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index d0a26f1770fe..1e068a46e6c3 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -27,6 +27,20 @@ extern unsigned long __toc_start; #ifdef CONFIG_PPC_BOOK3S /* + * We only have to have statically allocated lppaca structs on + * legacy iSeries, which supports at most 64 cpus. + */ +#ifdef CONFIG_PPC_ISERIES +#if NR_CPUS < 64 +#define NR_LPPACAS NR_CPUS +#else +#define NR_LPPACAS 64 +#endif +#else /* not iSeries */ +#define NR_LPPACAS 1 +#endif + +/* * The structure which the hypervisor knows about - this structure * should not cross a page boundary. The vpa_init/register_vpa call * is now known to fail if the lppaca structure crosses a page @@ -36,7 +50,7 @@ extern unsigned long __toc_start; * will suffice to ensure that it doesn't cross a page boundary. */ struct lppaca lppaca[] = { - [0 ... (NR_CPUS-1)] = { + [0 ... 
(NR_LPPACAS-1)] = { .desc = 0xd397d781, /* "LpPa" */ .size = sizeof(struct lppaca), .dyn_proc_status = 2, @@ -49,6 +63,54 @@ struct lppaca lppaca[] = { }, }; +static struct lppaca *extra_lppacas; +static long __initdata lppaca_size; + +static void allocate_lppacas(int nr_cpus, unsigned long limit) +{ + if (nr_cpus <= NR_LPPACAS) + return; + + lppaca_size = PAGE_ALIGN(sizeof(struct lppaca) * + (nr_cpus - NR_LPPACAS)); + extra_lppacas = __va(memblock_alloc_base(lppaca_size, + PAGE_SIZE, limit)); +} + +static struct lppaca *new_lppaca(int cpu) +{ + struct lppaca *lp; + + if (cpu < NR_LPPACAS) + return &lppaca[cpu]; + + lp = extra_lppacas + (cpu - NR_LPPACAS); + *lp = lppaca[0]; + + return lp; +} + +static void free_lppacas(void) +{ + long new_size = 0, nr; + + if (!lppaca_size) + return; + nr = num_possible_cpus() - NR_LPPACAS; + if (nr > 0) + new_size = PAGE_ALIGN(nr * sizeof(struct lppaca)); + if (new_size >= lppaca_size) + return; + + memblock_free(__pa(extra_lppacas) + new_size, lppaca_size - new_size); + lppaca_size = new_size; +} + +#else + +static inline void allocate_lppacas(int, unsigned long) { } +static inline void free_lppacas(void) { } + #endif /* CONFIG_PPC_BOOK3S */ #ifdef CONFIG_PPC_STD_MMU_64 @@ -88,7 +150,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) unsigned long kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL; #ifdef CONFIG_PPC_BOOK3S - new_paca->lppaca_ptr = &lppaca[cpu]; + new_paca->lppaca_ptr = new_lppaca(cpu); #else new_paca->kernel_pgd = swapper_pg_dir; #endif @@ -144,6 +206,8 @@ void __init allocate_pacas(void) printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n", paca_size, nr_cpus, paca); + allocate_lppacas(nr_cpus, limit); + /* Can't use for_each_*_cpu, as they aren't functional yet */ for (cpu = 0; cpu < nr_cpus; cpu++) initialise_paca(&paca[cpu], cpu); @@ -164,4 +228,6 @@ void __init free_unused_pacas(void) paca_size - new_size); paca_size = new_size; + + free_lppacas(); } diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index 8eff48e20dba..3fee685de4df 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -169,9 +169,11 @@ static int p970_marked_instr_event(u64 event) switch (unit) { case PM_VPU: mask = 0x4c; /* byte 0 bits 2,3,6 */ + break; case PM_LSU0: /* byte 2 bits 0,2,3,4,6; all of byte 1 */ mask = 0x085dff00; + break; case PM_LSU1L: mask = 0x50 << 24; /* byte 3 bits 4,6 */ break; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index b1c648a36b03..84906d3fc860 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -517,7 +517,6 @@ struct task_struct *__switch_to(struct task_struct *prev, account_system_vtime(current); account_process_vtime(current); - calculate_steal_time(); /* * We can't take a PMU exception inside _switch() since there is a @@ -1298,14 +1297,3 @@ unsigned long randomize_et_dyn(unsigned long base) return ret; } - -#ifdef CONFIG_SMP -int arch_sd_sibling_asym_packing(void) -{ - if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { - printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); - return SD_ASYM_PACKING; - } - return 0; -} -#endif diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 11f3cd9c832f..286d9783d93f 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1681,7 +1681,7 @@ long do_syscall_trace_enter(struct pt_regs *regs) if (unlikely(current->audit_context)) { #ifdef CONFIG_PPC64 - if (!test_thread_flag(TIF_32BIT)) + if 
(!is_32bit_task()) audit_syscall_entry(AUDIT_ARCH_PPC64, regs->gpr[0], regs->gpr[3], regs->gpr[4], diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 0008bc58e826..68034bbf2e4f 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -508,9 +508,6 @@ int __devinit start_secondary(void *unused) if (smp_ops->take_timebase) smp_ops->take_timebase(); - if (system_state > SYSTEM_BOOTING) - snapshot_timebase(); - secondary_cpu_time_init(); ipi_call_lock(); @@ -575,11 +572,18 @@ void __init smp_cpus_done(unsigned int max_cpus) free_cpumask_var(old_mask); - snapshot_timebases(); - dump_numa_cpu_topology(); } +int arch_sd_sibling_asym_packing(void) +{ + if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { + printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); + return SD_ASYM_PACKING; + } + return 0; +} + #ifdef CONFIG_HOTPLUG_CPU int __cpu_disable(void) { diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 8533b3b83f5d..bcb738b9ff8c 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -164,8 +164,6 @@ unsigned long ppc_proc_freq; EXPORT_SYMBOL(ppc_proc_freq); unsigned long ppc_tb_freq; -static DEFINE_PER_CPU(u64, last_jiffy); - #ifdef CONFIG_VIRT_CPU_ACCOUNTING /* * Factors for converting from cputime_t (timebase ticks) to @@ -185,6 +183,8 @@ DEFINE_PER_CPU(unsigned long, cputime_scaled_last_delta); cputime_t cputime_one_jiffy; +void (*dtl_consumer)(struct dtl_entry *, u64); + static void calc_cputime_factors(void) { struct div_result res; @@ -200,62 +200,153 @@ static void calc_cputime_factors(void) } /* - * Read the PURR on systems that have it, otherwise the timebase. + * Read the SPURR on systems that have it, otherwise the PURR, + * or if that doesn't exist return the timebase value passed in. */ -static u64 read_purr(void) +static u64 read_spurr(u64 tb) { + if (cpu_has_feature(CPU_FTR_SPURR)) + return mfspr(SPRN_SPURR); if (cpu_has_feature(CPU_FTR_PURR)) return mfspr(SPRN_PURR); - return mftb(); + return tb; } +#ifdef CONFIG_PPC_SPLPAR + /* - * Read the SPURR on systems that have it, otherwise the purr + * Scan the dispatch trace log and count up the stolen time. + * Should be called with interrupts disabled. */ -static u64 read_spurr(u64 purr) +static u64 scan_dispatch_log(u64 stop_tb) { - /* - * cpus without PURR won't have a SPURR - * We already know the former when we use this, so tell gcc - */ - if (cpu_has_feature(CPU_FTR_PURR) && cpu_has_feature(CPU_FTR_SPURR)) - return mfspr(SPRN_SPURR); - return purr; + u64 i = local_paca->dtl_ridx; + struct dtl_entry *dtl = local_paca->dtl_curr; + struct dtl_entry *dtl_end = local_paca->dispatch_log_end; + struct lppaca *vpa = local_paca->lppaca_ptr; + u64 tb_delta; + u64 stolen = 0; + u64 dtb; + + if (i == vpa->dtl_idx) + return 0; + while (i < vpa->dtl_idx) { + if (dtl_consumer) + dtl_consumer(dtl, i); + dtb = dtl->timebase; + tb_delta = dtl->enqueue_to_dispatch_time + + dtl->ready_to_enqueue_time; + barrier(); + if (i + N_DISPATCH_LOG < vpa->dtl_idx) { + /* buffer has overflowed */ + i = vpa->dtl_idx - N_DISPATCH_LOG; + dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG); + continue; + } + if (dtb > stop_tb) + break; + stolen += tb_delta; + ++i; + ++dtl; + if (dtl == dtl_end) + dtl = local_paca->dispatch_log; + } + local_paca->dtl_ridx = i; + local_paca->dtl_curr = dtl; + return stolen; } /* + * Accumulate stolen time by scanning the dispatch trace log. + * Called on entry from user mode. 
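The ring arithmetic in scan_dispatch_log deserves a number: each struct dtl_entry is 48 bytes, so N_DISPATCH_LOG = 4096 / 48 = 85 entries per cpu. A minimal sketch (helper name made up) of the oldest entry still live once the hypervisor laps the reader:

/* Illustrative: with dtl_idx = 200, entries 115..199 are still in the
 * ring; anything older was overwritten, so the scan resumes at 115. */
static inline u64 dtl_oldest_live(u64 dtl_idx)
{
	return dtl_idx < N_DISPATCH_LOG ? 0 : dtl_idx - N_DISPATCH_LOG;
}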
+ */ +void accumulate_stolen_time(void) +{ + u64 sst, ust; + + sst = scan_dispatch_log(get_paca()->starttime_user); + ust = scan_dispatch_log(get_paca()->starttime); + get_paca()->system_time -= sst; + get_paca()->user_time -= ust; + get_paca()->stolen_time += ust + sst; +} + +static inline u64 calculate_stolen_time(u64 stop_tb) +{ + u64 stolen = 0; + + if (get_paca()->dtl_ridx != get_paca()->lppaca_ptr->dtl_idx) { + stolen = scan_dispatch_log(stop_tb); + get_paca()->system_time -= stolen; + } + + stolen += get_paca()->stolen_time; + get_paca()->stolen_time = 0; + return stolen; +} + +#else /* CONFIG_PPC_SPLPAR */ +static inline u64 calculate_stolen_time(u64 stop_tb) +{ + return 0; +} + +#endif /* CONFIG_PPC_SPLPAR */ + +/* * Account time for a transition between system, hard irq * or soft irq state. */ void account_system_vtime(struct task_struct *tsk) { - u64 now, nowscaled, delta, deltascaled, sys_time; + u64 now, nowscaled, delta, deltascaled; unsigned long flags; + u64 stolen, udelta, sys_scaled, user_scaled; local_irq_save(flags); - now = read_purr(); + now = mftb(); nowscaled = read_spurr(now); - delta = now - get_paca()->startpurr; + get_paca()->system_time += now - get_paca()->starttime; + get_paca()->starttime = now; deltascaled = nowscaled - get_paca()->startspurr; - get_paca()->startpurr = now; get_paca()->startspurr = nowscaled; - if (!in_interrupt()) { - /* deltascaled includes both user and system time. - * Hence scale it based on the purr ratio to estimate - * the system time */ - sys_time = get_paca()->system_time; - if (get_paca()->user_time) - deltascaled = deltascaled * sys_time / - (sys_time + get_paca()->user_time); - delta += sys_time; - get_paca()->system_time = 0; + + stolen = calculate_stolen_time(now); + + delta = get_paca()->system_time; + get_paca()->system_time = 0; + udelta = get_paca()->user_time - get_paca()->utime_sspurr; + get_paca()->utime_sspurr = get_paca()->user_time; + + /* + * Because we don't read the SPURR on every kernel entry/exit, + * deltascaled includes both user and system SPURR ticks. + * Apportion these ticks to system SPURR ticks and user + * SPURR ticks in the same ratio as the system time (delta) + * and user time (udelta) values obtained from the timebase + * over the same interval. The system ticks get accounted here; + * the user ticks get saved up in paca->user_time_scaled to be + * used by account_process_tick. + */ + sys_scaled = delta; + user_scaled = udelta; + if (deltascaled != delta + udelta) { + if (udelta) { + sys_scaled = deltascaled * delta / (delta + udelta); + user_scaled = deltascaled - sys_scaled; + } else { + sys_scaled = deltascaled; + } + } + get_paca()->user_time_scaled += user_scaled; + + if (in_irq() || idle_task(smp_processor_id()) != tsk) { + account_system_time(tsk, 0, delta, sys_scaled); + if (stolen) + account_steal_time(stolen); + } else { + account_idle_time(delta + stolen); } - if (in_irq() || idle_task(smp_processor_id()) != tsk) - account_system_time(tsk, 0, delta, deltascaled); - else - account_idle_time(delta); - __get_cpu_var(cputime_last_delta) = delta; - __get_cpu_var(cputime_scaled_last_delta) = deltascaled; local_irq_restore(flags); } EXPORT_SYMBOL_GPL(account_system_vtime); @@ -265,125 +356,26 @@ EXPORT_SYMBOL_GPL(account_system_vtime); * by the exception entry and exit code to the generic process * user and system time records. * Must be called with interrupts disabled. + * Assumes that account_system_vtime() has been called recently + * (i.e. 
since the last entry from usermode) so that + * get_paca()->user_time_scaled is up to date. */ void account_process_tick(struct task_struct *tsk, int user_tick) { cputime_t utime, utimescaled; utime = get_paca()->user_time; + utimescaled = get_paca()->user_time_scaled; get_paca()->user_time = 0; - utimescaled = cputime_to_scaled(utime); + get_paca()->user_time_scaled = 0; + get_paca()->utime_sspurr = 0; account_user_time(tsk, utime, utimescaled); } -/* - * Stuff for accounting stolen time. - */ -struct cpu_purr_data { - int initialized; /* thread is running */ - u64 tb; /* last TB value read */ - u64 purr; /* last PURR value read */ - u64 spurr; /* last SPURR value read */ -}; - -/* - * Each entry in the cpu_purr_data array is manipulated only by its - * "owner" cpu -- usually in the timer interrupt but also occasionally - * in process context for cpu online. As long as cpus do not touch - * each others' cpu_purr_data, disabling local interrupts is - * sufficient to serialize accesses. - */ -static DEFINE_PER_CPU(struct cpu_purr_data, cpu_purr_data); - -static void snapshot_tb_and_purr(void *data) -{ - unsigned long flags; - struct cpu_purr_data *p = &__get_cpu_var(cpu_purr_data); - - local_irq_save(flags); - p->tb = get_tb_or_rtc(); - p->purr = mfspr(SPRN_PURR); - wmb(); - p->initialized = 1; - local_irq_restore(flags); -} - -/* - * Called during boot when all cpus have come up. - */ -void snapshot_timebases(void) -{ - if (!cpu_has_feature(CPU_FTR_PURR)) - return; - on_each_cpu(snapshot_tb_and_purr, NULL, 1); -} - -/* - * Must be called with interrupts disabled. - */ -void calculate_steal_time(void) -{ - u64 tb, purr; - s64 stolen; - struct cpu_purr_data *pme; - - pme = &__get_cpu_var(cpu_purr_data); - if (!pme->initialized) - return; /* !CPU_FTR_PURR or early in early boot */ - tb = mftb(); - purr = mfspr(SPRN_PURR); - stolen = (tb - pme->tb) - (purr - pme->purr); - if (stolen > 0) { - if (idle_task(smp_processor_id()) != current) - account_steal_time(stolen); - else - account_idle_time(stolen); - } - pme->tb = tb; - pme->purr = purr; -} - -#ifdef CONFIG_PPC_SPLPAR -/* - * Must be called before the cpu is added to the online map when - * a cpu is being brought up at runtime. - */ -static void snapshot_purr(void) -{ - struct cpu_purr_data *pme; - unsigned long flags; - - if (!cpu_has_feature(CPU_FTR_PURR)) - return; - local_irq_save(flags); - pme = &__get_cpu_var(cpu_purr_data); - pme->tb = mftb(); - pme->purr = mfspr(SPRN_PURR); - pme->initialized = 1; - local_irq_restore(flags); -} - -#endif /* CONFIG_PPC_SPLPAR */ - #else /* ! CONFIG_VIRT_CPU_ACCOUNTING */ #define calc_cputime_factors() -#define calculate_steal_time() do { } while (0) #endif -#if !(defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR)) -#define snapshot_purr() do { } while (0) -#endif - -/* - * Called when a cpu comes up after the system has finished booting, - * i.e. as a result of a hotplug cpu action. 
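To make the apportioning in account_system_vtime above concrete, a worked example with made-up numbers:

/* If the timebase shows delta = 300 system ticks and udelta = 100 user
 * ticks, but the SPURR advanced only deltascaled = 200 (CPU throttled):
 *   sys_scaled  = 200 * 300 / (300 + 100) = 150
 *   user_scaled = 200 - 150               = 50
 * i.e. the 3:1 system:user split seen on the timebase is preserved in
 * scaled (SPURR) time. */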
- */ -void snapshot_timebase(void) -{ - __get_cpu_var(last_jiffy) = get_tb_or_rtc(); - snapshot_purr(); -} - void __delay(unsigned long loops) { unsigned long start; @@ -585,8 +577,6 @@ void timer_interrupt(struct pt_regs * regs) old_regs = set_irq_regs(regs); irq_enter(); - calculate_steal_time(); - if (test_perf_event_pending()) { clear_perf_event_pending(); perf_event_do_pending(); diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 13002fe206e7..fd8728729abc 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -159,7 +159,7 @@ static void dump_vdso_pages(struct vm_area_struct * vma) { int i; - if (!vma || test_thread_flag(TIF_32BIT)) { + if (!vma || is_32bit_task()) { printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase); for (i=0; i<vdso32_pages; i++) { struct page *pg = virt_to_page(vdso32_kbase + @@ -170,7 +170,7 @@ static void dump_vdso_pages(struct vm_area_struct * vma) dump_one_vdso_page(pg, upg); } } - if (!vma || !test_thread_flag(TIF_32BIT)) { + if (!vma || !is_32bit_task()) { printk("vDSO64 @ %016lx:\n", (unsigned long)vdso64_kbase); for (i=0; i<vdso64_pages; i++) { struct page *pg = virt_to_page(vdso64_kbase + @@ -200,7 +200,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) return 0; #ifdef CONFIG_PPC64 - if (test_thread_flag(TIF_32BIT)) { + if (is_32bit_task()) { vdso_pagelist = vdso32_pagelist; vdso_pages = vdso32_pages; vdso_base = VDSO32_MBASE; diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index 474f2e24050a..35a701f3ece4 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -159,7 +159,7 @@ static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt) { - kvm_cvt_df(&vcpu->arch.fpr[rt], &vcpu->arch.qpr[rt], &vcpu->arch.fpscr); + kvm_cvt_df(&vcpu->arch.fpr[rt], &vcpu->arch.qpr[rt]); } static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store) @@ -204,7 +204,7 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu, /* put in registers */ switch (ls_type) { case FPU_LS_SINGLE: - kvm_cvt_fd((u32*)tmp, &vcpu->arch.fpr[rs], &vcpu->arch.fpscr); + kvm_cvt_fd((u32*)tmp, &vcpu->arch.fpr[rs]); vcpu->arch.qpr[rs] = *((u32*)tmp); break; case FPU_LS_DOUBLE: @@ -230,7 +230,7 @@ static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu, switch (ls_type) { case FPU_LS_SINGLE: - kvm_cvt_df(&vcpu->arch.fpr[rs], (u32*)tmp, &vcpu->arch.fpscr); + kvm_cvt_df(&vcpu->arch.fpr[rs], (u32*)tmp); val = *((u32*)tmp); len = sizeof(u32); break; @@ -296,7 +296,7 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu, emulated = EMULATE_DONE; /* put in registers */ - kvm_cvt_fd(&tmp[0], &vcpu->arch.fpr[rs], &vcpu->arch.fpscr); + kvm_cvt_fd(&tmp[0], &vcpu->arch.fpr[rs]); vcpu->arch.qpr[rs] = tmp[1]; dprintk(KERN_INFO "KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", tmp[0], @@ -314,7 +314,7 @@ static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu, u32 tmp[2]; int len = w ? 
sizeof(u32) : sizeof(u64); - kvm_cvt_df(&vcpu->arch.fpr[rs], &tmp[0], &vcpu->arch.fpscr); + kvm_cvt_df(&vcpu->arch.fpr[rs], &tmp[0]); tmp[1] = vcpu->arch.qpr[rs]; r = kvmppc_st(vcpu, &addr, len, tmp, true); @@ -516,9 +516,9 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, WARN_ON(rc); /* PS0 */ - kvm_cvt_df(&fpr[reg_in1], &ps0_in1, &vcpu->arch.fpscr); - kvm_cvt_df(&fpr[reg_in2], &ps0_in2, &vcpu->arch.fpscr); - kvm_cvt_df(&fpr[reg_in3], &ps0_in3, &vcpu->arch.fpscr); + kvm_cvt_df(&fpr[reg_in1], &ps0_in1); + kvm_cvt_df(&fpr[reg_in2], &ps0_in2); + kvm_cvt_df(&fpr[reg_in3], &ps0_in3); if (scalar & SCALAR_LOW) ps0_in2 = qpr[reg_in2]; @@ -529,7 +529,7 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, ps0_in1, ps0_in2, ps0_in3, ps0_out); if (!(scalar & SCALAR_NO_PS0)) - kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr); + kvm_cvt_fd(&ps0_out, &fpr[reg_out]); /* PS1 */ ps1_in1 = qpr[reg_in1]; @@ -566,12 +566,12 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, WARN_ON(rc); /* PS0 */ - kvm_cvt_df(&fpr[reg_in1], &ps0_in1, &vcpu->arch.fpscr); + kvm_cvt_df(&fpr[reg_in1], &ps0_in1); if (scalar & SCALAR_LOW) ps0_in2 = qpr[reg_in2]; else - kvm_cvt_df(&fpr[reg_in2], &ps0_in2, &vcpu->arch.fpscr); + kvm_cvt_df(&fpr[reg_in2], &ps0_in2); func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2); @@ -579,7 +579,7 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n", ps0_in1, ps0_in2, ps0_out); - kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr); + kvm_cvt_fd(&ps0_out, &fpr[reg_out]); } /* PS1 */ @@ -615,13 +615,13 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, WARN_ON(rc); /* PS0 */ - kvm_cvt_df(&fpr[reg_in], &ps0_in, &vcpu->arch.fpscr); + kvm_cvt_df(&fpr[reg_in], &ps0_in); func(&vcpu->arch.fpscr, &ps0_out, &ps0_in); dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n", ps0_in, ps0_out); - kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr); + kvm_cvt_fd(&ps0_out, &fpr[reg_out]); /* PS1 */ ps1_in = qpr[reg_in]; @@ -671,7 +671,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) #ifdef DEBUG for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { u32 f; - kvm_cvt_df(&vcpu->arch.fpr[i], &f, &vcpu->arch.fpscr); + kvm_cvt_df(&vcpu->arch.fpr[i], &f); dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx QPR[%d] = 0x%x\n", i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]); } @@ -796,8 +796,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ kvm_cvt_df(&vcpu->arch.fpr[ax_rb], - &vcpu->arch.qpr[ax_rd], - &vcpu->arch.fpscr); + &vcpu->arch.qpr[ax_rd]); break; case OP_4X_PS_MERGE01: WARN_ON(rcomp); @@ -808,19 +807,16 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) WARN_ON(rcomp); /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ kvm_cvt_fd(&vcpu->arch.qpr[ax_ra], - &vcpu->arch.fpr[ax_rd], - &vcpu->arch.fpscr); + &vcpu->arch.fpr[ax_rd]); /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ kvm_cvt_df(&vcpu->arch.fpr[ax_rb], - &vcpu->arch.qpr[ax_rd], - &vcpu->arch.fpscr); + &vcpu->arch.qpr[ax_rd]); break; case OP_4X_PS_MERGE11: WARN_ON(rcomp); /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ kvm_cvt_fd(&vcpu->arch.qpr[ax_ra], - &vcpu->arch.fpr[ax_rd], - &vcpu->arch.fpscr); + &vcpu->arch.fpr[ax_rd]); vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; break; } @@ -1255,7 +1251,7 @@ int kvmppc_emulate_paired_single(struct kvm_run 
*run, struct kvm_vcpu *vcpu) #ifdef DEBUG for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { u32 f; - kvm_cvt_df(&vcpu->arch.fpr[i], &f, &vcpu->arch.fpscr); + kvm_cvt_df(&vcpu->arch.fpr[i], &f); dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f); } #endif diff --git a/arch/powerpc/kvm/fpu.S b/arch/powerpc/kvm/fpu.S index cb34bbe16113..bf68d597549e 100644 --- a/arch/powerpc/kvm/fpu.S +++ b/arch/powerpc/kvm/fpu.S @@ -273,19 +273,11 @@ FPD_THREE_IN(fnmsub) FPD_THREE_IN(fnmadd) _GLOBAL(kvm_cvt_fd) - lfd 0,0(r5) /* load up fpscr value */ - MTFSF_L(0) lfs 0,0(r3) stfd 0,0(r4) - mffs 0 - stfd 0,0(r5) /* save new fpscr value */ blr _GLOBAL(kvm_cvt_df) - lfd 0,0(r5) /* load up fpscr value */ - MTFSF_L(0) lfd 0,0(r3) stfs 0,0(r4) - mffs 0 - stfd 0,0(r5) /* save new fpscr value */ blr diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 5bb89c828070..ad4a36848f25 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -17,7 +17,8 @@ obj-$(CONFIG_PPC32) += div64.o copy_32.o obj-$(CONFIG_HAS_IOMEM) += devres.o obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \ - memcpy_64.o usercopy_64.o mem_64.o string.o + memcpy_64.o usercopy_64.o mem_64.o string.o \ + checksum_wrappers_64.o obj-$(CONFIG_XMON) += sstep.o ldstfp.o obj-$(CONFIG_KPROBES) += sstep.o ldstfp.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += sstep.o ldstfp.o diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index ef96c6c58efc..18245af38aea 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -65,165 +65,393 @@ _GLOBAL(csum_tcpudp_magic) srwi r3,r3,16 blr +#define STACKFRAMESIZE 256 +#define STK_REG(i) (112 + ((i)-14)*8) + /* * Computes the checksum of a memory block at buff, length len, * and adds in "sum" (32-bit). * - * This code assumes at least halfword alignment, though the length - * can be any number of bytes. The sum is accumulated in r5. - * * csum_partial(r3=buff, r4=len, r5=sum) */ _GLOBAL(csum_partial) - subi r3,r3,8 /* we'll offset by 8 for the loads */ - srdi. r6,r4,3 /* divide by 8 for doubleword count */ - addic r5,r5,0 /* clear carry */ - beq 3f /* if we're doing < 8 bytes */ - andi. r0,r3,2 /* aligned on a word boundary already? */ - beq+ 1f - lhz r6,8(r3) /* do 2 bytes to get aligned */ - addi r3,r3,2 - subi r4,r4,2 - addc r5,r5,r6 - srdi. r6,r4,3 /* recompute number of doublewords */ - beq 3f /* any left? */ -1: mtctr r6 -2: ldu r6,8(r3) /* main sum loop */ - adde r5,r5,r6 - bdnz 2b - andi. r4,r4,7 /* compute bytes left to sum after doublewords */ -3: cmpwi 0,r4,4 /* is at least a full word left? */ - blt 4f - lwz r6,8(r3) /* sum this word */ + addic r0,r5,0 /* clear carry */ + + srdi. r6,r4,3 /* less than 8 bytes? */ + beq .Lcsum_tail_word + + /* + * If only halfword aligned, align to a double word. Since odd + * aligned addresses should be rare and they would require more + * work to calculate the correct checksum, we ignore that case + * and take the potential slowdown of unaligned loads. + */ + rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */ + beq .Lcsum_aligned + + li r7,4 + sub r6,r7,r6 + mtctr r6 + +1: + lhz r6,0(r3) /* align to doubleword */ + subi r4,r4,2 + addi r3,r3,2 + adde r0,r0,r6 + bdnz 1b + +.Lcsum_aligned: + /* + * We unroll the loop such that each iteration is 64 bytes with an + * entry and exit limb of 64 bytes, meaning a minimum size of + * 128 bytes. + */ + srdi. 
r6,r4,7 + beq .Lcsum_tail_doublewords /* len < 128 */ + + srdi r6,r4,6 + subi r6,r6,1 + mtctr r6 + + stdu r1,-STACKFRAMESIZE(r1) + std r14,STK_REG(r14)(r1) + std r15,STK_REG(r15)(r1) + std r16,STK_REG(r16)(r1) + + ld r6,0(r3) + ld r9,8(r3) + + ld r10,16(r3) + ld r11,24(r3) + + /* + * On POWER6 and POWER7 back to back addes take 2 cycles because of + * the XER dependency. This means the fastest this loop can go is + * 16 cycles per iteration. The scheduling of the loop below has + * been shown to hit this on both POWER6 and POWER7. + */ + .align 5 +2: + adde r0,r0,r6 + ld r12,32(r3) + ld r14,40(r3) + + adde r0,r0,r9 + ld r15,48(r3) + ld r16,56(r3) + addi r3,r3,64 + + adde r0,r0,r10 + + adde r0,r0,r11 + + adde r0,r0,r12 + + adde r0,r0,r14 + + adde r0,r0,r15 + ld r6,0(r3) + ld r9,8(r3) + + adde r0,r0,r16 + ld r10,16(r3) + ld r11,24(r3) + bdnz 2b + + + adde r0,r0,r6 + ld r12,32(r3) + ld r14,40(r3) + + adde r0,r0,r9 + ld r15,48(r3) + ld r16,56(r3) + addi r3,r3,64 + + adde r0,r0,r10 + adde r0,r0,r11 + adde r0,r0,r12 + adde r0,r0,r14 + adde r0,r0,r15 + adde r0,r0,r16 + + ld r14,STK_REG(r14)(r1) + ld r15,STK_REG(r15)(r1) + ld r16,STK_REG(r16)(r1) + addi r1,r1,STACKFRAMESIZE + + andi. r4,r4,63 + +.Lcsum_tail_doublewords: /* Up to 127 bytes to go */ + srdi. r6,r4,3 + beq .Lcsum_tail_word + + mtctr r6 +3: + ld r6,0(r3) + addi r3,r3,8 + adde r0,r0,r6 + bdnz 3b + + andi. r4,r4,7 + +.Lcsum_tail_word: /* Up to 7 bytes to go */ + srdi. r6,r4,2 + beq .Lcsum_tail_halfword + + lwz r6,0(r3) addi r3,r3,4 + adde r0,r0,r6 subi r4,r4,4 - adde r5,r5,r6 -4: cmpwi 0,r4,2 /* is at least a halfword left? */ - blt+ 5f - lhz r6,8(r3) /* sum this halfword */ - addi r3,r3,2 - subi r4,r4,2 - adde r5,r5,r6 -5: cmpwi 0,r4,1 /* is at least a byte left? */ - bne+ 6f - lbz r6,8(r3) /* sum this byte */ - slwi r6,r6,8 /* this byte is assumed to be the upper byte of a halfword */ - adde r5,r5,r6 -6: addze r5,r5 /* add in final carry */ - rldicl r4,r5,32,0 /* fold two 32-bit halves together */ - add r3,r4,r5 - srdi r3,r3,32 - blr + +.Lcsum_tail_halfword: /* Up to 3 bytes to go */ + srdi. r6,r4,1 + beq .Lcsum_tail_byte + + lhz r6,0(r3) + addi r3,r3,2 + adde r0,r0,r6 + subi r4,r4,2 + +.Lcsum_tail_byte: /* Up to 1 byte to go */ + andi. r6,r4,1 + beq .Lcsum_finish + + lbz r6,0(r3) + sldi r9,r6,8 /* Pad the byte out to 16 bits */ + adde r0,r0,r9 + +.Lcsum_finish: + addze r0,r0 /* add in final carry */ + rldicl r4,r0,32,0 /* fold two 32 bit halves together */ + add r3,r4,r0 + srdi r3,r3,32 + blr + + + .macro source +100: + .section __ex_table,"a" + .align 3 + .llong 100b,.Lsrc_error + .previous + .endm + + .macro dest +200: + .section __ex_table,"a" + .align 3 + .llong 200b,.Ldest_error + .previous + .endm /* * Computes the checksum of a memory block at src, length len, * and adds in "sum" (32-bit), while copying the block to dst. * If an access exception occurs on src or dst, it stores -EFAULT - * to *src_err or *dst_err respectively, and (for an error on - * src) zeroes the rest of dst. - * - * This code needs to be reworked to take advantage of 64 bit sum+copy. - * However, due to tokenring halfword alignment problems this will be very - * tricky. For now we'll leave it until we instrument it somehow. + * to *src_err or *dst_err respectively. The caller must take any action + * required in this case (zeroing memory, recalculating partial checksum etc). * * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err) */ _GLOBAL(csum_partial_copy_generic) - addic r0,r6,0 - subi r3,r3,4 - subi r4,r4,4 - srwi. 
r6,r5,2 - beq 3f /* if we're doing < 4 bytes */ - andi. r9,r4,2 /* Align dst to longword boundary */ - beq+ 1f -81: lhz r6,4(r3) /* do 2 bytes to get aligned */ - addi r3,r3,2 + addic r0,r6,0 /* clear carry */ + + srdi. r6,r5,3 /* less than 8 bytes? */ + beq .Lcopy_tail_word + + /* + * If only halfword aligned, align to a double word. Since odd + * aligned addresses should be rare and they would require more + * work to calculate the correct checksum, we ignore that case + * and take the potential slowdown of unaligned loads. + * + * If the source and destination are relatively unaligned we only + * align the source. This keeps things simple. + */ + rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */ + beq .Lcopy_aligned + + li r7,4 + sub r6,r7,r6 + mtctr r6 + +1: +source; lhz r6,0(r3) /* align to doubleword */ subi r5,r5,2 -91: sth r6,4(r4) - addi r4,r4,2 - addc r0,r0,r6 - srwi. r6,r5,2 /* # words to do */ - beq 3f -1: mtctr r6 -82: lwzu r6,4(r3) /* the bdnz has zero overhead, so it should */ -92: stwu r6,4(r4) /* be unnecessary to unroll this loop */ - adde r0,r0,r6 - bdnz 82b - andi. r5,r5,3 -3: cmpwi 0,r5,2 - blt+ 4f -83: lhz r6,4(r3) addi r3,r3,2 - subi r5,r5,2 -93: sth r6,4(r4) + adde r0,r0,r6 +dest; sth r6,0(r4) addi r4,r4,2 + bdnz 1b + +.Lcopy_aligned: + /* + * We unroll the loop such that each iteration is 64 bytes with an + * entry and exit limb of 64 bytes, meaning a minimum size of + * 128 bytes. + */ + srdi. r6,r5,7 + beq .Lcopy_tail_doublewords /* len < 128 */ + + srdi r6,r5,6 + subi r6,r6,1 + mtctr r6 + + stdu r1,-STACKFRAMESIZE(r1) + std r14,STK_REG(r14)(r1) + std r15,STK_REG(r15)(r1) + std r16,STK_REG(r16)(r1) + +source; ld r6,0(r3) +source; ld r9,8(r3) + +source; ld r10,16(r3) +source; ld r11,24(r3) + + /* + * On POWER6 and POWER7 back to back addes take 2 cycles because of + * the XER dependency. This means the fastest this loop can go is + * 16 cycles per iteration. The scheduling of the loop below has + * been shown to hit this on both POWER6 and POWER7. + */ + .align 5 +2: adde r0,r0,r6 -4: cmpwi 0,r5,1 - bne+ 5f -84: lbz r6,4(r3) -94: stb r6,4(r4) - slwi r6,r6,8 /* Upper byte of word */ +source; ld r12,32(r3) +source; ld r14,40(r3) + + adde r0,r0,r9 +source; ld r15,48(r3) +source; ld r16,56(r3) + addi r3,r3,64 + + adde r0,r0,r10 +dest; std r6,0(r4) +dest; std r9,8(r4) + + adde r0,r0,r11 +dest; std r10,16(r4) +dest; std r11,24(r4) + + adde r0,r0,r12 +dest; std r12,32(r4) +dest; std r14,40(r4) + + adde r0,r0,r14 +dest; std r15,48(r4) +dest; std r16,56(r4) + addi r4,r4,64 + + adde r0,r0,r15 +source; ld r6,0(r3) +source; ld r9,8(r3) + + adde r0,r0,r16 +source; ld r10,16(r3) +source; ld r11,24(r3) + bdnz 2b + + adde r0,r0,r6 -5: addze r3,r0 /* add in final carry (unlikely with 64-bit regs) */ - rldicl r4,r3,32,0 /* fold 64 bit value */ - add r3,r4,r3 - srdi r3,r3,32 - blr +source; ld r12,32(r3) +source; ld r14,40(r3) -/* These shouldn't go in the fixup section, since that would - cause the ex_table addresses to get out of order. */ + adde r0,r0,r9 +source; ld r15,48(r3) +source; ld r16,56(r3) + addi r3,r3,64 + + adde r0,r0,r10 +dest; std r6,0(r4) +dest; std r9,8(r4) + + adde r0,r0,r11 +dest; std r10,16(r4) +dest; std r11,24(r4) + + adde r0,r0,r12 +dest; std r12,32(r4) +dest; std r14,40(r4) + + adde r0,r0,r14 +dest; std r15,48(r4) +dest; std r16,56(r4) + addi r4,r4,64 + + adde r0,r0,r15 + adde r0,r0,r16 + + ld r14,STK_REG(r14)(r1) + ld r15,STK_REG(r15)(r1) + ld r16,STK_REG(r16)(r1) + addi r1,r1,STACKFRAMESIZE + + andi. 
r5,r5,63 + +.Lcopy_tail_doublewords: /* Up to 127 bytes to go */ + srdi. r6,r5,3 + beq .Lcopy_tail_word - .globl src_error_1 -src_error_1: - li r6,0 - subi r5,r5,2 -95: sth r6,4(r4) - addi r4,r4,2 - srwi. r6,r5,2 - beq 3f mtctr r6 - .globl src_error_2 -src_error_2: - li r6,0 -96: stwu r6,4(r4) - bdnz 96b -3: andi. r5,r5,3 - beq src_error - .globl src_error_3 -src_error_3: - li r6,0 - mtctr r5 - addi r4,r4,3 -97: stbu r6,1(r4) - bdnz 97b - .globl src_error -src_error: +3: +source; ld r6,0(r3) + addi r3,r3,8 + adde r0,r0,r6 +dest; std r6,0(r4) + addi r4,r4,8 + bdnz 3b + + andi. r5,r5,7 + +.Lcopy_tail_word: /* Up to 7 bytes to go */ + srdi. r6,r5,2 + beq .Lcopy_tail_halfword + +source; lwz r6,0(r3) + addi r3,r3,4 + adde r0,r0,r6 +dest; stw r6,0(r4) + addi r4,r4,4 + subi r5,r5,4 + +.Lcopy_tail_halfword: /* Up to 3 bytes to go */ + srdi. r6,r5,1 + beq .Lcopy_tail_byte + +source; lhz r6,0(r3) + addi r3,r3,2 + adde r0,r0,r6 +dest; sth r6,0(r4) + addi r4,r4,2 + subi r5,r5,2 + +.Lcopy_tail_byte: /* Up to 1 byte to go */ + andi. r6,r5,1 + beq .Lcopy_finish + +source; lbz r6,0(r3) + sldi r9,r6,8 /* Pad the byte out to 16 bits */ + adde r0,r0,r9 +dest; stb r6,0(r4) + +.Lcopy_finish: + addze r0,r0 /* add in final carry */ + rldicl r4,r0,32,0 /* fold two 32 bit halves together */ + add r3,r4,r0 + srdi r3,r3,32 + blr + +.Lsrc_error: cmpdi 0,r7,0 - beq 1f + beqlr li r6,-EFAULT stw r6,0(r7) -1: addze r3,r0 blr - .globl dst_error -dst_error: +.Ldest_error: cmpdi 0,r8,0 - beq 1f + beqlr li r6,-EFAULT stw r6,0(r8) -1: addze r3,r0 blr - -.section __ex_table,"a" - .align 3 - .llong 81b,src_error_1 - .llong 91b,dst_error - .llong 82b,src_error_2 - .llong 92b,dst_error - .llong 83b,src_error_3 - .llong 93b,dst_error - .llong 84b,src_error_3 - .llong 94b,dst_error - .llong 95b,dst_error - .llong 96b,dst_error - .llong 97b,dst_error diff --git a/arch/powerpc/lib/checksum_wrappers_64.c b/arch/powerpc/lib/checksum_wrappers_64.c new file mode 100644 index 000000000000..769b817fbb32 --- /dev/null +++ b/arch/powerpc/lib/checksum_wrappers_64.c @@ -0,0 +1,102 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright (C) IBM Corporation, 2010 + * + * Author: Anton Blanchard <anton@au.ibm.com> + */ +#include <linux/module.h> +#include <linux/compiler.h> +#include <linux/types.h> +#include <asm/checksum.h> +#include <asm/uaccess.h> + +__wsum csum_and_copy_from_user(const void __user *src, void *dst, + int len, __wsum sum, int *err_ptr) +{ + unsigned int csum; + + might_sleep(); + + *err_ptr = 0; + + if (!len) { + csum = 0; + goto out; + } + + if (unlikely((len < 0) || !access_ok(VERIFY_READ, src, len))) { + *err_ptr = -EFAULT; + csum = (__force unsigned int)sum; + goto out; + } + + csum = csum_partial_copy_generic((void __force *)src, dst, + len, sum, err_ptr, NULL); + + if (unlikely(*err_ptr)) { + int missing = __copy_from_user(dst, src, len); + + if (missing) { + memset(dst + len - missing, 0, missing); + *err_ptr = -EFAULT; + } else { + *err_ptr = 0; + } + + csum = csum_partial(dst, len, sum); + } + +out: + return (__force __wsum)csum; +} +EXPORT_SYMBOL(csum_and_copy_from_user); + +__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len, + __wsum sum, int *err_ptr) +{ + unsigned int csum; + + might_sleep(); + + *err_ptr = 0; + + if (!len) { + csum = 0; + goto out; + } + + if (unlikely((len < 0) || !access_ok(VERIFY_WRITE, dst, len))) { + *err_ptr = -EFAULT; + csum = -1; /* invalid checksum */ + goto out; + } + + csum = csum_partial_copy_generic(src, (void __force *)dst, + len, sum, NULL, err_ptr); + + if (unlikely(*err_ptr)) { + csum = csum_partial(src, len, sum); + + if (copy_to_user(dst, src, len)) { + *err_ptr = -EFAULT; + csum = -1; /* invalid checksum */ + } + } + +out: + return (__force __wsum)csum; +} +EXPORT_SYMBOL(csum_and_copy_to_user); diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index 74a7f4130b4c..55f19f9fd708 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -62,7 +62,7 @@ .text .stabs "arch/powerpc/lib/",N_SO,0,0,0f - .stabs "copy32.S",N_SO,0,0,0f + .stabs "copy_32.S",N_SO,0,0,0f 0: CACHELINE_BYTES = L1_CACHE_BYTES diff --git a/arch/powerpc/lib/ldstfp.S b/arch/powerpc/lib/ldstfp.S index f6448636baf5..6a85380520b6 100644 --- a/arch/powerpc/lib/ldstfp.S +++ b/arch/powerpc/lib/ldstfp.S @@ -17,6 +17,8 @@ #include <asm/asm-offsets.h> #include <linux/errno.h> +#ifdef CONFIG_PPC_FPU + #define STKFRM (PPC_MIN_STKFRM + 16) .macro extab instr,handler @@ -81,7 +83,7 @@ _GLOBAL(do_lfs) mfmsr r6 ori r7,r6,MSR_FP cmpwi cr7,r3,0 - mtmsrd r7 + MTMSRD(r7) isync beq cr7,1f stfd fr0,STKFRM-16(r1) @@ -93,7 +95,7 @@ _GLOBAL(do_lfs) lfd fr0,STKFRM-16(r1) 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 - mtmsrd r6 + MTMSRD(r6) isync mr r3,r9 addi r1,r1,STKFRM @@ -108,7 +110,7 @@ _GLOBAL(do_lfd) mfmsr r6 ori r7,r6,MSR_FP cmpwi cr7,r3,0 - mtmsrd r7 + MTMSRD(r7) isync beq cr7,1f stfd fr0,STKFRM-16(r1) @@ -120,7 +122,7 @@ _GLOBAL(do_lfd) lfd fr0,STKFRM-16(r1) 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 - mtmsrd r6 + MTMSRD(r6) isync mr r3,r9 addi r1,r1,STKFRM @@ -135,7 +137,7 @@ _GLOBAL(do_stfs) mfmsr r6 ori r7,r6,MSR_FP cmpwi cr7,r3,0 - mtmsrd r7 + MTMSRD(r7) isync beq cr7,1f stfd fr0,STKFRM-16(r1) @@ -147,7 +149,7 @@ _GLOBAL(do_stfs) lfd fr0,STKFRM-16(r1) 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 - mtmsrd r6 + MTMSRD(r6) isync mr r3,r9 addi r1,r1,STKFRM @@ -162,7 +164,7 @@ _GLOBAL(do_stfd) mfmsr r6 ori r7,r6,MSR_FP cmpwi cr7,r3,0 - mtmsrd r7 + MTMSRD(r7) isync beq cr7,1f stfd fr0,STKFRM-16(r1) @@ -174,7 +176,7 @@ _GLOBAL(do_stfd) lfd fr0,STKFRM-16(r1) 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 - mtmsrd r6 + MTMSRD(r6) 
isync mr r3,r9 addi r1,r1,STKFRM @@ -229,7 +231,7 @@ _GLOBAL(do_lvx) oris r7,r6,MSR_VEC@h cmpwi cr7,r3,0 li r8,STKFRM-16 - mtmsrd r7 + MTMSRD(r7) isync beq cr7,1f stvx vr0,r1,r8 @@ -241,7 +243,7 @@ _GLOBAL(do_lvx) lvx vr0,r1,r8 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 - mtmsrd r6 + MTMSRD(r6) isync mr r3,r9 addi r1,r1,STKFRM @@ -257,7 +259,7 @@ _GLOBAL(do_stvx) oris r7,r6,MSR_VEC@h cmpwi cr7,r3,0 li r8,STKFRM-16 - mtmsrd r7 + MTMSRD(r7) isync beq cr7,1f stvx vr0,r1,r8 @@ -269,7 +271,7 @@ _GLOBAL(do_stvx) lvx vr0,r1,r8 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 - mtmsrd r6 + MTMSRD(r6) isync mr r3,r9 addi r1,r1,STKFRM @@ -325,7 +327,7 @@ _GLOBAL(do_lxvd2x) oris r7,r6,MSR_VSX@h cmpwi cr7,r3,0 li r8,STKFRM-16 - mtmsrd r7 + MTMSRD(r7) isync beq cr7,1f STXVD2X(0,r1,r8) @@ -337,7 +339,7 @@ _GLOBAL(do_lxvd2x) LXVD2X(0,r1,r8) 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 - mtmsrd r6 + MTMSRD(r6) isync mr r3,r9 addi r1,r1,STKFRM @@ -353,7 +355,7 @@ _GLOBAL(do_stxvd2x) oris r7,r6,MSR_VSX@h cmpwi cr7,r3,0 li r8,STKFRM-16 - mtmsrd r7 + MTMSRD(r7) isync beq cr7,1f STXVD2X(0,r1,r8) @@ -365,7 +367,7 @@ _GLOBAL(do_stxvd2x) LXVD2X(0,r1,r8) 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 - mtmsrd r6 + MTMSRD(r6) isync mr r3,r9 addi r1,r1,STKFRM @@ -373,3 +375,5 @@ _GLOBAL(do_stxvd2x) extab 2b,3b #endif /* CONFIG_VSX */ + +#endif /* CONFIG_PPC_FPU */ diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index 58e14fba11b1..9b8182e82166 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -34,7 +34,7 @@ void __spin_yield(arch_spinlock_t *lock) return; holder_cpu = lock_value & 0xffff; BUG_ON(holder_cpu >= NR_CPUS); - yield_count = lppaca[holder_cpu].yield_count; + yield_count = lppaca_of(holder_cpu).yield_count; if ((yield_count & 1) == 0) return; /* virtual cpu is currently running */ rmb(); @@ -65,7 +65,7 @@ void __rw_yield(arch_rwlock_t *rw) return; /* no write lock at present */ holder_cpu = lock_value & 0xffff; BUG_ON(holder_cpu >= NR_CPUS); - yield_count = lppaca[holder_cpu].yield_count; + yield_count = lppaca_of(holder_cpu).yield_count; if ((yield_count & 1) == 0) return; /* virtual cpu is currently running */ rmb(); diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index e0a9858d537e..ae5189ab0049 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -30,6 +30,7 @@ extern char system_call_common[]; #define XER_OV 0x40000000U #define XER_CA 0x20000000U +#ifdef CONFIG_PPC_FPU /* * Functions in ldstfp.S */ @@ -41,6 +42,7 @@ extern int do_lvx(int rn, unsigned long ea); extern int do_stvx(int rn, unsigned long ea); extern int do_lxvd2x(int rn, unsigned long ea); extern int do_stxvd2x(int rn, unsigned long ea); +#endif /* * Determine whether a conditional branch instruction would branch. @@ -290,6 +292,7 @@ static int __kprobes write_mem(unsigned long val, unsigned long ea, int nb, return write_mem_unaligned(val, ea, nb, regs); } +#ifdef CONFIG_PPC_FPU /* * Check the address and alignment, and call func to do the actual * load or store. 
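/*
 * Aside, not part of any hunk shown here: the lppaca[holder_cpu] ->
 * lppaca_of(holder_cpu) conversions above (and the iSeries/dtl ones
 * further down) rely on an accessor introduced elsewhere in this series,
 * where most lppaca structs become dynamically allocated and are reached
 * through the per-cpu paca. A minimal sketch of the idea, assuming each
 * paca entry carries an lppaca_ptr to its lppaca; the exact definition
 * lives in the lppaca/paca headers, not in this patch:
 */
#define lppaca_of(cpu)	(*paca[cpu].lppaca_ptr)	/* sketch only */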
@@ -351,6 +354,7 @@ static int __kprobes do_fp_store(int rn, int (*func)(int, unsigned long), } return err; } +#endif #ifdef CONFIG_ALTIVEC /* For Altivec/VMX, no need to worry about alignment */ @@ -1393,6 +1397,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) regs->gpr[rd] = byterev_4(val); goto ldst_done; +#ifdef CONFIG_PPC_FPU case 535: /* lfsx */ case 567: /* lfsux */ if (!(regs->msr & MSR_FP)) @@ -1424,6 +1429,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) ea = xform_ea(instr, regs, u); err = do_fp_store(rd, do_stfd, ea, 8, regs); goto ldst_done; +#endif #ifdef __powerpc64__ case 660: /* stdbrx */ @@ -1534,6 +1540,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) } while (++rd < 32); goto instr_done; +#ifdef CONFIG_PPC_FPU case 48: /* lfs */ case 49: /* lfsu */ if (!(regs->msr & MSR_FP)) @@ -1565,6 +1572,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) ea = dform_ea(instr, regs); err = do_fp_store(rd, do_stfd, ea, 8, regs); goto ldst_done; +#endif #ifdef __powerpc64__ case 58: /* ld[u], lwa */ diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 1bd712c33ce2..54f4fb994e99 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -30,6 +30,7 @@ #include <linux/kprobes.h> #include <linux/kdebug.h> #include <linux/perf_event.h> +#include <linux/magic.h> #include <asm/firmware.h> #include <asm/page.h> @@ -385,6 +386,7 @@ do_sigbus: void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) { const struct exception_table_entry *entry; + unsigned long *stackend; /* Are we prepared to handle this fault? */ if ((entry = search_exception_tables(regs->nip)) != NULL) { @@ -413,5 +415,9 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n", regs->nip); + stackend = end_of_stack(current); + if (current != &init_task && *stackend != STACK_END_MAGIC) + printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); + die("Kernel access of bad area", regs, sig); } diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index ddfd7ad4e1d6..5ce99848d91e 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -334,7 +334,7 @@ static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self, /* We don't touch CPU 0 map, it's allocated at boot and kept * around forever */ - if (cpu == 0) + if (cpu == boot_cpuid) return NOTIFY_OK; switch (action) { @@ -420,9 +420,11 @@ void __init mmu_context_init(void) */ context_map = alloc_bootmem(CTX_MAP_SIZE); context_mm = alloc_bootmem(sizeof(void *) * (last_context + 1)); +#ifndef CONFIG_SMP stale_map[0] = alloc_bootmem(CTX_MAP_SIZE); +#else + stale_map[boot_cpuid] = alloc_bootmem(CTX_MAP_SIZE); -#ifdef CONFIG_SMP register_cpu_notifier(&mmu_context_cpu_nb); #endif diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c index b4278cfd1f80..f75301f2c85f 100644 --- a/arch/powerpc/oprofile/backtrace.c +++ b/arch/powerpc/oprofile/backtrace.c @@ -105,7 +105,7 @@ void op_powerpc_backtrace(struct pt_regs * const regs, unsigned int depth) } } else { #ifdef CONFIG_PPC64 - if (!test_thread_flag(TIF_32BIT)) { + if (!is_32bit_task()) { while (depth--) { sp = user_getsp64(sp, first_frame); if (!sp) diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c index 1d3c4effea10..5ec1e47a0d77 100644 ---
a/arch/powerpc/platforms/cell/ras.c +++ b/arch/powerpc/platforms/cell/ras.c @@ -173,8 +173,10 @@ static int __init cbe_ptcal_enable(void) return -ENODEV; size = of_get_property(np, "ibm,cbe-ptcal-size", NULL); - if (!size) + if (!size) { + of_node_put(np); return -ENODEV; + } pr_debug("%s: enabling PTCAL, size = 0x%x\n", __func__, *size); order = get_order(*size); diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c index 5876e888e412..3f2e557344a3 100644 --- a/arch/powerpc/platforms/cell/spider-pic.c +++ b/arch/powerpc/platforms/cell/spider-pic.c @@ -258,8 +258,10 @@ static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic) return NO_IRQ; imap += intsize + 1; tmp = of_get_property(iic, "#interrupt-cells", NULL); - if (tmp == NULL) + if (tmp == NULL) { + of_node_put(iic); return NO_IRQ; + } intsize = *tmp; /* Assume unit is last entry of interrupt specifier */ unit = imap[intsize - 1]; diff --git a/arch/powerpc/platforms/chrp/nvram.c b/arch/powerpc/platforms/chrp/nvram.c index ba3588f2d8e0..d3ceff04ffc7 100644 --- a/arch/powerpc/platforms/chrp/nvram.c +++ b/arch/powerpc/platforms/chrp/nvram.c @@ -74,8 +74,10 @@ void __init chrp_nvram_init(void) return; nbytes_p = of_get_property(nvram, "#bytes", &proplen); - if (nbytes_p == NULL || proplen != sizeof(unsigned int)) + if (nbytes_p == NULL || proplen != sizeof(unsigned int)) { + of_node_put(nvram); return; + } nvram_size = *nbytes_p; diff --git a/arch/powerpc/platforms/iseries/dt.c b/arch/powerpc/platforms/iseries/dt.c index 7f45a51fe793..fdb7384c0c4f 100644 --- a/arch/powerpc/platforms/iseries/dt.c +++ b/arch/powerpc/platforms/iseries/dt.c @@ -243,7 +243,7 @@ static void __init dt_cpus(struct iseries_flat_dt *dt) pft_size[1] = __ilog2(HvCallHpt_getHptPages() * HW_PAGE_SIZE); for (i = 0; i < NR_CPUS; i++) { - if (lppaca[i].dyn_proc_status >= 2) + if (lppaca_of(i).dyn_proc_status >= 2) continue; snprintf(p, 32 - (p - buf), "@%d", i); @@ -251,7 +251,7 @@ static void __init dt_cpus(struct iseries_flat_dt *dt) dt_prop_str(dt, "device_type", device_type_cpu); - index = lppaca[i].dyn_hv_phys_proc_index; + index = lppaca_of(i).dyn_hv_phys_proc_index; d = &xIoHriProcessorVpd[index]; dt_prop_u32(dt, "i-cache-size", d->xInstCacheSize * 1024); diff --git a/arch/powerpc/platforms/iseries/smp.c b/arch/powerpc/platforms/iseries/smp.c index 6590850045af..6c6029914dbc 100644 --- a/arch/powerpc/platforms/iseries/smp.c +++ b/arch/powerpc/platforms/iseries/smp.c @@ -91,7 +91,7 @@ static void smp_iSeries_kick_cpu(int nr) BUG_ON((nr < 0) || (nr >= NR_CPUS)); /* Verify that our partition has a processor nr */ - if (lppaca[nr].dyn_proc_status >= 2) + if (lppaca_of(nr).dyn_proc_status >= 2) return; /* The processor is currently spinning, waiting diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index 3fff8d979b41..fe34c3d9bb74 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -358,6 +358,7 @@ static int __init maple_cpc925_edac_setup(void) model = (const unsigned char *)of_get_property(np, "model", NULL); if (!model) { printk(KERN_ERR "%s: Unable to get model info\n", __func__); + of_node_put(np); return -ENODEV; } diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c index cec635942657..b0c3777528a1 100644 --- a/arch/powerpc/platforms/powermac/pfunc_core.c +++ b/arch/powerpc/platforms/powermac/pfunc_core.c @@ -837,8 +837,10 @@ struct pmf_function
*__pmf_find_function(struct device_node *target, return NULL; find_it: dev = pmf_find_device(actor); - if (dev == NULL) - return NULL; + if (dev == NULL) { + result = NULL; + goto out; + } list_for_each_entry(func, &dev->functions, link) { if (name && strcmp(name, func->name)) @@ -850,8 +852,9 @@ struct pmf_function *__pmf_find_function(struct device_node *target, result = func; break; } - of_node_put(actor); pmf_put_device(dev); +out: + of_node_put(actor); return result; } diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index a00addb55945..c371bc06434b 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -23,37 +23,22 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/debugfs.h> +#include <linux/spinlock.h> #include <asm/smp.h> #include <asm/system.h> #include <asm/uaccess.h> #include <asm/firmware.h> +#include <asm/lppaca.h> #include "plpar_wrappers.h" -/* - * Layout of entries in the hypervisor's DTL buffer. Although we don't - * actually access the internals of an entry (we only need to know the size), - * we might as well define it here for reference. - */ -struct dtl_entry { - u8 dispatch_reason; - u8 preempt_reason; - u16 processor_id; - u32 enqueue_to_dispatch_time; - u32 ready_to_enqueue_time; - u32 waiting_to_ready_time; - u64 timebase; - u64 fault_addr; - u64 srr0; - u64 srr1; -}; - struct dtl { struct dtl_entry *buf; struct dentry *file; int cpu; int buf_entries; u64 last_idx; + spinlock_t lock; }; static DEFINE_PER_CPU(struct dtl, cpu_dtl); @@ -72,25 +57,97 @@ static u8 dtl_event_mask = 0x7; static int dtl_buf_entries = (16 * 85); -static int dtl_enable(struct dtl *dtl) +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +struct dtl_ring { + u64 write_index; + struct dtl_entry *write_ptr; + struct dtl_entry *buf; + struct dtl_entry *buf_end; + u8 saved_dtl_mask; +}; + +static DEFINE_PER_CPU(struct dtl_ring, dtl_rings); + +static atomic_t dtl_count; + +/* + * The cpu accounting code controls the DTL ring buffer, and we get + * given entries as they are processed. 
+ */ +static void consume_dtle(struct dtl_entry *dtle, u64 index) { - unsigned long addr; - int ret, hwcpu; + struct dtl_ring *dtlr = &__get_cpu_var(dtl_rings); + struct dtl_entry *wp = dtlr->write_ptr; + struct lppaca *vpa = local_paca->lppaca_ptr; - /* only allow one reader */ - if (dtl->buf) - return -EBUSY; + if (!wp) + return; - /* we need to store the original allocation size for use during read */ - dtl->buf_entries = dtl_buf_entries; + *wp = *dtle; + barrier(); - dtl->buf = kmalloc_node(dtl->buf_entries * sizeof(struct dtl_entry), - GFP_KERNEL, cpu_to_node(dtl->cpu)); - if (!dtl->buf) { - printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n", - __func__, dtl->cpu); - return -ENOMEM; - } + /* check for hypervisor ring buffer overflow, ignore this entry if so */ + if (index + N_DISPATCH_LOG < vpa->dtl_idx) + return; + + ++wp; + if (wp == dtlr->buf_end) + wp = dtlr->buf; + dtlr->write_ptr = wp; + + /* incrementing write_index makes the new entry visible */ + smp_wmb(); + ++dtlr->write_index; +} + +static int dtl_start(struct dtl *dtl) +{ + struct dtl_ring *dtlr = &per_cpu(dtl_rings, dtl->cpu); + + dtlr->buf = dtl->buf; + dtlr->buf_end = dtl->buf + dtl->buf_entries; + dtlr->write_index = 0; + + /* setting write_ptr enables logging into our buffer */ + smp_wmb(); + dtlr->write_ptr = dtl->buf; + + /* enable event logging */ + dtlr->saved_dtl_mask = lppaca_of(dtl->cpu).dtl_enable_mask; + lppaca_of(dtl->cpu).dtl_enable_mask |= dtl_event_mask; + + dtl_consumer = consume_dtle; + atomic_inc(&dtl_count); + return 0; +} + +static void dtl_stop(struct dtl *dtl) +{ + struct dtl_ring *dtlr = &per_cpu(dtl_rings, dtl->cpu); + + dtlr->write_ptr = NULL; + smp_wmb(); + + dtlr->buf = NULL; + + /* restore dtl_enable_mask */ + lppaca_of(dtl->cpu).dtl_enable_mask = dtlr->saved_dtl_mask; + + if (atomic_dec_and_test(&dtl_count)) + dtl_consumer = NULL; +} + +static u64 dtl_current_index(struct dtl *dtl) +{ + return per_cpu(dtl_rings, dtl->cpu).write_index; +} + +#else /* CONFIG_VIRT_CPU_ACCOUNTING */ + +static int dtl_start(struct dtl *dtl) +{ + unsigned long addr; + int ret, hwcpu; /* Register our dtl buffer with the hypervisor. 
The HV expects the * buffer size to be passed in the second word of the buffer */ @@ -102,34 +159,82 @@ static int dtl_enable(struct dtl *dtl) if (ret) { printk(KERN_WARNING "%s: DTL registration for cpu %d (hw %d) " "failed with %d\n", __func__, dtl->cpu, hwcpu, ret); - kfree(dtl->buf); return -EIO; } /* set our initial buffer indices */ - dtl->last_idx = lppaca[dtl->cpu].dtl_idx = 0; + lppaca_of(dtl->cpu).dtl_idx = 0; /* ensure that our updates to the lppaca fields have occurred before * we actually enable the logging */ smp_wmb(); /* enable event logging */ - lppaca[dtl->cpu].dtl_enable_mask = dtl_event_mask; + lppaca_of(dtl->cpu).dtl_enable_mask = dtl_event_mask; return 0; } -static void dtl_disable(struct dtl *dtl) +static void dtl_stop(struct dtl *dtl) { int hwcpu = get_hard_smp_processor_id(dtl->cpu); - lppaca[dtl->cpu].dtl_enable_mask = 0x0; + lppaca_of(dtl->cpu).dtl_enable_mask = 0x0; unregister_dtl(hwcpu, __pa(dtl->buf)); +} + +static u64 dtl_current_index(struct dtl *dtl) +{ + return lppaca_of(dtl->cpu).dtl_idx; +} +#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ + +static int dtl_enable(struct dtl *dtl) +{ + long int n_entries; + long int rc; + struct dtl_entry *buf = NULL; + /* only allow one reader */ + if (dtl->buf) + return -EBUSY; + + n_entries = dtl_buf_entries; + buf = kmalloc_node(n_entries * sizeof(struct dtl_entry), + GFP_KERNEL, cpu_to_node(dtl->cpu)); + if (!buf) { + printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n", + __func__, dtl->cpu); + return -ENOMEM; + } + + spin_lock(&dtl->lock); + rc = -EBUSY; + if (!dtl->buf) { + /* store the original allocation size for use during read */ + dtl->buf_entries = n_entries; + dtl->buf = buf; + dtl->last_idx = 0; + rc = dtl_start(dtl); + if (rc) + dtl->buf = NULL; + } + spin_unlock(&dtl->lock); + + if (rc) + kfree(buf); + return rc; +} + +static void dtl_disable(struct dtl *dtl) +{ + spin_lock(&dtl->lock); + dtl_stop(dtl); kfree(dtl->buf); dtl->buf = NULL; dtl->buf_entries = 0; + spin_unlock(&dtl->lock); } /* file interface */ @@ -157,8 +262,9 @@ static int dtl_file_release(struct inode *inode, struct file *filp) static ssize_t dtl_file_read(struct file *filp, char __user *buf, size_t len, loff_t *pos) { - int rc, cur_idx, last_idx, n_read, n_req, read_size; + long int rc, n_read, n_req, read_size; struct dtl *dtl; + u64 cur_idx, last_idx, i; if ((len % sizeof(struct dtl_entry)) != 0) return -EINVAL; @@ -171,41 +277,48 @@ static ssize_t dtl_file_read(struct file *filp, char __user *buf, size_t len, /* actual number of entries read */ n_read = 0; - cur_idx = lppaca[dtl->cpu].dtl_idx; + spin_lock(&dtl->lock); + + cur_idx = dtl_current_index(dtl); last_idx = dtl->last_idx; - if (cur_idx - last_idx > dtl->buf_entries) { - pr_debug("%s: hv buffer overflow for cpu %d, samples lost\n", - __func__, dtl->cpu); - } + if (last_idx + dtl->buf_entries <= cur_idx) + last_idx = cur_idx - dtl->buf_entries + 1; + + if (last_idx + n_req > cur_idx) + n_req = cur_idx - last_idx; + + if (n_req > 0) + dtl->last_idx = last_idx + n_req; + + spin_unlock(&dtl->lock); + + if (n_req <= 0) + return 0; - cur_idx %= dtl->buf_entries; - last_idx %= dtl->buf_entries; + i = last_idx % dtl->buf_entries; /* read the tail of the buffer if we've wrapped */ - if (last_idx > cur_idx) { - read_size = min(n_req, dtl->buf_entries - last_idx); + if (i + n_req > dtl->buf_entries) { + read_size = dtl->buf_entries - i; - rc = copy_to_user(buf, &dtl->buf[last_idx], + rc = copy_to_user(buf, &dtl->buf[i], read_size * sizeof(struct dtl_entry)); if (rc) return -EFAULT; - 
last_idx = 0; + i = 0; n_req -= read_size; n_read += read_size; buf += read_size * sizeof(struct dtl_entry); } /* .. and now the head */ - read_size = min(n_req, cur_idx - last_idx); - rc = copy_to_user(buf, &dtl->buf[last_idx], - read_size * sizeof(struct dtl_entry)); + rc = copy_to_user(buf, &dtl->buf[i], n_req * sizeof(struct dtl_entry)); if (rc) return -EFAULT; - n_read += read_size; - dtl->last_idx += n_read; + n_read += n_req; return n_read * sizeof(struct dtl_entry); } @@ -263,6 +376,7 @@ static int dtl_init(void) /* set up the per-cpu log structures */ for_each_possible_cpu(i) { struct dtl *dtl = &per_cpu(cpu_dtl, i); + spin_lock_init(&dtl->lock); dtl->cpu = i; rc = dtl_setup_file(dtl); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index cf79b46d8f88..f129040d974c 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -248,11 +248,13 @@ void vpa_init(int cpu) int hwcpu = get_hard_smp_processor_id(cpu); unsigned long addr; long ret; + struct paca_struct *pp; + struct dtl_entry *dtl; if (cpu_has_feature(CPU_FTR_ALTIVEC)) - lppaca[cpu].vmxregs_in_use = 1; + lppaca_of(cpu).vmxregs_in_use = 1; - addr = __pa(&lppaca[cpu]); + addr = __pa(&lppaca_of(cpu)); ret = register_vpa(hwcpu, addr); if (ret) { @@ -274,6 +276,25 @@ void vpa_init(int cpu) "registration for cpu %d (hw %d) of area %lx " "returns %ld\n", cpu, hwcpu, addr, ret); } + + /* + * Register dispatch trace log, if one has been allocated. + */ + pp = &paca[cpu]; + dtl = pp->dispatch_log; + if (dtl) { + pp->dtl_ridx = 0; + pp->dtl_curr = dtl; + lppaca_of(cpu).dtl_idx = 0; + + /* hypervisor reads buffer length from this field */ + dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES; + ret = register_dtl(hwcpu, __pa(dtl)); + if (ret) + pr_warn("DTL registration failed for cpu %d (%ld)\n", + cpu, ret); + lppaca_of(cpu).dtl_enable_mask = 2; + } } static long pSeries_lpar_hpte_insert(unsigned long hpte_group, diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index a6d19e3a505e..d345bfd56bbe 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -273,6 +273,58 @@ static struct notifier_block pci_dn_reconfig_nb = { .notifier_call = pci_dn_reconfig_notifier, }; +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +/* + * Allocate space for the dispatch trace log for all possible cpus + * and register the buffers with the hypervisor. This is used for + * computing time stolen by the hypervisor. 
+ */ +static int alloc_dispatch_logs(void) +{ + int cpu, ret; + struct paca_struct *pp; + struct dtl_entry *dtl; + + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) + return 0; + + for_each_possible_cpu(cpu) { + pp = &paca[cpu]; + dtl = kmalloc_node(DISPATCH_LOG_BYTES, GFP_KERNEL, + cpu_to_node(cpu)); + if (!dtl) { + pr_warn("Failed to allocate dispatch trace log for cpu %d\n", + cpu); + pr_warn("Stolen time statistics will be unreliable\n"); + break; + } + + pp->dtl_ridx = 0; + pp->dispatch_log = dtl; + pp->dispatch_log_end = dtl + N_DISPATCH_LOG; + pp->dtl_curr = dtl; + } + + /* Register the DTL for the current (boot) cpu */ + dtl = get_paca()->dispatch_log; + get_paca()->dtl_ridx = 0; + get_paca()->dtl_curr = dtl; + get_paca()->lppaca_ptr->dtl_idx = 0; + + /* hypervisor reads buffer length from this field */ + dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES; + ret = register_dtl(hard_smp_processor_id(), __pa(dtl)); + if (ret) + pr_warn("DTL registration failed for boot cpu %d (%d)\n", + smp_processor_id(), ret); + get_paca()->lppaca_ptr->dtl_enable_mask = 2; + + return 0; +} + +early_initcall(alloc_dispatch_logs); +#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ + static void __init pSeries_setup_arch(void) { /* Discover PIC type and setup ppc_md accordingly */ diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 559db2b846a9..17cf15ec38be 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -70,6 +70,8 @@ static int iommu_table_dart_inited; static int dart_dirty; static int dart_is_u4; +#define DART_U4_BYPASS_BASE 0x8000000000ull + #define DBG(...) static inline void dart_tlb_invalidate_all(void) @@ -292,12 +294,20 @@ static void iommu_table_dart_setup(void) set_bit(iommu_table_dart.it_size - 1, iommu_table_dart.it_map); } -static void pci_dma_dev_setup_dart(struct pci_dev *dev) +static void dma_dev_setup_dart(struct device *dev) { /* We only have one iommu table on the mac for now, which makes * things simple. Setup all PCI devices to point to this table */ - set_iommu_table_base(&dev->dev, &iommu_table_dart); + if (get_dma_ops(dev) == &dma_direct_ops) + set_dma_offset(dev, DART_U4_BYPASS_BASE); + else + set_iommu_table_base(dev, &iommu_table_dart); +} + +static void pci_dma_dev_setup_dart(struct pci_dev *dev) +{ + dma_dev_setup_dart(&dev->dev); } static void pci_dma_bus_setup_dart(struct pci_bus *bus) @@ -315,6 +325,45 @@ static void pci_dma_bus_setup_dart(struct pci_bus *bus) PCI_DN(dn)->iommu_table = &iommu_table_dart; } +static bool dart_device_on_pcie(struct device *dev) +{ + struct device_node *np = of_node_get(dev->of_node); + + while (np) { + if (of_device_is_compatible(np, "U4-pcie") || + of_device_is_compatible(np, "u4-pcie")) { + of_node_put(np); + return true; + } + np = of_get_next_parent(np); + } + return false; +} + +static int dart_dma_set_mask(struct device *dev, u64 dma_mask) +{ + if (!dev->dma_mask || !dma_supported(dev, dma_mask)) + return -EIO; + + /* U4 supports a DART bypass, which we use for 64-bit capable + * devices to improve performance. However, that only works + * for devices connected to U4's own PCIe interface, not bridged + * through hypertransport. We need the device to support at + * least 40 bits of addresses.
+ */ + if (dart_device_on_pcie(dev) && dma_mask >= DMA_BIT_MASK(40)) { + dev_info(dev, "Using 64-bit DMA iommu bypass\n"); + set_dma_ops(dev, &dma_direct_ops); + } else { + dev_info(dev, "Using 32-bit DMA via iommu\n"); + set_dma_ops(dev, &dma_iommu_ops); + } + dma_dev_setup_dart(dev); + + *dev->dma_mask = dma_mask; + return 0; +} + void __init iommu_init_early_dart(void) { struct device_node *dn; @@ -328,20 +377,25 @@ void __init iommu_init_early_dart(void) dart_is_u4 = 1; } + /* Initialize the DART HW */ + if (dart_init(dn) != 0) + goto bail; + /* Setup low level TCE operations for the core IOMMU code */ ppc_md.tce_build = dart_build; ppc_md.tce_free = dart_free; ppc_md.tce_flush = dart_flush; - /* Initialize the DART HW */ - if (dart_init(dn) == 0) { - ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_dart; - ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_dart; + /* Setup bypass if supported */ + if (dart_is_u4) + ppc_md.dma_set_mask = dart_dma_set_mask; - /* Setup pci_dma ops */ - set_pci_dma_ops(&dma_iommu_ops); - return; - } + ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_dart; + ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_dart; + + /* Setup pci_dma ops */ + set_pci_dma_ops(&dma_iommu_ops); + return; bail: /* If init failed, use direct iommu and null setup functions */
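/*
 * For reference, the three-instruction fold used at .Lcsum_finish and
 * .Lcopy_finish in the checksum assembler above (rldicl/add/srdi)
 * collapses the 64-bit running checksum into the 32-bit value the callers
 * expect. A C sketch of the same operation; fold64() is an illustrative
 * name, unsigned long is assumed 64-bit as on ppc64, and sum64 already
 * has the final carry absorbed by addze:
 */
static inline unsigned int fold64(unsigned long sum64)
{
	/* rldicl r4,r0,32,0: rotate left by 32, swapping the 32-bit halves */
	unsigned long rot = (sum64 << 32) | (sum64 >> 32);

	/* add + srdi 32: the high word of the 64-bit result is lo + hi
	 * plus any carry out of the low word, an end-around-carry fold */
	return (sum64 + rot) >> 32;
}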
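/*
 * Likewise, a sketch of the overflow clamp in the reworked dtl_file_read()
 * above. cur_idx and last_idx are now monotonically increasing 64-bit
 * counters while the ring retains only the most recent buf_entries
 * entries, so a reader that falls behind must skip ahead rather than
 * return slots the hypervisor has already reused (the helper name is
 * illustrative only):
 */
static u64 dtl_clamp_last_idx(u64 last_idx, u64 cur_idx, u64 buf_entries)
{
	/* the slot at cur_idx - buf_entries may be mid-overwrite;
	 * resume just past it */
	if (last_idx + buf_entries <= cur_idx)
		last_idx = cur_idx - buf_entries + 1;
	return last_idx;
}
/* e.g. buf_entries = 16, last_idx = 3, cur_idx = 25:
 * 3 + 16 = 19 <= 25, so reading resumes at index 25 - 16 + 1 = 10. */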